Generalize workflow recovery steering

- SHA: 918941fbed1f1abed10bcd75b830973e65bba696
- Parents: a759233
- Tree: 0185e0b
| Status | File | + | - |
|---|---|---|---|
| M |
src/loader/runtime/compaction.py
|
91 | 77 |
| M |
src/loader/runtime/dod.py
|
9 | 57 |
| M |
src/loader/runtime/finalization.py
|
12 | 31 |
| M |
src/loader/runtime/safeguard_services.py
|
25 | 269 |
| A |
src/loader/runtime/semantic_rules/__init__.py
|
2 | 0 |
| A |
src/loader/runtime/semantic_rules/html_toc.py
|
506 | 0 |
| M |
src/loader/runtime/tool_batch_recovery.py
|
74 | 15 |
| M |
src/loader/runtime/tool_batches.py
|
160 | 60 |
| M |
src/loader/runtime/workflow.py
|
210 | 0 |
| M |
src/loader/runtime/workflow_lanes.py
|
37 | 9 |
| M |
tests/test_runtime_harness.py
|
7 | 5 |
| M |
tests/test_safeguard_services.py
|
26 | 10 |
| M |
tests/test_tool_batch_policies.py
|
98 | 1 |
| M |
tests/test_tool_batches.py
|
317 | 4 |
| M |
tests/test_workflow.py
|
111 | 0 |
src/loader/runtime/compaction.py (modified) @@ -2,13 +2,13 @@ | ||
| 2 | 2 | |
| 3 | 3 | from __future__ import annotations |
| 4 | 4 | |
| 5 | -import html | |
| 6 | 5 | import re |
| 7 | 6 | from collections import Counter |
| 8 | 7 | from dataclasses import dataclass |
| 9 | 8 | from pathlib import Path |
| 10 | 9 | |
| 11 | 10 | from ..llm.base import Message, Role, ToolCall |
| 11 | +from .semantic_rules import html_toc as html_toc_rule | |
| 12 | 12 | |
| 13 | 13 | DEFAULT_AUTO_COMPACTION_INPUT_TOKENS_THRESHOLD = 100_000 |
| 14 | 14 | MIN_AUTO_COMPACTION_INPUT_TOKENS_THRESHOLD = 12_000 |
@@ -300,10 +300,16 @@ def extract_key_files(messages: list[Message], *, limit: int | None = 6) -> list | ||
| 300 | 300 | return files |
| 301 | 301 | |
| 302 | 302 | |
| 303 | -def summarize_confirmed_facts(messages: list[Message], *, max_items: int = 2) -> str | None: | |
| 303 | +def summarize_confirmed_facts( | |
| 304 | + messages: list[Message], | |
| 305 | + *, | |
| 306 | + max_items: int = 2, | |
| 307 | + focus_path: str | None = None, | |
| 308 | +) -> str | None: | |
| 304 | 309 | """Summarize recent confirmed discoveries from successful tool results.""" |
| 305 | 310 | |
| 306 | - facts = _collect_confirmed_facts(messages) | |
| 311 | + relevant_messages = _messages_for_focus_path(messages, focus_path=focus_path) | |
| 312 | + facts = _collect_confirmed_facts(relevant_messages) | |
| 307 | 313 | |
| 308 | 314 | if not facts: |
| 309 | 315 | return None |
@@ -314,15 +320,25 @@ def infer_preferred_next_step( | ||
| 314 | 320 | messages: list[Message], |
| 315 | 321 | *, |
| 316 | 322 | current_task: str | None = None, |
| 323 | + focus_path: str | None = None, | |
| 317 | 324 | ) -> str | None: |
| 318 | 325 | """Infer one concrete next step from the task and recent transcript.""" |
| 319 | 326 | |
| 320 | - if summarize_confirmed_facts(messages, max_items=1) is None: | |
| 327 | + relevant_messages = _messages_for_focus_path(messages, focus_path=focus_path) | |
| 328 | + if summarize_confirmed_facts( | |
| 329 | + relevant_messages, | |
| 330 | + max_items=1, | |
| 331 | + focus_path=focus_path, | |
| 332 | + ) is None: | |
| 321 | 333 | return None |
| 322 | 334 | |
| 323 | - target_path = _choose_target_path(messages, current_task=current_task) | |
| 324 | - has_confirmed_titles = _summarize_html_title_discovery(messages) is not None | |
| 325 | - verification_gap = _summarize_latest_html_verification_gap(messages) | |
| 335 | + target_path = _choose_target_path( | |
| 336 | + relevant_messages, | |
| 337 | + current_task=current_task, | |
| 338 | + focus_path=focus_path, | |
| 339 | + ) | |
| 340 | + has_confirmed_titles = _summarize_html_title_discovery(relevant_messages) is not None | |
| 341 | + verification_gap = _summarize_latest_html_verification_gap(relevant_messages) | |
| 326 | 342 | if target_path: |
| 327 | 343 | if verification_gap: |
| 328 | 344 | return ( |
@@ -548,7 +564,7 @@ def _summarize_html_title_discovery( | ||
| 548 | 564 | if not isinstance(raw_path, str): |
| 549 | 565 | continue |
| 550 | 566 | normalized_path = _normalize_path_candidate(raw_path) or raw_path |
| 551 | - if Path(normalized_path).name == "index.html" or "/chapters/" not in normalized_path: | |
| 567 | + if html_toc_rule.is_html_toc_index_path(normalized_path) or "/chapters/" not in normalized_path: | |
| 552 | 568 | continue |
| 553 | 569 | |
| 554 | 570 | payload = "\n".join( |
@@ -556,7 +572,7 @@ def _summarize_html_title_discovery( | ||
| 556 | 572 | for result in message.tool_results |
| 557 | 573 | if result.content.strip() |
| 558 | 574 | ) or message.content |
| 559 | - title = _extract_html_title(payload) | |
| 575 | + title = html_toc_rule.extract_html_title_from_text(payload) | |
| 560 | 576 | if not title: |
| 561 | 577 | continue |
| 562 | 578 | |
@@ -573,21 +589,6 @@ def _summarize_html_title_discovery( | ||
| 573 | 589 | return f"Chapter titles confirmed: {preview}" |
| 574 | 590 | |
| 575 | 591 | |
| 576 | -def _extract_html_title(payload: str) -> str | None: | |
| 577 | - for pattern in ( | |
| 578 | - r"<h1[^>]*>(.*?)</h1>", | |
| 579 | - r"<title[^>]*>(.*?)</title>", | |
| 580 | - ): | |
| 581 | - match = re.search(pattern, payload, re.IGNORECASE | re.DOTALL) | |
| 582 | - if not match: | |
| 583 | - continue | |
| 584 | - title = re.sub(r"<[^>]+>", " ", match.group(1)) | |
| 585 | - title = _collapse_inline_whitespace(html.unescape(title)) | |
| 586 | - if title: | |
| 587 | - return title | |
| 588 | - return None | |
| 589 | - | |
| 590 | - | |
| 591 | 592 | def _collect_html_file_discovery_fact( |
| 592 | 593 | messages: list[Message], |
| 593 | 594 | *, |
@@ -670,64 +671,18 @@ def _summarize_latest_html_verification_gap( | ||
| 670 | 671 | for result in message.tool_results |
| 671 | 672 | if result.content.strip() |
| 672 | 673 | ) or message.content |
| 673 | - gap = _extract_html_verification_gap(payload, max_items=max_items) | |
| 674 | + gap = html_toc_rule.summarize_html_toc_verification_gap( | |
| 675 | + payload, | |
| 676 | + max_items=max_items, | |
| 677 | + ) | |
| 674 | 678 | if gap: |
| 675 | 679 | return gap |
| 676 | 680 | |
| 677 | 681 | return None |
| 678 | 682 | |
| 679 | 683 | |
| 680 | -def _extract_html_verification_gap(payload: str, *, max_items: int = 2) -> str | None: | |
| 681 | - missing: list[str] = [] | |
| 682 | - mismatches: list[str] = [] | |
| 683 | - mode: str | None = None | |
| 684 | - | |
| 685 | - for raw_line in payload.splitlines(): | |
| 686 | - line = raw_line.strip() | |
| 687 | - if not line: | |
| 688 | - continue | |
| 689 | - lowered = line.lower() | |
| 690 | - if lowered == "missing links:": | |
| 691 | - mode = "missing" | |
| 692 | - continue | |
| 693 | - if lowered == "title mismatches:": | |
| 694 | - mode = "mismatch" | |
| 695 | - continue | |
| 696 | - if mode == "missing" and "->" in line: | |
| 697 | - href = line.split("->", 1)[0].strip() | |
| 698 | - if href and href not in missing: | |
| 699 | - missing.append(href) | |
| 700 | - continue | |
| 701 | - if mode == "mismatch" and "!=" in line: | |
| 702 | - if line not in mismatches: | |
| 703 | - mismatches.append(line) | |
| 704 | - | |
| 705 | - parts: list[str] = [] | |
| 706 | - if missing: | |
| 707 | - preview = ", ".join(missing[:max_items]) | |
| 708 | - if len(missing) > max_items: | |
| 709 | - preview += ", ..." | |
| 710 | - parts.append(f"missing TOC links {preview}") | |
| 711 | - if mismatches: | |
| 712 | - preview = ", ".join(mismatches[:max_items]) | |
| 713 | - if len(mismatches) > max_items: | |
| 714 | - preview += ", ..." | |
| 715 | - parts.append(f"title mismatches {preview}") | |
| 716 | - return "; ".join(parts) if parts else None | |
| 717 | - | |
| 718 | - | |
| 719 | 684 | def _summarize_html_file_discovery(payload: str) -> str | None: |
| 720 | - filenames = re.findall(r"([A-Za-z0-9_.-]+\.html)", payload) | |
| 721 | - unique_names: list[str] = [] | |
| 722 | - for name in filenames: | |
| 723 | - if name not in unique_names: | |
| 724 | - unique_names.append(name) | |
| 725 | - if len(unique_names) < 3: | |
| 726 | - return None | |
| 727 | - preview = ", ".join(unique_names[:6]) | |
| 728 | - if len(unique_names) > 6: | |
| 729 | - preview += ", ..." | |
| 730 | - return f"Existing files include {preview}" | |
| 685 | + return html_toc_rule.summarize_html_file_discovery(payload) | |
| 731 | 686 | |
| 732 | 687 | |
| 733 | 688 | def _resolve_tool_name( |
@@ -750,7 +705,16 @@ def _choose_target_path( | ||
| 750 | 705 | messages: list[Message], |
| 751 | 706 | *, |
| 752 | 707 | current_task: str | None = None, |
| 708 | + focus_path: str | None = None, | |
| 753 | 709 | ) -> str | None: |
| 710 | + if focus_path: | |
| 711 | + normalized_focus = _normalize_path_candidate(focus_path) | |
| 712 | + if normalized_focus: | |
| 713 | + resolved_focus = html_toc_rule.resolve_html_toc_index_path(normalized_focus) | |
| 714 | + if resolved_focus is not None: | |
| 715 | + return str(resolved_focus) | |
| 716 | + return normalized_focus | |
| 717 | + | |
| 754 | 718 | candidates: Counter[str] = Counter() |
| 755 | 719 | for message in messages: |
| 756 | 720 | for tool_call in message.tool_calls: |
@@ -763,7 +727,7 @@ def _choose_target_path( | ||
| 763 | 727 | if not normalized: |
| 764 | 728 | continue |
| 765 | 729 | path_name = Path(normalized).name |
| 766 | - if path_name == "index.html": | |
| 730 | + if html_toc_rule.is_html_toc_index_path(normalized): | |
| 767 | 731 | candidates[normalized] += 10 |
| 768 | 732 | elif path_name.endswith(".html") and "/chapters/" not in normalized: |
| 769 | 733 | candidates[normalized] += 4 |
@@ -775,6 +739,56 @@ def _choose_target_path( | ||
| 775 | 739 | return None |
| 776 | 740 | current_task_paths = extract_key_files([Message(role=Role.USER, content=current_task)], limit=3) |
| 777 | 741 | for path in current_task_paths: |
| 778 | - if Path(path).name == "index.html": | |
| 742 | + if html_toc_rule.is_html_toc_index_path(path): | |
| 779 | 743 | return path |
| 780 | 744 | return current_task_paths[0] if current_task_paths else None |
| 745 | + | |
| 746 | + | |
| 747 | +def _messages_for_focus_path( | |
| 748 | + messages: list[Message], | |
| 749 | + *, | |
| 750 | + focus_path: str | None = None, | |
| 751 | +) -> list[Message]: | |
| 752 | + if not focus_path: | |
| 753 | + return messages | |
| 754 | + | |
| 755 | + anchors = _focus_path_anchors(focus_path) | |
| 756 | + if not anchors: | |
| 757 | + return messages | |
| 758 | + | |
| 759 | + filtered = [ | |
| 760 | + message | |
| 761 | + for message in messages | |
| 762 | + if _message_matches_focus_path(message, anchors) | |
| 763 | + ] | |
| 764 | + return filtered or messages | |
| 765 | + | |
| 766 | + | |
| 767 | +def _focus_path_anchors(focus_path: str) -> tuple[str, ...]: | |
| 768 | + normalized_focus = _normalize_path_candidate(focus_path) or str( | |
| 769 | + Path(focus_path).expanduser() | |
| 770 | + ) | |
| 771 | + focus = Path(normalized_focus).expanduser() | |
| 772 | + anchors = {str(focus)} | |
| 773 | + | |
| 774 | + resolved_index = html_toc_rule.resolve_html_toc_index_path(focus) | |
| 775 | + if resolved_index is not None: | |
| 776 | + anchors.add(str(resolved_index)) | |
| 777 | + anchors.add(str(resolved_index.parent)) | |
| 778 | + anchors.add(str(resolved_index.parent / "chapters")) | |
| 779 | + else: | |
| 780 | + anchors.add(str(focus.parent)) | |
| 781 | + | |
| 782 | + return tuple(anchor for anchor in anchors if anchor) | |
| 783 | + | |
| 784 | + | |
| 785 | +def _message_matches_focus_path(message: Message, anchors: tuple[str, ...]) -> bool: | |
| 786 | + if any(anchor in str(message.content or "") for anchor in anchors): | |
| 787 | + return True | |
| 788 | + | |
| 789 | + for tool_call in message.tool_calls: | |
| 790 | + for key in ("file_path", "path", "cwd"): | |
| 791 | + value = tool_call.arguments.get(key) | |
| 792 | + if isinstance(value, str) and any(anchor in value for anchor in anchors): | |
| 793 | + return True | |
| 794 | + return False | |
src/loader/runtime/dod.py (modified) @@ -12,6 +12,7 @@ from typing import Any, Literal | ||
| 12 | 12 | |
| 13 | 13 | from ..llm.base import ToolCall |
| 14 | 14 | from ..tools.shell_tools import BashTool |
| 15 | +from .semantic_rules import html_toc as html_toc_rule | |
| 15 | 16 | from .verification_observations import VerificationAttempt, verification_attempt_id |
| 16 | 17 | |
| 17 | 18 | TaskSize = Literal["small", "standard", "large"] |
@@ -490,72 +491,23 @@ def _derive_html_toc_verification_command( | ||
| 490 | 491 | task_statement: str, |
| 491 | 492 | ) -> str | None: |
| 492 | 493 | task_hints = " ".join([task_statement, *dod.acceptance_criteria]).lower() |
| 493 | - if not any( | |
| 494 | - hint in task_hints | |
| 495 | - for hint in ("href", "link", "links", "table of contents", "chapter title") | |
| 496 | - ): | |
| 494 | + if not html_toc_rule.task_targets_html_toc(task_hints): | |
| 497 | 495 | return None |
| 498 | 496 | |
| 499 | 497 | for path_str in dod.touched_files: |
| 500 | 498 | path = Path(path_str) |
| 501 | 499 | effective_path = path if path.is_absolute() else (project_root / path) |
| 502 | - if effective_path.name != "index.html" or effective_path.suffix != ".html": | |
| 503 | - continue | |
| 504 | - if not (effective_path.parent / "chapters").is_dir(): | |
| 505 | - continue | |
| 506 | - return _build_html_toc_verification_command(effective_path) | |
| 500 | + command = html_toc_rule.build_html_toc_verification_command(effective_path) | |
| 501 | + if command: | |
| 502 | + return command | |
| 507 | 503 | return None |
| 508 | 504 | |
| 509 | 505 | |
| 510 | 506 | def _build_html_toc_verification_command(index_path: Path) -> str: |
| 511 | - path_literal = repr(str(index_path)) | |
| 512 | - return "\n".join( | |
| 513 | - [ | |
| 514 | - "python3 - <<'PY'", | |
| 515 | - "from pathlib import Path", | |
| 516 | - "import re", | |
| 517 | - "import sys", | |
| 518 | - "", | |
| 519 | - f"index = Path({path_literal}).expanduser()", | |
| 520 | - "root = index.parent", | |
| 521 | - "text = index.read_text()", | |
| 522 | - "section_match = re.search(r'<ul class=\"chapter-list\">(.*?)</ul>', text, re.S)", | |
| 523 | - "if section_match is None:", | |
| 524 | - " print('Missing chapter-list table of contents', file=sys.stderr)", | |
| 525 | - " raise SystemExit(1)", | |
| 526 | - "links = re.findall(r'<a href=\"([^\"]+)\">([^<]+)</a>', section_match.group(1))", | |
| 527 | - "if not links:", | |
| 528 | - " print('No chapter links found in table of contents', file=sys.stderr)", | |
| 529 | - " raise SystemExit(1)", | |
| 530 | - "", | |
| 531 | - "missing = []", | |
| 532 | - "mismatched = []", | |
| 533 | - "for href, label in links:", | |
| 534 | - " target = (root / href).resolve()", | |
| 535 | - " if not target.exists():", | |
| 536 | - " missing.append(f'{href} -> missing')", | |
| 537 | - " continue", | |
| 538 | - " body = target.read_text()", | |
| 539 | - " match = re.search(r'<h1>(.*?)</h1>', body, re.S)", | |
| 540 | - " title = match.group(1).strip() if match else ''", | |
| 541 | - " if title and label.strip() != title:", | |
| 542 | - " mismatched.append(f'{href} -> {label.strip()} != {title}')", | |
| 543 | - "", | |
| 544 | - "if missing or mismatched:", | |
| 545 | - " if missing:", | |
| 546 | - " print('Missing links:', file=sys.stderr)", | |
| 547 | - " for item in missing:", | |
| 548 | - " print(item, file=sys.stderr)", | |
| 549 | - " if mismatched:", | |
| 550 | - " print('Title mismatches:', file=sys.stderr)", | |
| 551 | - " for item in mismatched:", | |
| 552 | - " print(item, file=sys.stderr)", | |
| 553 | - " raise SystemExit(1)", | |
| 554 | - "", | |
| 555 | - "print(f'validated {len(links)} toc links in {index.name}')", | |
| 556 | - "PY", | |
| 557 | - ] | |
| 558 | - ) | |
| 507 | + command = html_toc_rule.build_html_toc_verification_command(index_path) | |
| 508 | + if command is None: | |
| 509 | + raise ValueError(f"{index_path} is not a valid HTML TOC target") | |
| 510 | + return command | |
| 559 | 511 | |
| 560 | 512 | |
| 561 | 513 | def _first_non_empty_line(text: str) -> str: |
src/loader/runtime/finalization.py (modified) @@ -28,6 +28,7 @@ from .executor import ToolExecutor | ||
| 28 | 28 | from .logging import get_runtime_logger |
| 29 | 29 | from .memory import MemoryStore |
| 30 | 30 | from .policy_timeline import append_verification_timeline_entry |
| 31 | +from .semantic_rules import html_toc as html_toc_rule | |
| 31 | 32 | from .session import normalize_usage |
| 32 | 33 | from .tracing import RuntimeTracer |
| 33 | 34 | from .verification_observations import ( |
@@ -780,7 +781,6 @@ def _verification_observation_from_evidence( | ||
| 780 | 781 | attempt_id: str | None, |
| 781 | 782 | attempt_number: int | None, |
| 782 | 783 | ) -> VerificationObservation: |
| 783 | - command = evidence.command or "verification" | |
| 784 | 784 | return VerificationObservation( |
| 785 | 785 | status=( |
| 786 | 786 | VerificationObservationStatus.SKIPPED.value |
@@ -960,45 +960,26 @@ def _extract_verification_repairs( | ||
| 960 | 960 | fixes: list[str] = [] |
| 961 | 961 | for evidence in evidence_items: |
| 962 | 962 | for candidate in (evidence.stderr, evidence.output, evidence.stdout): |
| 963 | - missing, mismatches = _parse_verification_failures(str(candidate)) | |
| 963 | + missing, mismatches = html_toc_rule.parse_html_toc_verification_failures( | |
| 964 | + str(candidate) | |
| 965 | + ) | |
| 964 | 966 | for href in missing: |
| 965 | - item = f"Fix the missing TOC href `{href}` in `index.html`." | |
| 967 | + item = ( | |
| 968 | + f"Fix the missing TOC href `{href}` in the target HTML " | |
| 969 | + "table-of-contents page." | |
| 970 | + ) | |
| 966 | 971 | if item not in fixes: |
| 967 | 972 | fixes.append(item) |
| 968 | 973 | for mismatch in mismatches: |
| 969 | - item = f"Fix the TOC label mismatch `{mismatch}`." | |
| 974 | + item = ( | |
| 975 | + f"Fix the TOC label mismatch `{mismatch}` in the target HTML " | |
| 976 | + "table-of-contents page." | |
| 977 | + ) | |
| 970 | 978 | if item not in fixes: |
| 971 | 979 | fixes.append(item) |
| 972 | 980 | return fixes |
| 973 | 981 | |
| 974 | 982 | |
| 975 | -def _parse_verification_failures(text: str) -> tuple[list[str], list[str]]: | |
| 976 | - missing: list[str] = [] | |
| 977 | - mismatches: list[str] = [] | |
| 978 | - mode: str | None = None | |
| 979 | - | |
| 980 | - for raw_line in text.splitlines(): | |
| 981 | - line = raw_line.strip() | |
| 982 | - if not line: | |
| 983 | - continue | |
| 984 | - lowered = line.lower() | |
| 985 | - if lowered == "missing links:": | |
| 986 | - mode = "missing" | |
| 987 | - continue | |
| 988 | - if lowered == "title mismatches:": | |
| 989 | - mode = "mismatch" | |
| 990 | - continue | |
| 991 | - if mode == "missing" and "->" in line: | |
| 992 | - href = line.split("->", 1)[0].strip() | |
| 993 | - if href and href not in missing: | |
| 994 | - missing.append(href) | |
| 995 | - continue | |
| 996 | - if mode == "mismatch" and "!=" in line and line not in mismatches: | |
| 997 | - mismatches.append(line) | |
| 998 | - | |
| 999 | - return missing, mismatches | |
| 1000 | - | |
| 1001 | - | |
| 1002 | 983 | def _classify_verification_kind(command: str) -> str: |
| 1003 | 984 | """Classify the verification command into a summary kind.""" |
| 1004 | 985 | |
src/loader/runtime/safeguard_services.py (modified) @@ -4,10 +4,11 @@ from __future__ import annotations | ||
| 4 | 4 | |
| 5 | 5 | import re |
| 6 | 6 | import shlex |
| 7 | -from difflib import get_close_matches | |
| 8 | 7 | from dataclasses import dataclass |
| 8 | +from difflib import get_close_matches | |
| 9 | 9 | from pathlib import Path |
| 10 | 10 | |
| 11 | +from .semantic_rules import html_toc as html_toc_rule | |
| 11 | 12 | |
| 12 | 13 | TEXT_REWRITE_SUFFIXES = frozenset( |
| 13 | 14 | { |
@@ -133,251 +134,6 @@ def extract_shell_text_rewrite_target(command: str) -> str | None: | ||
| 133 | 134 | return None |
| 134 | 135 | |
| 135 | 136 | |
| 136 | -def extract_html_title_from_text(payload: str) -> str | None: | |
| 137 | - """Extract one human-readable HTML title from raw file contents.""" | |
| 138 | - | |
| 139 | - for pattern in (r"<h1[^>]*>(.*?)</h1>", r"<title[^>]*>(.*?)</title>"): | |
| 140 | - match = re.search(pattern, payload, re.IGNORECASE | re.DOTALL) | |
| 141 | - if not match: | |
| 142 | - continue | |
| 143 | - title = re.sub(r"<[^>]+>", " ", match.group(1)) | |
| 144 | - normalized = " ".join(title.split()).strip() | |
| 145 | - if normalized: | |
| 146 | - return normalized | |
| 147 | - return None | |
| 148 | - | |
| 149 | - | |
| 150 | -def read_html_title(path: Path) -> str: | |
| 151 | - """Read one HTML file title for inventory and validation helpers.""" | |
| 152 | - | |
| 153 | - try: | |
| 154 | - return extract_html_title_from_text(path.read_text()) or "" | |
| 155 | - except OSError: | |
| 156 | - return "" | |
| 157 | - | |
| 158 | - | |
| 159 | -def format_html_inventory_entry(root: Path, candidate: Path) -> str: | |
| 160 | - """Format one exact href/title pair for model-facing guidance.""" | |
| 161 | - | |
| 162 | - normalized_root = root.expanduser().resolve(strict=False) | |
| 163 | - normalized_candidate = candidate.expanduser().resolve(strict=False) | |
| 164 | - try: | |
| 165 | - href = str(normalized_candidate.relative_to(normalized_root)) | |
| 166 | - except ValueError: | |
| 167 | - href = normalized_candidate.name | |
| 168 | - title = read_html_title(candidate) | |
| 169 | - if title: | |
| 170 | - return f"{href} = {title}" | |
| 171 | - return href | |
| 172 | - | |
| 173 | - | |
| 174 | -def _collect_html_inventory_entries(index_path: str | Path) -> list[tuple[str, str]]: | |
| 175 | - """Return exact href/title pairs for sibling HTML chapters.""" | |
| 176 | - | |
| 177 | - index = Path(index_path).expanduser() | |
| 178 | - if index.name != "index.html": | |
| 179 | - return [] | |
| 180 | - | |
| 181 | - chapters_dir = index.parent / "chapters" | |
| 182 | - if not chapters_dir.is_dir(): | |
| 183 | - return [] | |
| 184 | - | |
| 185 | - entries: list[tuple[str, str]] = [] | |
| 186 | - for candidate in sorted(chapters_dir.glob("*.html")): | |
| 187 | - if not candidate.is_file(): | |
| 188 | - continue | |
| 189 | - title = read_html_title(candidate) | |
| 190 | - if not title: | |
| 191 | - continue | |
| 192 | - href = format_html_inventory_entry(index.parent, candidate).split(" = ", 1)[0] | |
| 193 | - entries.append((href, title)) | |
| 194 | - return entries | |
| 195 | - | |
| 196 | - | |
| 197 | -def summarize_html_inventory( | |
| 198 | - index_path: str | Path, | |
| 199 | - *, | |
| 200 | - limit: int | None = 12, | |
| 201 | -) -> str | None: | |
| 202 | - """Summarize the existing sibling HTML inventory for one index page.""" | |
| 203 | - | |
| 204 | - index = Path(index_path).expanduser() | |
| 205 | - if index.name != "index.html": | |
| 206 | - return None | |
| 207 | - | |
| 208 | - entries = [f"{href} = {title}" for href, title in _collect_html_inventory_entries(index)] | |
| 209 | - if not entries: | |
| 210 | - return None | |
| 211 | - | |
| 212 | - if limit is not None and len(entries) > limit: | |
| 213 | - return "; ".join(entries[:limit]) + "; ..." | |
| 214 | - return "; ".join(entries) | |
| 215 | - | |
| 216 | - | |
| 217 | -def extract_html_toc_excerpt( | |
| 218 | - index_path: str | Path, | |
| 219 | - *, | |
| 220 | - max_lines: int = 16, | |
| 221 | -) -> str | None: | |
| 222 | - """Extract the current HTML table-of-contents block for recovery guidance.""" | |
| 223 | - | |
| 224 | - index = Path(index_path).expanduser() | |
| 225 | - if index.name != "index.html": | |
| 226 | - return None | |
| 227 | - | |
| 228 | - try: | |
| 229 | - text = index.read_text() | |
| 230 | - except OSError: | |
| 231 | - return None | |
| 232 | - | |
| 233 | - match = re.search( | |
| 234 | - r"(<h2[^>]*>\s*Table of Contents\s*</h2>.*?</ul>)", | |
| 235 | - text, | |
| 236 | - re.IGNORECASE | re.DOTALL, | |
| 237 | - ) | |
| 238 | - if not match: | |
| 239 | - match = re.search( | |
| 240 | - r"(<ul[^>]*class=\"[^\"]*chapter-list[^\"]*\"[^>]*>.*?</ul>)", | |
| 241 | - text, | |
| 242 | - re.IGNORECASE | re.DOTALL, | |
| 243 | - ) | |
| 244 | - if not match: | |
| 245 | - return None | |
| 246 | - | |
| 247 | - snippet_lines = [line.rstrip() for line in match.group(1).splitlines() if line.strip()] | |
| 248 | - if not snippet_lines: | |
| 249 | - return None | |
| 250 | - if len(snippet_lines) > max_lines: | |
| 251 | - snippet_lines = snippet_lines[:max_lines] + ["..."] | |
| 252 | - return "\n".join(snippet_lines) | |
| 253 | - | |
| 254 | - | |
| 255 | -def build_html_toc_replacement_block(index_path: str | Path) -> str | None: | |
| 256 | - """Build one exact replacement TOC block from the verified sibling inventory.""" | |
| 257 | - | |
| 258 | - entries = _collect_html_inventory_entries(index_path) | |
| 259 | - if not entries: | |
| 260 | - return None | |
| 261 | - | |
| 262 | - excerpt = extract_html_toc_excerpt(index_path, max_lines=64) | |
| 263 | - excerpt_lines = excerpt.splitlines() if excerpt else [] | |
| 264 | - | |
| 265 | - heading_line = next( | |
| 266 | - (line.rstrip() for line in excerpt_lines if "<h2" in line.lower()), | |
| 267 | - "<h2>Table of Contents</h2>", | |
| 268 | - ) | |
| 269 | - ul_line = next( | |
| 270 | - ( | |
| 271 | - line.rstrip() | |
| 272 | - for line in excerpt_lines | |
| 273 | - if "<ul" in line.lower() and "chapter-list" in line.lower() | |
| 274 | - ), | |
| 275 | - ' <ul class="chapter-list">', | |
| 276 | - ) | |
| 277 | - li_indent = next( | |
| 278 | - ( | |
| 279 | - re.match(r"^\s*", line).group(0) | |
| 280 | - for line in excerpt_lines | |
| 281 | - if "<li><a " in line | |
| 282 | - ), | |
| 283 | - re.match(r"^\s*", ul_line).group(0) + " ", | |
| 284 | - ) | |
| 285 | - closing_line = next( | |
| 286 | - (line.rstrip() for line in excerpt_lines if "</ul>" in line.lower()), | |
| 287 | - f"{re.match(r'^\s*', ul_line).group(0)}</ul>", | |
| 288 | - ) | |
| 289 | - | |
| 290 | - lines = [heading_line, ul_line] | |
| 291 | - lines.extend( | |
| 292 | - f'{li_indent}<li><a href="{href}">{title}</a></li>' | |
| 293 | - for href, title in entries | |
| 294 | - ) | |
| 295 | - lines.append(closing_line) | |
| 296 | - return "\n".join(lines) | |
| 297 | - | |
| 298 | - | |
| 299 | -def build_html_toc_edit_call_template(index_path: str | Path) -> str | None: | |
| 300 | - """Build one concrete `edit(...)` template for replacing the TOC block.""" | |
| 301 | - | |
| 302 | - index = Path(index_path).expanduser() | |
| 303 | - excerpt = extract_html_toc_excerpt(index, max_lines=64) | |
| 304 | - replacement = build_html_toc_replacement_block(index) | |
| 305 | - if not excerpt or not replacement: | |
| 306 | - return None | |
| 307 | - | |
| 308 | - return "\n".join( | |
| 309 | - [ | |
| 310 | - "edit(", | |
| 311 | - f' file_path="{index}",', | |
| 312 | - ' old_string="""', | |
| 313 | - excerpt, | |
| 314 | - '""",', | |
| 315 | - ' new_string="""', | |
| 316 | - replacement, | |
| 317 | - '"""', | |
| 318 | - ")", | |
| 319 | - ] | |
| 320 | - ) | |
| 321 | - | |
| 322 | - | |
| 323 | -@dataclass(frozen=True) | |
| 324 | -class HtmlTocValidationResult: | |
| 325 | - """Semantic validation result for one chapter-list table of contents.""" | |
| 326 | - | |
| 327 | - valid: bool | |
| 328 | - link_count: int | |
| 329 | - missing: tuple[str, ...] = () | |
| 330 | - mismatched: tuple[str, ...] = () | |
| 331 | - | |
| 332 | - | |
| 333 | -def validate_html_toc(index_path: str | Path) -> HtmlTocValidationResult | None: | |
| 334 | - """Validate that one HTML index TOC points at real chapter files with matching titles.""" | |
| 335 | - | |
| 336 | - index = Path(index_path).expanduser() | |
| 337 | - if index.name != "index.html": | |
| 338 | - return None | |
| 339 | - | |
| 340 | - try: | |
| 341 | - text = index.read_text() | |
| 342 | - except OSError: | |
| 343 | - return None | |
| 344 | - | |
| 345 | - section_match = re.search(r'<ul class="chapter-list">(.*?)</ul>', text, re.S) | |
| 346 | - if section_match is None: | |
| 347 | - return HtmlTocValidationResult( | |
| 348 | - valid=False, | |
| 349 | - link_count=0, | |
| 350 | - missing=("Missing chapter-list table of contents",), | |
| 351 | - ) | |
| 352 | - | |
| 353 | - links = re.findall(r'<a href="([^"]+)">([^<]+)</a>', section_match.group(1)) | |
| 354 | - if not links: | |
| 355 | - return HtmlTocValidationResult( | |
| 356 | - valid=False, | |
| 357 | - link_count=0, | |
| 358 | - missing=("No chapter links found in table of contents",), | |
| 359 | - ) | |
| 360 | - | |
| 361 | - root = index.parent | |
| 362 | - missing: list[str] = [] | |
| 363 | - mismatched: list[str] = [] | |
| 364 | - for href, label in links: | |
| 365 | - target = (root / href).expanduser().resolve(strict=False) | |
| 366 | - if not target.exists(): | |
| 367 | - missing.append(f"{href} -> missing") | |
| 368 | - continue | |
| 369 | - title = read_html_title(target) | |
| 370 | - if title and label.strip() != title: | |
| 371 | - mismatched.append(f"{href} -> {label.strip()} != {title}") | |
| 372 | - | |
| 373 | - return HtmlTocValidationResult( | |
| 374 | - valid=not missing and not mismatched, | |
| 375 | - link_count=len(links), | |
| 376 | - missing=tuple(missing), | |
| 377 | - mismatched=tuple(mismatched), | |
| 378 | - ) | |
| 379 | - | |
| 380 | - | |
| 381 | 137 | class ActionTracker: |
| 382 | 138 | """Tracks completed actions to prevent duplicates and detect loops.""" |
| 383 | 139 | |
@@ -498,7 +254,7 @@ class ActionTracker: | ||
| 498 | 254 | """Record that one index currently satisfies the semantic chapter-link check.""" |
| 499 | 255 | |
| 500 | 256 | normalized = self._normalize_path(index_path) |
| 501 | - if Path(normalized).name != "index.html": | |
| 257 | + if not html_toc_rule.is_html_toc_index_path(normalized): | |
| 502 | 258 | return |
| 503 | 259 | self._validated_html_tocs[normalized] = self._mutation_epoch |
| 504 | 260 | |
@@ -507,7 +263,7 @@ class ActionTracker: | ||
| 507 | 263 | |
| 508 | 264 | normalized = self._normalize_path(index_path) |
| 509 | 265 | path = Path(normalized) |
| 510 | - chapters_dir = path if path.name == "chapters" else path.parent / "chapters" | |
| 266 | + chapters_dir = path if html_toc_rule.is_html_toc_chapters_dir(path) else path.parent / "chapters" | |
| 511 | 267 | self._verified_html_inventory_dirs.add(self._normalize_path(str(chapters_dir))) |
| 512 | 268 | |
| 513 | 269 | def check_tool_call(self, tool_name: str, arguments: dict) -> tuple[bool, str]: |
@@ -928,7 +684,7 @@ class ActionTracker: | ||
| 928 | 684 | return |
| 929 | 685 | normalized_path = self._normalize_path(file_path) |
| 930 | 686 | path = Path(normalized_path) |
| 931 | - if path.suffix != ".html" or path.name == "index.html" or path.parent.name != "chapters": | |
| 687 | + if not html_toc_rule.is_html_toc_chapter_file(path): | |
| 932 | 688 | return |
| 933 | 689 | |
| 934 | 690 | directory = str(path.parent) |
@@ -959,7 +715,7 @@ class ActionTracker: | ||
| 959 | 715 | return False, "" |
| 960 | 716 | normalized_path = self._normalize_path(file_path) |
| 961 | 717 | path = Path(normalized_path) |
| 962 | - if path.name != "index.html": | |
| 718 | + if not html_toc_rule.is_html_toc_index_path(path): | |
| 963 | 719 | return False, "" |
| 964 | 720 | chapters_dir = str(path.parent / "chapters") |
| 965 | 721 | chapter_count = self._chapter_evidence_count(chapters_dir) |
@@ -976,9 +732,10 @@ class ActionTracker: | ||
| 976 | 732 | return False, "" |
| 977 | 733 | return ( |
| 978 | 734 | True, |
| 979 | - "Already confirmed multiple chapter files in the sibling chapters " | |
| 980 | - "directory; reuse the known file/title evidence and update index.html " | |
| 981 | - "instead of rereading it", | |
| 735 | + "Already confirmed multiple linked chapter files in " | |
| 736 | + f"{html_toc_rule.describe_html_toc_chapters_dir(path)}; reuse that file/title " | |
| 737 | + f"evidence and update {html_toc_rule.describe_html_toc_target(path)} instead of " | |
| 738 | + "rereading it", | |
| 982 | 739 | ) |
| 983 | 740 | |
| 984 | 741 | if tool_name in {"glob", "grep"}: |
@@ -987,7 +744,7 @@ class ActionTracker: | ||
| 987 | 744 | return False, "" |
| 988 | 745 | normalized_path = self._normalize_path(search_path) |
| 989 | 746 | path = Path(normalized_path) |
| 990 | - if path.name != "chapters": | |
| 747 | + if not html_toc_rule.is_html_toc_chapters_dir(path): | |
| 991 | 748 | return False, "" |
| 992 | 749 | chapter_count = self._chapter_evidence_count(str(path)) |
| 993 | 750 | if chapter_count < self.HTML_CHAPTER_EVIDENCE_THRESHOLD: |
@@ -997,9 +754,10 @@ class ActionTracker: | ||
| 997 | 754 | return False, "" |
| 998 | 755 | return ( |
| 999 | 756 | True, |
| 1000 | - "Already confirmed multiple chapter files in this directory; reuse " | |
| 1001 | - "the known filename/title evidence and update the target index instead " | |
| 1002 | - "of rerunning the directory search", | |
| 757 | + "Already confirmed multiple linked chapter files in " | |
| 758 | + f"{html_toc_rule.describe_html_toc_chapters_dir(path)}; reuse that filename/title " | |
| 759 | + f"evidence and update {html_toc_rule.describe_html_toc_target(path)} instead of " | |
| 760 | + "rerunning the directory search", | |
| 1003 | 761 | ) |
| 1004 | 762 | |
| 1005 | 763 | return False, "" |
@@ -1026,9 +784,7 @@ class ActionTracker: | ||
| 1026 | 784 | if self._matches_validated_html_toc(path): |
| 1027 | 785 | return ( |
| 1028 | 786 | True, |
| 1029 | - "The current index.html already passes the validated chapter-link " | |
| 1030 | - "check; stop rereading index.html or chapters/ and finish the task " | |
| 1031 | - "unless a specific href or title is still unresolved", | |
| 787 | + html_toc_rule.build_validated_html_toc_observation_reason(path), | |
| 1032 | 788 | ) |
| 1033 | 789 | return False, "" |
| 1034 | 790 | |
@@ -1045,9 +801,7 @@ class ActionTracker: | ||
| 1045 | 801 | if self._matches_verified_html_inventory(path): |
| 1046 | 802 | return ( |
| 1047 | 803 | True, |
| 1048 | - "The verified chapter inventory already lists the exact href/title " | |
| 1049 | - "pairs for this directory; update index.html from that inventory " | |
| 1050 | - "instead of rereading chapter files", | |
| 804 | + html_toc_rule.build_verified_html_inventory_observation_reason(path), | |
| 1051 | 805 | ) |
| 1052 | 806 | return False, "" |
| 1053 | 807 | |
@@ -1396,7 +1150,7 @@ class PreActionValidator: | ||
| 1396 | 1150 | content: str, |
| 1397 | 1151 | ) -> ValidationResult: |
| 1398 | 1152 | normalized = Path(file_path).expanduser() |
| 1399 | - if normalized.name != "index.html" or "<a " not in content: | |
| 1153 | + if not html_toc_rule.is_html_toc_index_path(normalized) or "<a " not in content: | |
| 1400 | 1154 | return ValidationResult(valid=True) |
| 1401 | 1155 | |
| 1402 | 1156 | link_pairs = re.findall(r'<a\s+href="([^"]+)">([^<]+)</a>', content) |
@@ -1413,7 +1167,7 @@ class PreActionValidator: | ||
| 1413 | 1167 | missing.append(href) |
| 1414 | 1168 | continue |
| 1415 | 1169 | |
| 1416 | - title = read_html_title(target) | |
| 1170 | + title = html_toc_rule.read_html_title(target) | |
| 1417 | 1171 | if title and label.strip() != title: |
| 1418 | 1172 | if href not in mismatched: |
| 1419 | 1173 | mismatched.append(href) |
@@ -1421,7 +1175,7 @@ class PreActionValidator: | ||
| 1421 | 1175 | if missing: |
| 1422 | 1176 | suggestions = self._suggest_existing_html_targets(root, missing) |
| 1423 | 1177 | preview_items = [ |
| 1424 | - format_html_inventory_entry(root, root / suggestion) | |
| 1178 | + html_toc_rule.format_html_inventory_entry(root, root / suggestion) | |
| 1425 | 1179 | for suggestion in suggestions |
| 1426 | 1180 | ] |
| 1427 | 1181 | if not preview_items: |
@@ -1433,7 +1187,8 @@ class PreActionValidator: | ||
| 1433 | 1187 | valid=False, |
| 1434 | 1188 | reason="Edited TOC references chapter files that do not exist", |
| 1435 | 1189 | suggestion=( |
| 1436 | - "Use only existing chapter href/title pairs from beside index.html, for example: " | |
| 1190 | + f"Use only existing chapter href/title pairs from beside " | |
| 1191 | + f"{html_toc_rule.describe_html_toc_target(normalized)}, for example: " | |
| 1437 | 1192 | f"{preview}" |
| 1438 | 1193 | ), |
| 1439 | 1194 | severity="error", |
@@ -1441,7 +1196,7 @@ class PreActionValidator: | ||
| 1441 | 1196 | |
| 1442 | 1197 | if mismatched: |
| 1443 | 1198 | exact_entries = [ |
| 1444 | - format_html_inventory_entry(root, (root / href).resolve(strict=False)) | |
| 1199 | + html_toc_rule.format_html_inventory_entry(root, (root / href).resolve(strict=False)) | |
| 1445 | 1200 | for href in mismatched |
| 1446 | 1201 | if (root / href).resolve(strict=False).exists() |
| 1447 | 1202 | ] |
@@ -1454,7 +1209,8 @@ class PreActionValidator: | ||
| 1454 | 1209 | valid=False, |
| 1455 | 1210 | reason="Edited TOC labels do not match the linked chapter titles", |
| 1456 | 1211 | suggestion=( |
| 1457 | - "Copy the exact href/title pair from the linked HTML file, for example: " | |
| 1212 | + f"Copy the exact href/title pair from the linked HTML file for " | |
| 1213 | + f"{html_toc_rule.describe_html_toc_target(normalized)}, for example: " | |
| 1458 | 1214 | f"{preview}" |
| 1459 | 1215 | ), |
| 1460 | 1216 | severity="error", |
src/loader/runtime/semantic_rules/__init__.pyadded@@ -0,0 +1,2 @@ | ||
| 1 | +"""Internal semantic rule helpers for specialized task classes.""" | |
| 2 | + | |
src/loader/runtime/semantic_rules/html_toc.pyadded@@ -0,0 +1,506 @@ | ||
| 1 | +"""Internal semantic rule helpers for HTML table-of-contents repair tasks.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +import re | |
| 6 | +from dataclasses import dataclass | |
| 7 | +from pathlib import Path | |
| 8 | + | |
| 9 | +HTML_TOC_REPAIR_PATTERNS = ( | |
| 10 | + r"\bfix(?:ing|ed)?\b", | |
| 11 | + r"\bcorrect(?:ing|ed)?\b", | |
| 12 | + r"\brepair(?:ing|ed)?\b", | |
| 13 | + r"\bupdate(?:d|s|ing)?\b", | |
| 14 | + r"\bsync(?:hronize|hronized|ing)?\b", | |
| 15 | + r"\balign(?:ed|ing)?\b", | |
| 16 | + r"\bmatch(?:es|ed|ing)?\b", | |
| 17 | + r"\bwrong\b", | |
| 18 | + r"\bincorrect\b", | |
| 19 | + r"\binaccurate\b", | |
| 20 | + r"\bbroken\b", | |
| 21 | + r"\bmismatche?d\b", | |
| 22 | + r"\bmissing\b", | |
| 23 | +) | |
| 24 | +HTML_TOC_SUBJECT_HINTS = ( | |
| 25 | + "table of contents", | |
| 26 | + "toc", | |
| 27 | + "href", | |
| 28 | + "hrefs", | |
| 29 | + "link text", | |
| 30 | + "chapter link", | |
| 31 | + "chapter links", | |
| 32 | + "chapter title", | |
| 33 | + "chapter titles", | |
| 34 | +) | |
| 35 | +HTML_TOC_TARGET_HINTS = ( | |
| 36 | + "index.html", | |
| 37 | + "index page", | |
| 38 | + "index table of contents", | |
| 39 | + "chapters/", | |
| 40 | + "/chapters", | |
| 41 | + "chapters directory", | |
| 42 | + "chapter directory", | |
| 43 | +) | |
| 44 | + | |
| 45 | + | |
| 46 | +def task_targets_html_toc(task_text: str | None) -> bool: | |
| 47 | +    """Return True when the task text clearly targets an HTML TOC repair flow."""
| 48 | + | |
| 49 | + lowered = str(task_text or "").strip().lower() | |
| 50 | + if not lowered: | |
| 51 | + return False | |
| 52 | + has_subject = any(hint in lowered for hint in HTML_TOC_SUBJECT_HINTS) | |
| 53 | + has_target = any(hint in lowered for hint in HTML_TOC_TARGET_HINTS) | |
| 54 | + has_repair_intent = any( | |
| 55 | + re.search(pattern, lowered) is not None for pattern in HTML_TOC_REPAIR_PATTERNS | |
| 56 | + ) | |
| 57 | + return has_subject and has_target and has_repair_intent | |
| 58 | + | |
| 59 | + | |
| 60 | +def is_html_toc_index_path(path_value: str | Path) -> bool: | |
| 61 | +    """Return True when the given path is the TOC index target."""
| 62 | + | |
| 63 | + path = Path(path_value).expanduser() | |
| 64 | + return path.name == "index.html" and path.suffix.lower() in {".html", ".htm"} | |
| 65 | + | |
| 66 | + | |
| 67 | +def is_html_toc_chapters_dir(path_value: str | Path) -> bool: | |
| 68 | +    """Return True when the given path is the sibling chapters directory."""
| 69 | + | |
| 70 | + return Path(path_value).expanduser().name == "chapters" | |
| 71 | + | |
| 72 | + | |
| 73 | +def is_html_toc_chapter_file(path_value: str | Path) -> bool: | |
| 74 | +    """Return True when the given path is a chapter HTML file beside the TOC index."""
| 75 | + | |
| 76 | + path = Path(path_value).expanduser() | |
| 77 | + return ( | |
| 78 | + path.suffix.lower() in {".html", ".htm"} | |
| 79 | + and path.name != "index.html" | |
| 80 | + and path.parent.name == "chapters" | |
| 81 | + ) | |
| 82 | + | |
| 83 | + | |
| 84 | +def resolve_html_toc_index_path(path_value: str | Path) -> Path | None: | |
| 85 | + """Resolve a related TOC path back to its index target.""" | |
| 86 | + | |
| 87 | + candidate = Path(path_value).expanduser() | |
| 88 | + if is_html_toc_index_path(candidate): | |
| 89 | + return candidate | |
| 90 | + if is_html_toc_chapters_dir(candidate): | |
| 91 | + return candidate.parent / "index.html" | |
| 92 | + if is_html_toc_chapter_file(candidate): | |
| 93 | + return candidate.parent.parent / "index.html" | |
| 94 | + return None | |
| 95 | + | |
| 96 | + | |
| 97 | +def describe_html_toc_target(path_value: str | Path) -> str: | |
| 98 | +    """Return a model-facing label for the active TOC target."""
| 99 | + | |
| 100 | + index = resolve_html_toc_index_path(path_value) | |
| 101 | + if index is None: | |
| 102 | + return "`the target HTML table-of-contents page`" | |
| 103 | + return f"`{index}`" | |
| 104 | + | |
| 105 | + | |
| 106 | +def describe_html_toc_chapters_dir(path_value: str | Path) -> str: | |
| 107 | +    """Return a model-facing label for the sibling chapter directory."""
| 108 | + | |
| 109 | + index = resolve_html_toc_index_path(path_value) | |
| 110 | + if index is None: | |
| 111 | + return "`the sibling chapter directory`" | |
| 112 | + return f"`{index.parent / 'chapters'}`" | |
| 113 | + | |
| 114 | + | |
| 115 | +def extract_html_title_from_text(payload: str) -> str | None: | |
| 116 | +    """Extract a human-readable title from raw HTML text."""
| 117 | + | |
| 118 | + for pattern in (r"<h1[^>]*>(.*?)</h1>", r"<title[^>]*>(.*?)</title>"): | |
| 119 | + match = re.search(pattern, payload, re.IGNORECASE | re.DOTALL) | |
| 120 | + if not match: | |
| 121 | + continue | |
| 122 | + title = re.sub(r"<[^>]+>", " ", match.group(1)) | |
| 123 | + normalized = " ".join(title.split()).strip() | |
| 124 | + if normalized: | |
| 125 | + return normalized | |
| 126 | + return None | |
| 127 | + | |
| 128 | + | |
| 129 | +def read_html_title(path: Path) -> str: | |
| 130 | +    """Read an HTML file's title for inventory and validation helpers."""
| 131 | + | |
| 132 | + try: | |
| 133 | + return extract_html_title_from_text(path.read_text()) or "" | |
| 134 | + except OSError: | |
| 135 | + return "" | |
| 136 | + | |
| 137 | + | |
| 138 | +def format_html_inventory_entry(root: Path, candidate: Path) -> str: | |
| 139 | +    """Format an exact href/title pair for model-facing guidance."""
| 140 | + | |
| 141 | + normalized_root = root.expanduser().resolve(strict=False) | |
| 142 | + normalized_candidate = candidate.expanduser().resolve(strict=False) | |
| 143 | + try: | |
| 144 | + href = str(normalized_candidate.relative_to(normalized_root)) | |
| 145 | + except ValueError: | |
| 146 | + href = normalized_candidate.name | |
| 147 | + title = read_html_title(candidate) | |
| 148 | + if title: | |
| 149 | + return f"{href} = {title}" | |
| 150 | + return href | |
| 151 | + | |
| 152 | + | |
| 153 | +def build_validated_html_toc_observation_reason(path_value: str | Path) -> str: | |
| 154 | +    """Build a duplicate-observation reason for an already-validated TOC target."""
| 155 | + | |
| 156 | + target = describe_html_toc_target(path_value) | |
| 157 | + chapters_dir = describe_html_toc_chapters_dir(path_value) | |
| 158 | + return ( | |
| 159 | + f"The HTML table-of-contents target {target} already passed semantic link " | |
| 160 | + f"validation; reuse that result instead of rereading {target} or its sibling " | |
| 161 | + f"chapter directory {chapters_dir} unless one specific href or label is still " | |
| 162 | + "unresolved" | |
| 163 | + ) | |
| 164 | + | |
| 165 | + | |
| 166 | +def build_verified_html_inventory_observation_reason(path_value: str | Path) -> str: | |
| 167 | +    """Build a duplicate-observation reason for a verified chapter inventory."""
| 168 | + | |
| 169 | + target = describe_html_toc_target(path_value) | |
| 170 | + chapters_dir = describe_html_toc_chapters_dir(path_value) | |
| 171 | + return ( | |
| 172 | + f"The verified sibling chapter inventory for {chapters_dir} already contains the " | |
| 173 | + f"exact href/title pairs needed for {target}; reuse that inventory instead of " | |
| 174 | + "rereading chapter files" | |
| 175 | + ) | |
| 176 | + | |
| 177 | + | |
| 178 | +def _collect_html_inventory_entries(index_path: str | Path) -> list[tuple[str, str]]: | |
| 179 | + """Return exact href/title pairs for sibling HTML chapters.""" | |
| 180 | + | |
| 181 | + index = Path(index_path).expanduser() | |
| 182 | + if not is_html_toc_index_path(index): | |
| 183 | + return [] | |
| 184 | + | |
| 185 | + chapters_dir = index.parent / "chapters" | |
| 186 | + if not chapters_dir.is_dir(): | |
| 187 | + return [] | |
| 188 | + | |
| 189 | + entries: list[tuple[str, str]] = [] | |
| 190 | + for candidate in sorted(chapters_dir.glob("*.html")): | |
| 191 | + if not candidate.is_file(): | |
| 192 | + continue | |
| 193 | + title = read_html_title(candidate) | |
| 194 | + if not title: | |
| 195 | + continue | |
| 196 | + href = format_html_inventory_entry(index.parent, candidate).split(" = ", 1)[0] | |
| 197 | + entries.append((href, title)) | |
| 198 | + return entries | |
| 199 | + | |
| 200 | + | |
| 201 | +def summarize_html_inventory( | |
| 202 | + index_path: str | Path, | |
| 203 | + *, | |
| 204 | + limit: int | None = 12, | |
| 205 | +) -> str | None: | |
| 206 | +    """Summarize the sibling HTML inventory for an index page."""
| 207 | + | |
| 208 | + index = Path(index_path).expanduser() | |
| 209 | + if not is_html_toc_index_path(index): | |
| 210 | + return None | |
| 211 | + | |
| 212 | + entries = [f"{href} = {title}" for href, title in _collect_html_inventory_entries(index)] | |
| 213 | + if not entries: | |
| 214 | + return None | |
| 215 | + | |
| 216 | + if limit is not None and len(entries) > limit: | |
| 217 | + return "; ".join(entries[:limit]) + "; ..." | |
| 218 | + return "; ".join(entries) | |
| 219 | + | |
| 220 | + | |
| 221 | +def extract_html_toc_excerpt( | |
| 222 | + index_path: str | Path, | |
| 223 | + *, | |
| 224 | + max_lines: int = 16, | |
| 225 | +) -> str | None: | |
| 226 | + """Extract the current TOC block for recovery guidance.""" | |
| 227 | + | |
| 228 | + index = Path(index_path).expanduser() | |
| 229 | + if not is_html_toc_index_path(index): | |
| 230 | + return None | |
| 231 | + | |
| 232 | + try: | |
| 233 | + text = index.read_text() | |
| 234 | + except OSError: | |
| 235 | + return None | |
| 236 | + | |
| 237 | + match = re.search( | |
| 238 | + r"(<h2[^>]*>\s*Table of Contents\s*</h2>.*?</ul>)", | |
| 239 | + text, | |
| 240 | + re.IGNORECASE | re.DOTALL, | |
| 241 | + ) | |
| 242 | + if not match: | |
| 243 | + match = re.search( | |
| 244 | + r"(<ul[^>]*class=\"[^\"]*chapter-list[^\"]*\"[^>]*>.*?</ul>)", | |
| 245 | + text, | |
| 246 | + re.IGNORECASE | re.DOTALL, | |
| 247 | + ) | |
| 248 | + if not match: | |
| 249 | + return None | |
| 250 | + | |
| 251 | + snippet_lines = [line.rstrip() for line in match.group(1).splitlines() if line.strip()] | |
| 252 | + if not snippet_lines: | |
| 253 | + return None | |
| 254 | + if len(snippet_lines) > max_lines: | |
| 255 | + snippet_lines = snippet_lines[:max_lines] + ["..."] | |
| 256 | + return "\n".join(snippet_lines) | |
| 257 | + | |
| 258 | + | |
| 259 | +def build_html_toc_replacement_block(index_path: str | Path) -> str | None: | |
| 260 | +    """Build an exact replacement TOC block from the verified sibling inventory."""
| 261 | + | |
| 262 | + entries = _collect_html_inventory_entries(index_path) | |
| 263 | + if not entries: | |
| 264 | + return None | |
| 265 | + | |
| 266 | + excerpt = extract_html_toc_excerpt(index_path, max_lines=64) | |
| 267 | + excerpt_lines = excerpt.splitlines() if excerpt else [] | |
| 268 | + | |
| 269 | + heading_line = next( | |
| 270 | + (line.rstrip() for line in excerpt_lines if "<h2" in line.lower()), | |
| 271 | + "<h2>Table of Contents</h2>", | |
| 272 | + ) | |
| 273 | + ul_line = next( | |
| 274 | + ( | |
| 275 | + line.rstrip() | |
| 276 | + for line in excerpt_lines | |
| 277 | + if "<ul" in line.lower() and "chapter-list" in line.lower() | |
| 278 | + ), | |
| 279 | + ' <ul class="chapter-list">', | |
| 280 | + ) | |
| 281 | + li_indent = next( | |
| 282 | + ( | |
| 283 | + re.match(r"^\s*", line).group(0) | |
| 284 | + for line in excerpt_lines | |
| 285 | + if "<li><a " in line | |
| 286 | + ), | |
| 287 | + re.match(r"^\s*", ul_line).group(0) + " ", | |
| 288 | + ) | |
| 289 | + ul_indent = re.match(r"^\s*", ul_line).group(0) | |
| 290 | + closing_line = next( | |
| 291 | + (line.rstrip() for line in excerpt_lines if "</ul>" in line.lower()), | |
| 292 | + f"{ul_indent}</ul>", | |
| 293 | + ) | |
| 294 | + | |
| 295 | + lines = [heading_line, ul_line] | |
| 296 | + lines.extend( | |
| 297 | + f'{li_indent}<li><a href="{href}">{title}</a></li>' | |
| 298 | + for href, title in entries | |
| 299 | + ) | |
| 300 | + lines.append(closing_line) | |
| 301 | + return "\n".join(lines) | |
| 302 | + | |
| 303 | + | |
| 304 | +def build_html_toc_edit_call_template(index_path: str | Path) -> str | None: | |
| 305 | +    """Build a concrete edit template for replacing the TOC block."""
| 306 | + | |
| 307 | + index = Path(index_path).expanduser() | |
| 308 | + excerpt = extract_html_toc_excerpt(index, max_lines=64) | |
| 309 | + replacement = build_html_toc_replacement_block(index) | |
| 310 | + if not excerpt or not replacement: | |
| 311 | + return None | |
| 312 | + | |
| 313 | + return "\n".join( | |
| 314 | + [ | |
| 315 | + "edit(", | |
| 316 | + f' file_path="{index}",', | |
| 317 | + ' old_string="""', | |
| 318 | + excerpt, | |
| 319 | + '""",', | |
| 320 | + ' new_string="""', | |
| 321 | + replacement, | |
| 322 | + '"""', | |
| 323 | + ")", | |
| 324 | + ] | |
| 325 | + ) | |
| 326 | + | |
| 327 | + | |
| 328 | +@dataclass(frozen=True) | |
| 329 | +class HtmlTocValidationResult: | |
| 330 | +    """Semantic validation result for a chapter-list table of contents."""
| 331 | + | |
| 332 | + valid: bool | |
| 333 | + link_count: int | |
| 334 | + missing: tuple[str, ...] = () | |
| 335 | + mismatched: tuple[str, ...] = () | |
| 336 | + | |
| 337 | + | |
| 338 | +def validate_html_toc(index_path: str | Path) -> HtmlTocValidationResult | None: | |
| 339 | +    """Validate that an HTML TOC points at real chapter files with matching titles."""
| 340 | + | |
| 341 | + index = Path(index_path).expanduser() | |
| 342 | + if not is_html_toc_index_path(index): | |
| 343 | + return None | |
| 344 | + | |
| 345 | + try: | |
| 346 | + text = index.read_text() | |
| 347 | + except OSError: | |
| 348 | + return None | |
| 349 | + | |
| 350 | + section_match = re.search(r'<ul class="chapter-list">(.*?)</ul>', text, re.S) | |
| 351 | + if section_match is None: | |
| 352 | + return HtmlTocValidationResult( | |
| 353 | + valid=False, | |
| 354 | + link_count=0, | |
| 355 | + missing=("Missing chapter-list table of contents",), | |
| 356 | + ) | |
| 357 | + | |
| 358 | + links = re.findall(r'<a href="([^"]+)">([^<]+)</a>', section_match.group(1)) | |
| 359 | + if not links: | |
| 360 | + return HtmlTocValidationResult( | |
| 361 | + valid=False, | |
| 362 | + link_count=0, | |
| 363 | + missing=("No chapter links found in table of contents",), | |
| 364 | + ) | |
| 365 | + | |
| 366 | + root = index.parent | |
| 367 | + missing: list[str] = [] | |
| 368 | + mismatched: list[str] = [] | |
| 369 | + for href, label in links: | |
| 370 | + target = (root / href).expanduser().resolve(strict=False) | |
| 371 | + if not target.exists(): | |
| 372 | + missing.append(f"{href} -> missing") | |
| 373 | + continue | |
| 374 | + title = read_html_title(target) | |
| 375 | + if title and label.strip() != title: | |
| 376 | + mismatched.append(f"{href} -> {label.strip()} != {title}") | |
| 377 | + | |
| 378 | + return HtmlTocValidationResult( | |
| 379 | + valid=not missing and not mismatched, | |
| 380 | + link_count=len(links), | |
| 381 | + missing=tuple(missing), | |
| 382 | + mismatched=tuple(mismatched), | |
| 383 | + ) | |
| 384 | + | |
| 385 | + | |
| 386 | +def build_html_toc_verification_command(index_path: str | Path) -> str | None: | |
| 387 | +    """Build the semantic verification command for an HTML TOC target."""
| 388 | + | |
| 389 | + index = Path(index_path).expanduser() | |
| 390 | + if not is_html_toc_index_path(index): | |
| 391 | + return None | |
| 392 | + if not (index.parent / "chapters").is_dir(): | |
| 393 | + return None | |
| 394 | + | |
| 395 | + path_literal = repr(str(index)) | |
| 396 | + return "\n".join( | |
| 397 | + [ | |
| 398 | + "python3 - <<'PY'", | |
| 399 | + "from pathlib import Path", | |
| 400 | + "import re", | |
| 401 | + "import sys", | |
| 402 | + "", | |
| 403 | + f"index = Path({path_literal}).expanduser()", | |
| 404 | + "root = index.parent", | |
| 405 | + "text = index.read_text()", | |
| 406 | + "section_match = re.search(r'<ul class=\"chapter-list\">(.*?)</ul>', text, re.S)", | |
| 407 | + "if section_match is None:", | |
| 408 | + " print('Missing chapter-list table of contents', file=sys.stderr)", | |
| 409 | + " raise SystemExit(1)", | |
| 410 | + "links = re.findall(r'<a href=\"([^\"]+)\">([^<]+)</a>', section_match.group(1))", | |
| 411 | + "if not links:", | |
| 412 | + " print('No chapter links found in table of contents', file=sys.stderr)", | |
| 413 | + " raise SystemExit(1)", | |
| 414 | + "", | |
| 415 | + "missing = []", | |
| 416 | + "mismatched = []", | |
| 417 | + "for href, label in links:", | |
| 418 | + " target = (root / href).resolve()", | |
| 419 | + " if not target.exists():", | |
| 420 | + " missing.append(f'{href} -> missing')", | |
| 421 | + " continue", | |
| 422 | + " body = target.read_text()", | |
| 423 | + " match = re.search(r'<h1>(.*?)</h1>', body, re.S)", | |
| 424 | + " title = match.group(1).strip() if match else ''", | |
| 425 | + " if title and label.strip() != title:", | |
| 426 | + " mismatched.append(f'{href} -> {label.strip()} != {title}')", | |
| 427 | + "", | |
| 428 | + "if missing or mismatched:", | |
| 429 | + " if missing:", | |
| 430 | + " print('Missing links:', file=sys.stderr)", | |
| 431 | + " for item in missing:", | |
| 432 | + " print(item, file=sys.stderr)", | |
| 433 | + " if mismatched:", | |
| 434 | + " print('Title mismatches:', file=sys.stderr)", | |
| 435 | + " for item in mismatched:", | |
| 436 | + " print(item, file=sys.stderr)", | |
| 437 | + " raise SystemExit(1)", | |
| 438 | + "", | |
| 439 | + "print(f'validated {len(links)} toc links in {index.name}')", | |
| 440 | + "PY", | |
| 441 | + ] | |
| 442 | + ) | |
| 443 | + | |
| 444 | + | |
| 445 | +def parse_html_toc_verification_failures(text: str) -> tuple[list[str], list[str]]: | |
| 446 | + """Parse missing hrefs and mismatched labels from verifier output.""" | |
| 447 | + | |
| 448 | + missing: list[str] = [] | |
| 449 | + mismatches: list[str] = [] | |
| 450 | + mode: str | None = None | |
| 451 | + | |
| 452 | + for raw_line in text.splitlines(): | |
| 453 | + line = raw_line.strip() | |
| 454 | + if not line: | |
| 455 | + continue | |
| 456 | + lowered = line.lower() | |
| 457 | + if lowered == "missing links:": | |
| 458 | + mode = "missing" | |
| 459 | + continue | |
| 460 | + if lowered == "title mismatches:": | |
| 461 | + mode = "mismatch" | |
| 462 | + continue | |
| 463 | + if mode == "missing" and "->" in line: | |
| 464 | + href = line.split("->", 1)[0].strip() | |
| 465 | + if href and href not in missing: | |
| 466 | + missing.append(href) | |
| 467 | + continue | |
| 468 | + if mode == "mismatch" and "!=" in line and line not in mismatches: | |
| 469 | + mismatches.append(line) | |
| 470 | + | |
| 471 | + return missing, mismatches | |
| 472 | + | |
| 473 | + | |
| 474 | +def summarize_html_toc_verification_gap(payload: str, *, max_items: int = 2) -> str | None: | |
| 475 | + """Summarize the latest semantic verifier gap from shell output.""" | |
| 476 | + | |
| 477 | + missing, mismatches = parse_html_toc_verification_failures(payload) | |
| 478 | + | |
| 479 | + parts: list[str] = [] | |
| 480 | + if missing: | |
| 481 | + preview = ", ".join(missing[:max_items]) | |
| 482 | + if len(missing) > max_items: | |
| 483 | + preview += ", ..." | |
| 484 | + parts.append(f"missing TOC links {preview}") | |
| 485 | + if mismatches: | |
| 486 | + preview = ", ".join(mismatches[:max_items]) | |
| 487 | + if len(mismatches) > max_items: | |
| 488 | + preview += ", ..." | |
| 489 | + parts.append(f"title mismatches {preview}") | |
| 490 | + return "; ".join(parts) if parts else None | |
| 491 | + | |
| 492 | + | |
| 493 | +def summarize_html_file_discovery(payload: str) -> str | None: | |
| 494 | + """Summarize a set of discovered HTML filenames from tool output.""" | |
| 495 | + | |
| 496 | + filenames = re.findall(r"([A-Za-z0-9_.-]+\.html)", payload) | |
| 497 | + unique_names: list[str] = [] | |
| 498 | + for name in filenames: | |
| 499 | + if name not in unique_names: | |
| 500 | + unique_names.append(name) | |
| 501 | + if len(unique_names) < 3: | |
| 502 | + return None | |
| 503 | + preview = ", ".join(unique_names[:6]) | |
| 504 | + if len(unique_names) > 6: | |
| 505 | + preview += ", ..." | |
| 506 | + return f"Existing files include {preview}" | |
src/loader/runtime/tool_batch_recovery.pymodified@@ -2,24 +2,22 @@ | ||
| 2 | 2 | |
| 3 | 3 | from __future__ import annotations |
| 4 | 4 | |
| 5 | +import re | |
| 5 | 6 | from collections.abc import Awaitable, Callable |
| 6 | 7 | from difflib import SequenceMatcher |
| 7 | 8 | from pathlib import Path |
| 8 | -import re | |
| 9 | 9 | |
| 10 | -from ..llm.base import Message, ToolCall | |
| 11 | -from .compaction import infer_preferred_next_step, summarize_confirmed_facts | |
| 10 | +from ..llm.base import Message, Role, ToolCall | |
| 11 | +from .compaction import ( | |
| 12 | + extract_key_files, | |
| 13 | + infer_preferred_next_step, | |
| 14 | + summarize_confirmed_facts, | |
| 15 | +) | |
| 12 | 16 | from .context import RuntimeContext |
| 13 | 17 | from .events import AgentEvent |
| 14 | 18 | from .executor import ToolExecutionOutcome |
| 15 | 19 | from .recovery import RecoveryContext, format_failure_message, format_recovery_prompt |
| 16 | -from .safeguard_services import ( | |
| 17 | - build_html_toc_edit_call_template, | |
| 18 | - build_html_toc_replacement_block, | |
| 19 | - extract_html_toc_excerpt, | |
| 20 | - read_html_title, | |
| 21 | - summarize_html_inventory, | |
| 22 | -) | |
| 20 | +from .semantic_rules import html_toc as html_toc_rule | |
| 23 | 21 | |
| 24 | 22 | EventSink = Callable[[AgentEvent], Awaitable[None]] |
| 25 | 23 | |
@@ -131,10 +129,15 @@ class ToolBatchRecoveryController: | ||
| 131 | 129 | |
| 132 | 130 | session = self.context.session |
| 133 | 131 | current_task = getattr(session, "current_task", None) |
| 132 | + focus_path = self._preferred_focus_path( | |
| 133 | + tool_call=tool_call, | |
| 134 | + current_task=current_task, | |
| 135 | + ) | |
| 134 | 136 | confirmed_facts = summarize_confirmed_facts(session.messages) |
| 135 | 137 | preferred_next_step = infer_preferred_next_step( |
| 136 | 138 | session.messages, |
| 137 | 139 | current_task=current_task, |
| 140 | + focus_path=focus_path or None, | |
| 138 | 141 | ) |
| 139 | 142 | actionable_known_state = bool(confirmed_facts and preferred_next_step) |
| 140 | 143 | lines = [prompt] |
@@ -169,6 +172,59 @@ class ToolBatchRecoveryController: | ||
| 169 | 172 | lines.extend(["", "## CURRENT TARGET EXCERPT", *target_excerpt_lines]) |
| 170 | 173 | return "\n".join(lines) |
| 171 | 174 | |
| 175 | + def _preferred_focus_path( | |
| 176 | + self, | |
| 177 | + *, | |
| 178 | + tool_call: ToolCall, | |
| 179 | + current_task: str | None, | |
| 180 | + ) -> str: | |
| 181 | + raw_path = str( | |
| 182 | + tool_call.arguments.get("file_path") | |
| 183 | + or tool_call.arguments.get("path") | |
| 184 | + or "" | |
| 185 | + ).strip() | |
| 186 | + if not raw_path: | |
| 187 | + return "" | |
| 188 | + if tool_call.name in {"write", "edit", "patch"} or not current_task: | |
| 189 | + return raw_path | |
| 190 | + | |
| 191 | + primary_target = self._primary_task_target_path(current_task) | |
| 192 | + if not primary_target: | |
| 193 | + return raw_path | |
| 194 | + | |
| 195 | + candidate = self._canonicalize_path(raw_path) | |
| 196 | + target = self._canonicalize_path(primary_target) | |
| 197 | + if not candidate or not target or candidate == target: | |
| 198 | + return raw_path | |
| 199 | + | |
| 200 | + candidate_path = Path(candidate) | |
| 201 | + target_path = Path(target) | |
| 202 | + if ( | |
| 203 | + tool_call.name == "read" | |
| 204 | + and candidate_path.suffix == ".html" | |
| 205 | + and candidate_path.parent == target_path.parent / "chapters" | |
| 206 | + ): | |
| 207 | + return target | |
| 208 | + | |
| 209 | + return raw_path | |
| 210 | + | |
| 211 | + def _primary_task_target_path(self, current_task: str) -> str | None: | |
| 212 | + paths = extract_key_files( | |
| 213 | + [Message(role=Role.USER, content=current_task)], | |
| 214 | + limit=6, | |
| 215 | + ) | |
| 216 | + for path in paths: | |
| 217 | + normalized = self._canonicalize_path(path) | |
| 218 | + if not normalized: | |
| 219 | + continue | |
| 220 | + if normalized.endswith(".html") and "/chapters/" not in normalized: | |
| 221 | + return normalized | |
| 222 | + for path in paths: | |
| 223 | + normalized = self._canonicalize_path(path) | |
| 224 | + if normalized: | |
| 225 | + return normalized | |
| 226 | + return None | |
| 227 | + | |
| 172 | 228 | def _file_not_found_candidate_lines( |
| 173 | 229 | self, |
| 174 | 230 | tool_call: ToolCall, |
@@ -262,7 +318,7 @@ class ToolBatchRecoveryController: | ||
| 262 | 318 | path = Path(candidate) |
| 263 | 319 | label = f"`{path.name}`" |
| 264 | 320 | if path.suffix == ".html": |
| 265 | - title = read_html_title(path) | |
| 321 | + title = html_toc_rule.read_html_title(path) | |
| 266 | 322 | if title: |
| 267 | 323 | return f"{label} = {title}" |
| 268 | 324 | return label |
@@ -275,9 +331,12 @@ class ToolBatchRecoveryController: | ||
| 275 | 331 | ).strip() |
| 276 | 332 | if not file_path: |
| 277 | 333 | return [] |
| 334 | + current_task = getattr(self.context.session, "current_task", None) | |
| 335 | + if not html_toc_rule.task_targets_html_toc(current_task): | |
| 336 | + return [] | |
| 278 | 337 | |
| 279 | - inventory = summarize_html_inventory(file_path, limit=12) | |
| 280 | - excerpt = extract_html_toc_excerpt(file_path) | |
| 338 | + inventory = html_toc_rule.summarize_html_inventory(file_path, limit=12) | |
| 339 | + excerpt = html_toc_rule.extract_html_toc_excerpt(file_path) | |
| 281 | 340 | if not inventory and not excerpt: |
| 282 | 341 | return [] |
| 283 | 342 | |
@@ -287,7 +346,7 @@ class ToolBatchRecoveryController: | ||
| 287 | 346 | if excerpt: |
| 288 | 347 | lines.append("- Current TOC block:") |
| 289 | 348 | lines.extend(f" {line}" for line in excerpt.splitlines()) |
| 290 | - replacement = build_html_toc_replacement_block(file_path) | |
| 349 | + replacement = html_toc_rule.build_html_toc_replacement_block(file_path) | |
| 291 | 350 | if replacement: |
| 292 | 351 | lines.append("- Suggested replacement block:") |
| 293 | 352 | lines.extend(f" {line}" for line in replacement.splitlines()) |
@@ -297,7 +356,7 @@ class ToolBatchRecoveryController: | ||
| 297 | 356 | lines.append(" old_string: use the Current TOC block above exactly") |
| 298 | 357 | lines.append(" new_string: use the Suggested replacement block above exactly") |
| 299 | 358 | lines.append(" Do not rewrite the whole file.") |
| 300 | - edit_template = build_html_toc_edit_call_template(file_path) | |
| 359 | + edit_template = html_toc_rule.build_html_toc_edit_call_template(file_path) | |
| 301 | 360 | if edit_template: |
| 302 | 361 | lines.append("- Suggested edit call:") |
| 303 | 362 | lines.extend(f" {line}" for line in edit_template.splitlines()) |
src/loader/runtime/tool_batches.py (modified) @@ -8,6 +8,7 @@ from pathlib import Path
| 8 | 8 | from typing import Any |
| 9 | 9 | |
| 10 | 10 | from ..llm.base import Role, ToolCall |
| 11 | +from .compaction import infer_preferred_next_step, summarize_confirmed_facts | |
| 11 | 12 | from .context import RuntimeContext |
| 12 | 13 | from .dod import ( |
| 13 | 14 | DefinitionOfDone, |
@@ -24,23 +25,15 @@ from .evidence_provenance import EvidenceProvenance, EvidenceProvenanceStatus | ||
| 24 | 25 | from .executor import ToolExecutionState, ToolExecutor |
| 25 | 26 | from .logging import get_runtime_logger |
| 26 | 27 | from .policy_timeline import append_verification_timeline_entry |
| 28 | +from .safeguard_services import extract_shell_text_rewrite_target | |
| 29 | +from .semantic_rules import html_toc as html_toc_rule | |
| 27 | 30 | from .tool_batch_checks import ToolBatchConfidenceGate, ToolBatchVerificationGate |
| 28 | 31 | from .tool_batch_recovery import ToolBatchRecoveryController |
| 29 | 32 | from .verification_observations import ( |
| 30 | 33 | VerificationObservation, |
| 31 | 34 | VerificationObservationStatus, |
| 32 | 35 | ) |
| 33 | -from .workflow import sync_todos_to_definition_of_done | |
| 34 | -from .workflow import advance_todos_from_tool_call | |
| 35 | -from .compaction import infer_preferred_next_step, summarize_confirmed_facts | |
| 36 | -from .safeguard_services import ( | |
| 37 | - build_html_toc_edit_call_template, | |
| 38 | - build_html_toc_replacement_block, | |
| 39 | - extract_html_toc_excerpt, | |
| 40 | - extract_shell_text_rewrite_target, | |
| 41 | - summarize_html_inventory, | |
| 42 | - validate_html_toc, | |
| 43 | -) | |
| 36 | +from .workflow import advance_todos_from_tool_call, sync_todos_to_definition_of_done | |
| 44 | 37 | |
| 45 | 38 | EventSink = Callable[[AgentEvent], Awaitable[None]] |
| 46 | 39 | ConfirmationHandler = ( |
@@ -49,6 +42,10 @@ ConfirmationHandler = ( | ||
| 49 | 42 | UserQuestionHandler = Callable[[str, list[str] | None], Awaitable[str]] | None |
| 50 | 43 | |
| 51 | 44 | _VERIFY_ITEM = "Collect verification evidence" |
| 45 | +_TODO_NUDGE_EXCLUDED_ITEMS = { | |
| 46 | + "Complete the requested work", | |
| 47 | + _VERIFY_ITEM, | |
| 48 | +} | |
| 52 | 49 | |
| 53 | 50 | |
| 54 | 51 | @dataclass |
@@ -232,7 +229,7 @@ class ToolBatchRunner: | ||
| 232 | 229 | self.context.session.append(outcome.message) |
| 233 | 230 | summary.tool_result_messages.append(outcome.message) |
| 234 | 231 | if outcome.state == ToolExecutionState.DUPLICATE: |
| 235 | - self._queue_duplicate_observation_nudge(tool_call) | |
| 232 | + self._queue_duplicate_observation_nudge(tool_call, dod=dod) | |
| 236 | 233 | elif outcome.state == ToolExecutionState.BLOCKED: |
| 237 | 234 | self._queue_blocked_shell_rewrite_nudge(tool_call) |
| 238 | 235 | self._queue_blocked_html_edit_nudge(tool_call, outcome.event_content) |
@@ -268,17 +265,46 @@ class ToolBatchRunner: | ||
| 268 | 265 | |
| 269 | 266 | return result |
| 270 | 267 | |
| 271 | - def _queue_duplicate_observation_nudge(self, tool_call: ToolCall) -> None: | |
| 268 | + def _queue_duplicate_observation_nudge( | |
| 269 | + self, | |
| 270 | + tool_call: ToolCall, | |
| 271 | + *, | |
| 272 | + dod: DefinitionOfDone, | |
| 273 | + ) -> None: | |
| 272 | 274 | """Queue a concrete next-step nudge after duplicate observational actions.""" |
| 273 | 275 | |
| 274 | 276 | if tool_call.name not in {"read", "glob", "grep", "bash"}: |
| 275 | 277 | return |
| 276 | 278 | |
| 277 | 279 | current_task = getattr(self.context.session, "current_task", None) |
| 280 | + next_pending = next( | |
| 281 | + ( | |
| 282 | + item | |
| 283 | + for item in dod.pending_items | |
| 284 | + if item not in _TODO_NUDGE_EXCLUDED_ITEMS | |
| 285 | + ), | |
| 286 | + None, | |
| 287 | + ) | |
| 278 | 288 | confirmed_facts = summarize_confirmed_facts( |
| 279 | 289 | self.context.session.messages, |
| 280 | 290 | max_items=2, |
| 281 | 291 | ) |
| 292 | + if next_pending and not html_toc_rule.task_targets_html_toc(current_task): | |
| 293 | + if confirmed_facts: | |
| 294 | + self.context.queue_steering_message( | |
| 295 | + "Reuse the earlier observation instead of repeating it. " | |
| 296 | + f"Confirmed facts: {confirmed_facts}. " | |
| 297 | + f"Continue with the next pending item: `{next_pending}`. " | |
| 298 | + "Only gather more evidence if a specific fact required for that step is still unknown." | |
| 299 | + ) | |
| 300 | + else: | |
| 301 | + self.context.queue_steering_message( | |
| 302 | + "Reuse the earlier observation instead of repeating it. " | |
| 303 | + f"Continue with the next pending item: `{next_pending}`. " | |
| 304 | + "Only gather more evidence if a specific fact required for that step is still unknown." | |
| 305 | + ) | |
| 306 | + return | |
| 307 | + | |
| 282 | 308 | preferred_next_step = infer_preferred_next_step( |
| 283 | 309 | self.context.session.messages, |
| 284 | 310 | current_task=current_task, |
@@ -359,12 +385,14 @@ class ToolBatchRunner: | ||
| 359 | 385 | |
| 360 | 386 | if tool_call.name not in {"edit", "patch"}: |
| 361 | 387 | return |
| 388 | + if not self._targets_html_toc_task(): | |
| 389 | + return | |
| 362 | 390 | |
| 363 | 391 | target_path = str(tool_call.arguments.get("file_path", "")).strip() |
| 364 | - if not target_path.endswith("index.html"): | |
| 392 | + if not html_toc_rule.is_html_toc_index_path(target_path): | |
| 365 | 393 | return |
| 366 | 394 | |
| 367 | - validation = validate_html_toc(target_path) | |
| 395 | + validation = html_toc_rule.validate_html_toc(target_path) | |
| 368 | 396 | if ( |
| 369 | 397 | "old_string and new_string are identical" in event_content |
| 370 | 398 | and validation is not None |
@@ -374,12 +402,14 @@ class ToolBatchRunner: | ||
| 374 | 402 | note_validated = getattr(action_tracker, "note_validated_html_toc", None) |
| 375 | 403 | if callable(note_validated): |
| 376 | 404 | note_validated(target_path) |
| 405 | + target_label = html_toc_rule.describe_html_toc_target(target_path) | |
| 377 | 406 | self.context.queue_steering_message( |
| 378 | - "The current `index.html` already matches the validated replacement block. " | |
| 379 | - f"Semantic verification preview: validated {validation.link_count} toc links in " | |
| 380 | - f"`{Path(target_path).name}`. " | |
| 407 | + f"The HTML table-of-contents target {target_label} already matches the " | |
| 408 | + "validated replacement block. " | |
| 409 | + f"Semantic verification preview: validated {validation.link_count} linked " | |
| 410 | + "entries. " | |
| 381 | 411 | "Do not call `edit`, `patch`, or reread the same TOC again. Briefly state " |
| 382 | - "that the table of contents is already updated so Loader can continue the " | |
| 412 | + f"that {target_label} is already updated so Loader can continue the " | |
| 383 | 413 | "verification gate or finish the task." |
| 384 | 414 | ) |
| 385 | 415 | return |
@@ -388,15 +418,18 @@ class ToolBatchRunner: | ||
| 388 | 418 | confirmed_facts = summarize_confirmed_facts( |
| 389 | 419 | self.context.session.messages, |
| 390 | 420 | max_items=2, |
| 421 | + focus_path=target_path, | |
| 391 | 422 | ) |
| 392 | 423 | preferred_next_step = infer_preferred_next_step( |
| 393 | 424 | self.context.session.messages, |
| 394 | 425 | current_task=current_task, |
| 426 | + focus_path=target_path, | |
| 395 | 427 | ) |
| 396 | - verified_inventory = summarize_html_inventory(target_path, limit=12) | |
| 397 | - current_excerpt = extract_html_toc_excerpt(target_path) | |
| 398 | - suggested_replacement = build_html_toc_replacement_block(target_path) | |
| 399 | - suggested_call = build_html_toc_edit_call_template(target_path) | |
| 428 | + verified_inventory = html_toc_rule.summarize_html_inventory(target_path, limit=12) | |
| 429 | + current_excerpt = html_toc_rule.extract_html_toc_excerpt(target_path) | |
| 430 | + suggested_replacement = html_toc_rule.build_html_toc_replacement_block(target_path) | |
| 431 | + suggested_call = html_toc_rule.build_html_toc_edit_call_template(target_path) | |
| 432 | + target_label = html_toc_rule.describe_html_toc_target(target_path) | |
| 400 | 433 | excerpt_suffix = ( |
| 401 | 434 | f"\nCurrent TOC block:\n{current_excerpt}" |
| 402 | 435 | if current_excerpt |
@@ -415,11 +448,12 @@ class ToolBatchRunner: | ||
| 415 | 448 | |
| 416 | 449 | if preferred_next_step and confirmed_facts and verified_inventory: |
| 417 | 450 | self.context.queue_steering_message( |
| 418 | - "Use the current target contents plus the verified sibling inventory instead of guessing. " | |
| 451 | + f"Use the current TOC target contents plus the verified sibling inventory for " | |
| 452 | + f"{target_label} instead of guessing. " | |
| 419 | 453 | f"Confirmed facts: {confirmed_facts}. " |
| 420 | 454 | f"Known chapter inventory: {verified_inventory}. " |
| 421 | 455 | f"{preferred_next_step} " |
| 422 | - "Apply those exact href/title pairs in `index.html`. " | |
| 456 | + f"Apply those exact href/title pairs in {target_label}. " | |
| 423 | 457 | "Do not rewrite the whole document. For `edit`, set `old_string` to the " |
| 424 | 458 | "current TOC block above exactly and set `new_string` to the suggested " |
| 425 | 459 | "replacement block below exactly." |
@@ -431,9 +465,10 @@ class ToolBatchRunner: | ||
| 431 | 465 | |
| 432 | 466 | if verified_inventory: |
| 433 | 467 | self.context.queue_steering_message( |
| 434 | - "Use the current target contents plus the verified sibling inventory instead of guessing. " | |
| 468 | + f"Use the current TOC target contents plus the verified sibling inventory for " | |
| 469 | + f"{target_label} instead of guessing. " | |
| 435 | 470 | f"Known chapter inventory: {verified_inventory}. " |
| 436 | - "Apply those exact href/title pairs in `index.html`. " | |
| 471 | + f"Apply those exact href/title pairs in {target_label}. " | |
| 437 | 472 | "Do not rewrite the whole document. For `edit`, set `old_string` to the " |
| 438 | 473 | "current TOC block above exactly and set `new_string` to the suggested " |
| 439 | 474 | "replacement block below exactly." |
@@ -444,7 +479,8 @@ class ToolBatchRunner: | ||
| 444 | 479 | return |
| 445 | 480 | |
| 446 | 481 | self.context.queue_steering_message( |
| 447 | - "Use the current target contents when retrying this `index.html` edit instead of guessing. " | |
| 482 | + f"Use the current TOC target contents when retrying the edit for {target_label} " | |
| 483 | + "instead of guessing. " | |
| 448 | 484 | f"{excerpt_suffix}".strip() |
| 449 | 485 | ) |
| 450 | 486 | |
@@ -465,16 +501,18 @@ class ToolBatchRunner: | ||
| 465 | 501 | if not self._targets_html_toc_task(): |
| 466 | 502 | return |
| 467 | 503 | |
| 468 | - verified_inventory = summarize_html_inventory(index_path, limit=12) | |
| 504 | + verified_inventory = html_toc_rule.summarize_html_inventory(index_path, limit=12) | |
| 469 | 505 | if not verified_inventory: |
| 470 | 506 | return |
| 471 | 507 | |
| 472 | 508 | self._inventory_hint_targets.add(index_path) |
| 509 | + target_label = html_toc_rule.describe_html_toc_target(index_path) | |
| 510 | + chapters_label = html_toc_rule.describe_html_toc_chapters_dir(index_path) | |
| 473 | 511 | self.context.queue_steering_message( |
| 474 | - "You already have the verified sibling inventory needed for this edit. " | |
| 512 | + f"You already have the verified sibling inventory needed for {target_label}. " | |
| 475 | 513 | f"Known chapter inventory: {verified_inventory}. " |
| 476 | - f"Update `{index_path}` using those exact href/title pairs instead of rereading files " | |
| 477 | - "unless one specific title is still unknown." | |
| 514 | + f"Update {target_label} using those exact href/title pairs instead of rereading " | |
| 515 | + f"files in {chapters_label} unless one specific title is still unknown." | |
| 478 | 516 | ) |
| 479 | 517 | |
| 480 | 518 | def _annotate_verified_html_inventory(self, tool_call: ToolCall, outcome) -> None: |
@@ -491,7 +529,7 @@ class ToolBatchRunner: | ||
| 491 | 529 | return |
| 492 | 530 | |
| 493 | 531 | index_path = str(Path(chapters_path).expanduser().parent / "index.html") |
| 494 | - verified_inventory = summarize_html_inventory(index_path, limit=12) | |
| 532 | + verified_inventory = html_toc_rule.summarize_html_inventory(index_path, limit=12) | |
| 495 | 533 | if not verified_inventory: |
| 496 | 534 | return |
| 497 | 535 | |
@@ -500,10 +538,7 @@ class ToolBatchRunner: | ||
| 500 | 538 | if callable(note_inventory): |
| 501 | 539 | note_inventory(index_path) |
| 502 | 540 | |
| 503 | - note = ( | |
| 504 | - "Verified chapter inventory: " | |
| 505 | - f"{verified_inventory}" | |
| 506 | - ) | |
| 541 | + note = f"Verified chapter inventory: {verified_inventory}" | |
| 507 | 542 | merged_event = outcome.event_content |
| 508 | 543 | if note not in merged_event: |
| 509 | 544 | merged_event = f"{note}\n{merged_event}".strip() |
@@ -516,13 +551,13 @@ class ToolBatchRunner: | ||
| 516 | 551 | def _annotate_validated_html_toc_completion(self, tool_call: ToolCall, outcome) -> None: |
| 517 | 552 | """Attach semantic TOC validation evidence to a successful mutating result.""" |
| 518 | 553 | |
| 554 | + if not self._targets_html_toc_task(): | |
| 555 | + return | |
| 519 | 556 | target_path = self._validated_html_toc_target(tool_call) |
| 520 | 557 | if target_path is None: |
| 521 | 558 | return |
| 522 | - if tool_call.name == "read" and not self._targets_html_toc_task(): | |
| 523 | - return | |
| 524 | 559 | |
| 525 | - validation = validate_html_toc(target_path) | |
| 560 | + validation = html_toc_rule.validate_html_toc(target_path) | |
| 526 | 561 | if validation is None or not validation.valid: |
| 527 | 562 | return |
| 528 | 563 | |
@@ -547,34 +582,41 @@ class ToolBatchRunner: | ||
| 547 | 582 | def _queue_validated_html_toc_completion_nudge(self, tool_call: ToolCall) -> None: |
| 548 | 583 | """Push the next model turn toward finishing once the TOC already validates.""" |
| 549 | 584 | |
| 585 | + if not self._targets_html_toc_task(): | |
| 586 | + return | |
| 550 | 587 | target_path = self._validated_html_toc_target(tool_call) |
| 551 | 588 | if target_path is None: |
| 552 | 589 | return |
| 553 | - if tool_call.name == "read" and not self._targets_html_toc_task(): | |
| 554 | - return | |
| 555 | 590 | |
| 556 | - validation = validate_html_toc(target_path) | |
| 591 | + validation = html_toc_rule.validate_html_toc(target_path) | |
| 557 | 592 | if validation is None or not validation.valid: |
| 558 | 593 | return |
| 559 | 594 | |
| 560 | 595 | if tool_call.name == "read": |
| 596 | + target_label = html_toc_rule.describe_html_toc_target(target_path) | |
| 597 | + chapters_label = html_toc_rule.describe_html_toc_chapters_dir(target_path) | |
| 561 | 598 | self.context.queue_steering_message( |
| 562 | - "The current `index.html` already satisfies the verified chapter-link constraints. " | |
| 563 | - f"Semantic verification preview: validated {validation.link_count} toc links in " | |
| 564 | - f"`{Path(target_path).name}`. " | |
| 599 | + f"The HTML table-of-contents target {target_label} already satisfies the " | |
| 600 | + "verified link/title constraints. " | |
| 601 | + f"Semantic verification preview: validated {validation.link_count} linked " | |
| 602 | + "entries. " | |
| 565 | 603 | "No TOC edit is required unless you can point to one specific incorrect href or " |
| 566 | - "title. Do not reread `index.html` or files in `chapters/` again. Briefly state " | |
| 567 | - "that the table of contents is already correct so Loader can finish the task." | |
| 604 | + f"title. Do not reread {target_label} or files in {chapters_label} again. " | |
| 605 | + "Briefly state that the table of contents is already correct so Loader can " | |
| 606 | + "finish the task." | |
| 568 | 607 | ) |
| 569 | 608 | return |
| 570 | 609 | |
| 610 | + target_label = html_toc_rule.describe_html_toc_target(target_path) | |
| 611 | + chapters_label = html_toc_rule.describe_html_toc_chapters_dir(target_path) | |
| 571 | 612 | self.context.queue_steering_message( |
| 572 | - "The current `index.html` already satisfies the verified chapter-link constraints. " | |
| 573 | - f"Semantic verification preview: validated {validation.link_count} toc links in " | |
| 574 | - f"`{Path(target_path).name}`. " | |
| 575 | - "Do not reread `index.html` or files in `chapters/` unless a specific href or " | |
| 576 | - "title is still unresolved. Briefly state that the table of contents has been " | |
| 577 | - "updated so Loader can run the verification gate." | |
| 613 | + f"The HTML table-of-contents target {target_label} already satisfies the " | |
| 614 | + "verified link/title constraints. " | |
| 615 | + f"Semantic verification preview: validated {validation.link_count} linked " | |
| 616 | + "entries. " | |
| 617 | + f"Do not reread {target_label} or files in {chapters_label} unless a specific " | |
| 618 | + "href or title is still unresolved. Briefly state that the table of contents has " | |
| 619 | + "been updated so Loader can run the verification gate." | |
| 578 | 620 | ) |
| 579 | 621 | |
| 580 | 622 | @staticmethod |
@@ -594,7 +636,7 @@ class ToolBatchRunner: | ||
| 594 | 636 | |
| 595 | 637 | if not target_path: |
| 596 | 638 | return None |
| 597 | - if not target_path.endswith("index.html"): | |
| 639 | + if not html_toc_rule.is_html_toc_index_path(target_path): | |
| 598 | 640 | return None |
| 599 | 641 | return str(Path(target_path).expanduser()) |
| 600 | 642 | |
@@ -608,10 +650,7 @@ class ToolBatchRunner: | ||
| 608 | 650 | if content: |
| 609 | 651 | current_task = content |
| 610 | 652 | break |
| 611 | - return any( | |
| 612 | - hint in current_task | |
| 613 | - for hint in ("href", "link", "links", "table of contents", "chapter", "index.html") | |
| 614 | - ) | |
| 653 | + return html_toc_rule.task_targets_html_toc(current_task) | |
| 615 | 654 | |
| 616 | 655 | async def _record_successful_execution( |
| 617 | 656 | self, |
@@ -646,7 +685,13 @@ class ToolBatchRunner: | ||
| 646 | 685 | if isinstance(new_todos, list): |
| 647 | 686 | sync_todos_to_definition_of_done(dod, new_todos) |
| 648 | 687 | else: |
| 649 | - advance_todos_from_tool_call(dod, tool_call) | |
| 688 | + pending_before = list(dod.pending_items) | |
| 689 | + if advance_todos_from_tool_call(dod, tool_call): | |
| 690 | + self._queue_next_pending_todo_nudge( | |
| 691 | + tool_call=tool_call, | |
| 692 | + pending_before=pending_before, | |
| 693 | + dod=dod, | |
| 694 | + ) | |
| 650 | 695 | self.dod_store.save(dod) |
| 651 | 696 | recovery_context = self.context.recovery_context |
| 652 | 697 | if recovery_context is not None: |
@@ -658,6 +703,61 @@ class ToolBatchRunner: | ||
| 658 | 703 | self.context.recovery_context = None |
| 659 | 704 | return None |
| 660 | 705 | |
| 706 | + def _queue_next_pending_todo_nudge( | |
| 707 | + self, | |
| 708 | + *, | |
| 709 | + tool_call: ToolCall, | |
| 710 | + pending_before: list[str], | |
| 711 | + dod: DefinitionOfDone, | |
| 712 | + ) -> None: | |
| 713 | + if is_state_mutating_tool_call(tool_call): | |
| 714 | + return | |
| 715 | + if tool_call.name not in {"read", "glob", "grep", "bash"}: | |
| 716 | + return | |
| 717 | + if tool_call.name == "bash": | |
| 718 | + command = str(tool_call.arguments.get("command", "")).lower() | |
| 719 | + if not any( | |
| 720 | + token in command | |
| 721 | + for token in ( | |
| 722 | + "ls ", | |
| 723 | + " ls", | |
| 724 | + "find ", | |
| 725 | + "grep ", | |
| 726 | + "rg ", | |
| 727 | + "cat ", | |
| 728 | + "sed ", | |
| 729 | + "head ", | |
| 730 | + "tail ", | |
| 731 | + ) | |
| 732 | + ): | |
| 733 | + return | |
| 734 | + | |
| 735 | + completed_label = next( | |
| 736 | + ( | |
| 737 | + item | |
| 738 | + for item in pending_before | |
| 739 | + if item not in dod.pending_items | |
| 740 | + and item not in _TODO_NUDGE_EXCLUDED_ITEMS | |
| 741 | + ), | |
| 742 | + None, | |
| 743 | + ) | |
| 744 | + next_pending = next( | |
| 745 | + ( | |
| 746 | + item | |
| 747 | + for item in dod.pending_items | |
| 748 | + if item not in _TODO_NUDGE_EXCLUDED_ITEMS | |
| 749 | + ), | |
| 750 | + None, | |
| 751 | + ) | |
| 752 | + if not completed_label or not next_pending or next_pending == completed_label: | |
| 753 | + return | |
| 754 | + | |
| 755 | + self.context.queue_steering_message( | |
| 756 | + f"Confirmed progress: `{completed_label}` is now satisfied by the successful " | |
| 757 | + f"`{tool_call.name}` result. Continue with the next pending item: " | |
| 758 | + f"`{next_pending}` instead of rereading the same evidence." | |
| 759 | + ) | |
| 760 | + | |
| 661 | 761 | |
| 662 | 762 | def _mark_verification_stale( |
| 663 | 763 | *, |
src/loader/runtime/workflow.py (modified) @@ -50,6 +50,8 @@ __all__ = [
| 50 | 50 | "extract_verification_commands_from_markdown", |
| 51 | 51 | "load_brief", |
| 52 | 52 | "load_planning_artifacts", |
| 53 | + "merge_refreshed_todos_with_existing_scope", | |
| 54 | + "preserve_task_grounded_acceptance_criteria", | |
| 53 | 55 | "sync_todos_to_definition_of_done", |
| 54 | 56 | ] |
| 55 | 57 | |
@@ -103,6 +105,7 @@ _PARSE_STEP_HINTS = ( | ||
| 103 | 105 | "determine", |
| 104 | 106 | ) |
| 105 | 107 | _MUTATION_STEP_HINTS = ( |
| 108 | + "create", | |
| 106 | 109 | "update", |
| 107 | 110 | "edit", |
| 108 | 111 | "write", |
@@ -122,6 +125,24 @@ _VERIFY_STEP_HINTS = ( | ||
| 122 | 125 | "confirm", |
| 123 | 126 | "check", |
| 124 | 127 | ) |
| 128 | +_TASK_COVERAGE_STOP_WORDS = { | |
| 129 | + "the", | |
| 130 | + "and", | |
| 131 | + "with", | |
| 132 | + "from", | |
| 133 | + "that", | |
| 134 | + "this", | |
| 135 | + "into", | |
| 136 | + "your", | |
| 137 | + "have", | |
| 138 | + "make", | |
| 139 | + "will", | |
| 140 | + "then", | |
| 141 | + "each", | |
| 142 | + "file", | |
| 143 | + "files", | |
| 144 | + "guide", | |
| 145 | +} | |
| 125 | 146 | _SHELL_COMMAND_START = re.compile( |
| 126 | 147 | r"(?<![\w/.-])(" |
| 127 | 148 | r"ls|grep|pytest|uv|python3?|html5validator|cargo|npm|node|mypy|ruff|find|git|cat|sed|head|tail|test|diff|cmp|bash|sh|make" |
@@ -451,6 +472,25 @@ class PlanningArtifacts: | ||
| 451 | 472 | "It does not run a planner/critic consensus loop.", |
| 452 | 473 | ] |
| 453 | 474 | |
| 475 | + def with_acceptance_criteria(self, acceptance_criteria: list[str]) -> PlanningArtifacts: | |
| 476 | + """Return one copy with a rewritten acceptance-criteria section.""" | |
| 477 | + | |
| 478 | + merged = [item.strip() for item in acceptance_criteria if item.strip()] | |
| 479 | + if not merged or merged == self.acceptance_criteria: | |
| 480 | + return self | |
| 481 | + | |
| 482 | + return PlanningArtifacts( | |
| 483 | + implementation_markdown=self.implementation_markdown, | |
| 484 | + verification_markdown=_replace_markdown_section_items( | |
| 485 | + self.verification_markdown, | |
| 486 | + "Acceptance Criteria", | |
| 487 | + merged, | |
| 488 | + ), | |
| 489 | + verification_commands=list(self.verification_commands), | |
| 490 | + acceptance_criteria=list(merged), | |
| 491 | + implementation_steps=list(self.implementation_steps), | |
| 492 | + ) | |
| 493 | + | |
| 454 | 494 | |
| 455 | 495 | class WorkflowArtifactStore: |
| 456 | 496 | """Persist briefs and plans under `.loader/`.""" |
@@ -546,6 +586,76 @@ def sync_todos_to_definition_of_done( | ||
| 546 | 586 | dod.completed_items = list(dict.fromkeys(completed + special_completed)) |
| 547 | 587 | |
| 548 | 588 | |
def preserve_task_grounded_acceptance_criteria(
    task_statement: str,
    *,
    existing_acceptance_criteria: list[str],
    refreshed_acceptance_criteria: list[str],
) -> list[str]:
    """Preserve task-grounded scope when refreshed artifacts are narrower."""

    task_normalized = task_statement.strip().lower()
    carried_over: list[str] = []
    for criterion in existing_acceptance_criteria:
        text = criterion.strip()
        # Keep only non-empty criteria that are not a verbatim restatement of
        # the task and that the task statement itself still covers.
        if not text or text.lower() == task_normalized:
            continue
        if _task_text_covers_requirement(task_statement, criterion):
            carried_over.append(criterion)
    # Ordered de-duplication: carried-over scope first, refreshed items after.
    return list(dict.fromkeys(carried_over + refreshed_acceptance_criteria))
| 605 | + | |
| 606 | + | |
def merge_refreshed_todos_with_existing_scope(
    task_statement: str,
    *,
    existing_pending_items: list[str],
    existing_completed_items: list[str],
    refreshed_steps: list[str],
) -> list[dict[str, str]]:
    """Merge one refreshed plan with task-grounded todo scope already in flight."""

    def _grounded(items: list[str]) -> list[str]:
        # Keep non-empty, non-special items still covered by the task text.
        return [
            item
            for item in items
            if item.strip()
            and item not in _SPECIAL_TODO_ITEMS
            and _task_text_covers_requirement(task_statement, item)
        ]

    def _todo(label: str, status: str) -> dict[str, str]:
        # Shared shape for every emitted todo entry.
        return {
            "content": label,
            "active_form": f"Working on: {label}",
            "status": status,
        }

    todos: list[dict[str, str]] = []
    seen: set[str] = set()

    # Completed scope first, preserving original order and labels verbatim.
    for item in _grounded(existing_completed_items):
        if item in seen:
            continue
        seen.add(item)
        todos.append(_todo(item, "completed"))

    # Then still-pending grounded scope, followed by the refreshed plan steps.
    for item in _grounded(existing_pending_items) + refreshed_steps:
        label = item.strip()
        if not label or label in seen:
            continue
        seen.add(label)
        todos.append(_todo(label, "pending"))

    return todos
| 657 | + | |
| 658 | + | |
| 549 | 659 | def advance_todos_from_tool_call(dod, tool_call: ToolCall) -> bool: |
| 550 | 660 | """Advance the best-matching pending todo from a successful tool call.""" |
| 551 | 661 | |
@@ -584,6 +694,21 @@ def _todo_progress_score(item: str, tool_call: ToolCall) -> int: | ||
| 584 | 694 | parent = Path(path_hint).parent.name.lower() if path_hint else "" |
| 585 | 695 | |
| 586 | 696 | score = 0 |
| 697 | + is_discovery_tool = name in {"read", "glob", "grep"} | |
| 698 | + if name == "bash": | |
| 699 | + is_discovery_tool = _looks_like_search_command(command) or _looks_like_read_command(command) | |
| 700 | + if ( | |
| 701 | + is_discovery_tool | |
| 702 | + and _contains_any(text, _MUTATION_STEP_HINTS) | |
| 703 | + and not ( | |
| 704 | + _contains_any(text, _READ_STEP_HINTS) | |
| 705 | + or _contains_any(text, _SEARCH_STEP_HINTS) | |
| 706 | + or _contains_any(text, _PARSE_STEP_HINTS) | |
| 707 | + or _contains_any(text, _VERIFY_STEP_HINTS) | |
| 708 | + ) | |
| 709 | + ): | |
| 710 | + return 0 | |
| 711 | + | |
| 587 | 712 | if basename and basename in text: |
| 588 | 713 | score += 3 |
| 589 | 714 | if parent and parent not in {"", "."} and parent in text: |
@@ -826,6 +951,34 @@ def _render_section(title: str, items: list[str]) -> list[str]: | ||
| 826 | 951 | return lines |
| 827 | 952 | |
| 828 | 953 | |
def _replace_markdown_section_items(
    markdown: str,
    title: str,
    items: list[str],
) -> str:
    """Rewrite one `## <title>` section in-place, or append it when absent."""

    source_lines = markdown.rstrip().splitlines()
    wanted_heading = f"## {title}".lower()
    section_start: int | None = None
    section_end = len(source_lines)
    for position, raw_line in enumerate(source_lines):
        stripped = raw_line.strip()
        if stripped.lower() == wanted_heading:
            # Remember the (latest) matching heading and keep scanning.
            section_start = position
        elif section_start is not None and re.match(r"^##+\s+", stripped):
            # First sub/sibling heading after the target ends the section.
            section_end = position
            break

    rendered = _render_section(title, items)
    if section_start is None:
        # Section not present: append it after the existing body.
        body = "\n".join(source_lines).rstrip()
        separator = "\n\n" if body else ""
        rendered_text = "\n".join(rendered).rstrip()
        return f"{body}{separator}{rendered_text}\n"

    rebuilt = [*source_lines[:section_start], *rendered, *source_lines[section_end:]]
    return "\n".join(rebuilt).rstrip() + "\n"
| 980 | + | |
| 981 | + | |
| 829 | 982 | def _first_item(items: list[str] | None) -> str | None: |
| 830 | 983 | if not items: |
| 831 | 984 | return None |
@@ -847,6 +1000,63 @@ def _merge_grounded_items( | ||
| 847 | 1000 | return merged, merged != current |
| 848 | 1001 | |
| 849 | 1002 | |
def _task_text_covers_requirement(task_text: str, requirement: str) -> bool:
    """Heuristic: does the task statement already encompass this requirement?"""

    task_lower = task_text.lower()
    requirement_lower = requirement.lower()

    # Literal containment is the strongest signal.
    if requirement_lower in task_lower:
        return True
    # Output-scope criteria are covered when the task names several outputs.
    scope_requirement = _requirement_describes_output_scope(requirement_lower)
    if scope_requirement and _task_mentions_multiple_outputs(task_lower):
        return True

    keywords = [
        word
        for word in re.findall(r"[a-z0-9_./-]+", requirement_lower)
        if len(word) > 2 and word not in _TASK_COVERAGE_STOP_WORDS
    ]
    if not keywords:
        return requirement_lower.strip() in task_lower
    hits = sum(1 for word in keywords if word in task_lower)
    # Require up to two keyword hits (one suffices for single-keyword criteria).
    return hits >= min(2, len(keywords))
| 1024 | + | |
| 1025 | + | |
| 1026 | +def _task_mentions_multiple_outputs(task_text: str) -> bool: | |
| 1027 | + matches = re.findall( | |
| 1028 | + r"(?:~/(?:[A-Za-z0-9_.-]+/)+[A-Za-z0-9_.-]+(?:\.[A-Za-z0-9]+)?|" | |
| 1029 | + r"/(?:Users|home|tmp|var|private)/(?:[A-Za-z0-9_. -]+/)+[A-Za-z0-9_.-]+(?:\.[A-Za-z0-9]+)?|" | |
| 1030 | + r"[A-Za-z0-9_.-]+\.html|chapters/)", | |
| 1031 | + task_text, | |
| 1032 | + ) | |
| 1033 | + if len(matches) >= 2: | |
| 1034 | + return True | |
| 1035 | + if matches and re.search( | |
| 1036 | + r"\b(chapter files?|files?|directories|directory structure|folders|pages|artifacts?|outputs?)\b", | |
| 1037 | + task_text, | |
| 1038 | + ): | |
| 1039 | + return True | |
| 1040 | + return False | |
| 1041 | + | |
| 1042 | + | |
| 1043 | +def _requirement_describes_output_scope(requirement: str) -> bool: | |
| 1044 | + return any( | |
| 1045 | + phrase in requirement | |
| 1046 | + for phrase in ( | |
| 1047 | + "all files", | |
| 1048 | + "file naming", | |
| 1049 | + "correct locations", | |
| 1050 | + "directory structure", | |
| 1051 | + "proper directory structure", | |
| 1052 | + "all links", | |
| 1053 | + "no broken links", | |
| 1054 | + "formatted and consistent", | |
| 1055 | + "consistent in style", | |
| 1056 | + ) | |
| 1057 | + ) | |
| 1058 | + | |
| 1059 | + | |
| 850 | 1060 | def _mark_explicit_section(brief: ClarifyBrief, section: str) -> None: |
| 851 | 1061 | if section in brief.explicit_sections: |
| 852 | 1062 | return |
src/loader/runtime/workflow_lanes.py (modified) @@ -37,6 +37,8 @@ from .workflow import (
| 37 | 37 | WorkflowPolicy, |
| 38 | 38 | WorkflowTimelineEntryKind, |
| 39 | 39 | enrich_clarify_brief_with_grounding, |
| 40 | + merge_refreshed_todos_with_existing_scope, | |
| 41 | + preserve_task_grounded_acceptance_criteria, | |
| 40 | 42 | sync_todos_to_definition_of_done, |
| 41 | 43 | ) |
| 42 | 44 | from .workflow_ledger import ( |
@@ -199,6 +201,13 @@ class WorkflowLaneRunner: | ||
| 199 | 201 | if response.content.strip() |
| 200 | 202 | else PlanningArtifacts.fallback(task_statement=task) |
| 201 | 203 | ) |
| 204 | + if refresh_reasons: | |
| 205 | + preserved_acceptance = preserve_task_grounded_acceptance_criteria( | |
| 206 | + task, | |
| 207 | + existing_acceptance_criteria=list(dod.acceptance_criteria), | |
| 208 | + refreshed_acceptance_criteria=list(artifacts.acceptance_criteria), | |
| 209 | + ) | |
| 210 | + artifacts = artifacts.with_acceptance_criteria(preserved_acceptance) | |
| 202 | 211 | implementation_path, verification_path = self.artifact_store.write_plan( |
| 203 | 212 | task, |
| 204 | 213 | artifacts, |
@@ -206,7 +215,7 @@ class WorkflowLaneRunner: | ||
| 206 | 215 | dod.implementation_plan = str(implementation_path) |
| 207 | 216 | dod.verification_plan = str(verification_path) |
| 208 | 217 | if refresh_reasons: |
| 209 | - dod.acceptance_criteria = list(dict.fromkeys(artifacts.acceptance_criteria)) | |
| 218 | + dod.acceptance_criteria = list(artifacts.acceptance_criteria) | |
| 210 | 219 | else: |
| 211 | 220 | dod.acceptance_criteria = list( |
| 212 | 221 | dict.fromkeys(dod.acceptance_criteria + artifacts.acceptance_criteria) |
@@ -244,6 +253,8 @@ class WorkflowLaneRunner: | ||
| 244 | 253 | dod=dod, |
| 245 | 254 | emit=emit, |
| 246 | 255 | executor=executor, |
| 256 | + task_statement=task, | |
| 257 | + preserve_existing_scope=bool(refresh_reasons), | |
| 247 | 258 | ) |
| 248 | 259 | |
| 249 | 260 | async def _emit_artifact( |
@@ -286,11 +297,21 @@ class WorkflowLaneRunner: | ||
| 286 | 297 | dod: DefinitionOfDone, |
| 287 | 298 | emit: EventSink, |
| 288 | 299 | executor: ToolExecutor | None, |
| 300 | + task_statement: str, | |
| 301 | + preserve_existing_scope: bool = False, | |
| 289 | 302 | ) -> None: |
| 290 | 303 | if not artifacts.implementation_steps: |
| 291 | 304 | return |
| 292 | 305 | assert executor is not None |
| 293 | 306 | |
| 307 | + if preserve_existing_scope: | |
| 308 | + todos = merge_refreshed_todos_with_existing_scope( | |
| 309 | + task_statement, | |
| 310 | + existing_pending_items=list(dod.pending_items), | |
| 311 | + existing_completed_items=list(dod.completed_items), | |
| 312 | + refreshed_steps=list(artifacts.implementation_steps[:8]), | |
| 313 | + ) | |
| 314 | + else: | |
| 294 | 315 | todos = [ |
| 295 | 316 | { |
| 296 | 317 | "content": step, |
@@ -299,6 +320,9 @@ class WorkflowLaneRunner: | ||
| 299 | 320 | } |
| 300 | 321 | for step in artifacts.implementation_steps[:8] |
| 301 | 322 | ] |
| 323 | + if not todos: | |
| 324 | + return | |
| 325 | + | |
| 302 | 326 | tool_call = ToolCall( |
| 303 | 327 | id="plan-todos-1", |
| 304 | 328 | name="TodoWrite", |
@@ -589,6 +613,10 @@ class WorkflowLaneRunner: | ||
| 589 | 613 | refresh_block = ( |
| 590 | 614 | "Refresh the existing planning artifacts instead of creating a fresh plan " |
| 591 | 615 | "from scratch.\n" |
| 616 | + "Preserve the original task outcome and acceptance scope unless the user " | |
| 617 | + "explicitly changed the task.\n" | |
| 618 | + "Do not redefine success around partially completed work or a sample " | |
| 619 | + "artifact.\n" | |
| 592 | 620 | "Use the current task state and these recovery reasons:\n" |
| 593 | 621 | + "\n".join(f"- {item}" for item in refresh_reasons) |
| 594 | 622 | + "\n\n" |
tests/test_runtime_harness.pymodified@@ -2022,9 +2022,10 @@ async def test_blocked_html_index_edit_queues_inventory_reuse_steering( | ||
| 2022 | 2022 | |
| 2023 | 2023 | assert any("TOC references chapter files that do not exist" in message for message in messages) |
| 2024 | 2024 | assert any( |
| 2025 | - "Use the current target contents plus the verified sibling inventory instead of guessing." in message | |
| 2025 | + "Use the current TOC target contents plus the verified sibling inventory" in message | |
| 2026 | 2026 | for message in steering_messages |
| 2027 | 2027 | ) |
| 2028 | + assert any(str(index_file) in message for message in steering_messages) | |
| 2028 | 2029 | assert any( |
| 2029 | 2030 | "chapters/05-input-output.html = Chapter 5: Input and Output" in message |
| 2030 | 2031 | for message in steering_messages |
@@ -2141,7 +2142,7 @@ async def test_verified_html_inventory_blocks_redundant_chapter_reread( | ||
| 2141 | 2142 | for message in messages |
| 2142 | 2143 | ) |
| 2143 | 2144 | assert any( |
| 2144 | - "The verified chapter inventory already lists the exact href/title pairs for this directory" | |
| 2145 | + "verified sibling chapter inventory" | |
| 2145 | 2146 | in message |
| 2146 | 2147 | for message in messages |
| 2147 | 2148 | ) |
@@ -2240,15 +2241,15 @@ async def test_successful_html_toc_edit_blocks_post_success_reread_and_steers_to | ||
| 2240 | 2241 | for message in messages |
| 2241 | 2242 | ) |
| 2242 | 2243 | assert any( |
| 2243 | - "already passes the validated chapter-link check" in message | |
| 2244 | + "already passed semantic link validation" in message | |
| 2244 | 2245 | for message in messages |
| 2245 | 2246 | ) |
| 2246 | 2247 | assert any( |
| 2247 | - "already satisfies the verified chapter-link constraints" in message | |
| 2248 | + "already satisfies the verified link/title constraints" in message | |
| 2248 | 2249 | for message in steering_messages |
| 2249 | 2250 | ) |
| 2250 | 2251 | assert any( |
| 2251 | - "Do not reread `index.html` or files in `chapters/`" in message | |
| 2252 | + "Do not reread" in message and "chapters" in message | |
| 2252 | 2253 | for message in steering_messages |
| 2253 | 2254 | ) |
| 2254 | 2255 | assert "validated 2 toc links in index.html" in run.response |
@@ -2334,6 +2335,7 @@ async def test_exact_prompt_finishes_when_index_toc_is_already_correct( | ||
| 2334 | 2335 | in message |
| 2335 | 2336 | for message in steering_messages |
| 2336 | 2337 | ) |
| 2338 | + assert any(str(index_file) in message for message in steering_messages) | |
| 2337 | 2339 | assert ( |
| 2338 | 2340 | sum( |
| 2339 | 2341 | 1 |
tests/test_safeguard_services.pymodified@@ -11,12 +11,17 @@ from loader.runtime.safeguard_services import ( | ||
| 11 | 11 | ActionTracker, |
| 12 | 12 | PreActionValidator, |
| 13 | 13 | ValidationResult, |
| 14 | +) | |
| 15 | +from loader.runtime.safeguards import RuntimeSafeguards | |
| 16 | +from loader.runtime.semantic_rules.html_toc import ( | |
| 14 | 17 | build_html_toc_edit_call_template, |
| 15 | 18 | build_html_toc_replacement_block, |
| 19 | + build_validated_html_toc_observation_reason, | |
| 20 | + build_verified_html_inventory_observation_reason, | |
| 16 | 21 | format_html_inventory_entry, |
| 22 | + task_targets_html_toc, | |
| 17 | 23 | validate_html_toc, |
| 18 | 24 | ) |
| 19 | -from loader.runtime.safeguards import RuntimeSafeguards | |
| 20 | 25 | |
| 21 | 26 | |
| 22 | 27 | def test_action_tracker_detects_duplicate_write_after_recording(tmp_path) -> None: |
@@ -34,6 +39,17 @@ def test_action_tracker_detects_duplicate_write_after_recording(tmp_path) -> Non | ||
| 34 | 39 | assert str(file_path) in reason |
| 35 | 40 | |
| 36 | 41 | |
| 42 | +def test_task_targets_html_toc_requires_explicit_repair_intent() -> None: | |
| 43 | + prompt = ( | |
| 44 | + "Have a look at ~/Loader/guides/fortran and chapters/ within. Get a feel " | |
| 45 | + "for the structure and cadence of the guide. We are going to make an all " | |
| 46 | + "new equally thorough guide on how to use the nginx tool. It will live in " | |
| 47 | + "~/Loader/guides/nginx/index.html and ~/Loader/guides/nginx/chapters/." | |
| 48 | + ) | |
| 49 | + | |
| 50 | + assert task_targets_html_toc(prompt) is False | |
| 51 | + | |
| 52 | + | |
| 37 | 53 | def test_build_html_toc_replacement_block_uses_verified_inventory(tmp_path) -> None: |
| 38 | 54 | chapters = tmp_path / "chapters" |
| 39 | 55 | chapters.mkdir() |
@@ -215,25 +231,25 @@ def test_action_tracker_blocks_post_validation_html_rereads_until_new_mutation(t | ||
| 215 | 231 | |
| 216 | 232 | assert tracker.check_tool_call("read", {"file_path": str(index_path)}) == ( |
| 217 | 233 | True, |
| 218 | - "The current index.html already passes the validated chapter-link check; stop rereading index.html or chapters/ and finish the task unless a specific href or title is still unresolved", | |
| 234 | + build_validated_html_toc_observation_reason(index_path), | |
| 219 | 235 | ) |
| 220 | 236 | assert tracker.check_tool_call("read", {"file_path": str(chapter_path)}) == ( |
| 221 | 237 | True, |
| 222 | - "The current index.html already passes the validated chapter-link check; stop rereading index.html or chapters/ and finish the task unless a specific href or title is still unresolved", | |
| 238 | + build_validated_html_toc_observation_reason(chapter_path), | |
| 223 | 239 | ) |
| 224 | 240 | assert tracker.check_tool_call( |
| 225 | 241 | "glob", |
| 226 | 242 | {"path": str(chapters), "pattern": "*.html"}, |
| 227 | 243 | ) == ( |
| 228 | 244 | True, |
| 229 | - "The current index.html already passes the validated chapter-link check; stop rereading index.html or chapters/ and finish the task unless a specific href or title is still unresolved", | |
| 245 | + build_validated_html_toc_observation_reason(chapters), | |
| 230 | 246 | ) |
| 231 | 247 | assert tracker.check_tool_call( |
| 232 | 248 | "bash", |
| 233 | 249 | {"command": f"cat {index_path}"}, |
| 234 | 250 | ) == ( |
| 235 | 251 | True, |
| 236 | - "The current index.html already passes the validated chapter-link check; stop rereading index.html or chapters/ and finish the task unless a specific href or title is still unresolved", | |
| 252 | + build_validated_html_toc_observation_reason(index_path), | |
| 237 | 253 | ) |
| 238 | 254 | |
| 239 | 255 | tracker.record_tool_call( |
@@ -262,21 +278,21 @@ def test_action_tracker_blocks_chapter_rereads_after_verified_inventory(tmp_path | ||
| 262 | 278 | assert tracker.check_tool_call("read", {"file_path": str(index_path)}) == (False, "") |
| 263 | 279 | assert tracker.check_tool_call("read", {"file_path": str(chapter_path)}) == ( |
| 264 | 280 | True, |
| 265 | - "The verified chapter inventory already lists the exact href/title pairs for this directory; update index.html from that inventory instead of rereading chapter files", | |
| 281 | + build_verified_html_inventory_observation_reason(chapter_path), | |
| 266 | 282 | ) |
| 267 | 283 | assert tracker.check_tool_call( |
| 268 | 284 | "glob", |
| 269 | 285 | {"path": str(chapters), "pattern": "*.html"}, |
| 270 | 286 | ) == ( |
| 271 | 287 | True, |
| 272 | - "The verified chapter inventory already lists the exact href/title pairs for this directory; update index.html from that inventory instead of rereading chapter files", | |
| 288 | + build_verified_html_inventory_observation_reason(chapters), | |
| 273 | 289 | ) |
| 274 | 290 | assert tracker.check_tool_call( |
| 275 | 291 | "bash", |
| 276 | 292 | {"command": f"head -20 {chapter_path}"}, |
| 277 | 293 | ) == ( |
| 278 | 294 | True, |
| 279 | - "The verified chapter inventory already lists the exact href/title pairs for this directory; update index.html from that inventory instead of rereading chapter files", | |
| 295 | + build_verified_html_inventory_observation_reason(chapter_path), | |
| 280 | 296 | ) |
| 281 | 297 | |
| 282 | 298 | |
@@ -356,7 +372,7 @@ def test_action_tracker_blocks_second_target_index_reread_after_chapter_discover | ||
| 356 | 372 | is_duplicate, reason = tracker.check_tool_call("read", {"file_path": str(index_path)}) |
| 357 | 373 | |
| 358 | 374 | assert is_duplicate is True |
| 359 | - assert "known file/title evidence" in reason | |
| 375 | + assert "reuse that file/title evidence" in reason | |
| 360 | 376 | |
| 361 | 377 | |
| 362 | 378 | def test_action_tracker_blocks_repeated_chapter_directory_search_once_titles_are_known( |
@@ -374,7 +390,7 @@ def test_action_tracker_blocks_repeated_chapter_directory_search_once_titles_are | ||
| 374 | 390 | is_duplicate, reason = tracker.check_tool_call("glob", search_args) |
| 375 | 391 | |
| 376 | 392 | assert is_duplicate is True |
| 377 | - assert "known filename/title evidence" in reason | |
| 393 | + assert "reuse that filename/title evidence" in reason | |
| 378 | 394 | |
| 379 | 395 | |
| 380 | 396 | def test_action_tracker_allows_repeated_read_after_mutation(tmp_path) -> None: |
tests/test_tool_batch_policies.pymodified@@ -251,6 +251,9 @@ async def test_tool_batch_recovery_controller_returns_follow_up( | ||
| 251 | 251 | assess_confidence=assess_confidence, |
| 252 | 252 | verify_action=verify_action, |
| 253 | 253 | ) |
| 254 | + context.session.current_task = ( | |
| 255 | + "Update index.html so every chapter link and title matches the real HTML files in chapters/." | |
| 256 | + ) | |
| 254 | 257 | controller = ToolBatchRecoveryController(context) |
| 255 | 258 | tool_call = ToolCall(id="bash-1", name="bash", arguments={"command": "pytest"}) |
| 256 | 259 | outcome = tool_outcome(tool_call=tool_call, output="command failed", is_error=True) |
@@ -371,7 +374,7 @@ async def test_tool_batch_recovery_controller_includes_known_state_for_missing_f | ||
| 371 | 374 | assert "04-variables.html" in follow_up.content |
| 372 | 375 | assert "02-basic-syntax.html -> 02-setup.html" in follow_up.content |
| 373 | 376 | assert "02-setup.html = Chapter 2: Setting Up Fortran" in follow_up.content |
| 374 | - assert "`~/Loader/guides/fortran/index.html`" in follow_up.content | |
| 377 | + assert "/Users/mfwolffe/Loader/guides/fortran/index.html" in follow_up.content | |
| 375 | 378 | assert any(event.type == "recovery" for event in events) |
| 376 | 379 | |
| 377 | 380 | |
@@ -464,6 +467,9 @@ async def test_tool_batch_recovery_controller_includes_current_html_target_excer | ||
| 464 | 467 | assess_confidence=assess_confidence, |
| 465 | 468 | verify_action=verify_action, |
| 466 | 469 | ) |
| 470 | + context.session.current_task = ( | |
| 471 | + "Update index.html so every chapter link and title matches the real HTML files in chapters/." | |
| 472 | + ) | |
| 467 | 473 | controller = ToolBatchRecoveryController(context) |
| 468 | 474 | tool_call = ToolCall( |
| 469 | 475 | id="patch-index", |
@@ -513,6 +519,97 @@ async def test_tool_batch_recovery_controller_includes_current_html_target_excer | ||
| 513 | 519 | assert 'old_string="""' in follow_up.content |
| 514 | 520 | |
| 515 | 521 | |
| 522 | +@pytest.mark.asyncio | |
| 523 | +async def test_tool_batch_recovery_controller_scopes_known_state_to_active_target( | |
| 524 | + temp_dir: Path, | |
| 525 | +) -> None: | |
| 526 | + async def assess_confidence( | |
| 527 | + tool_name: str, | |
| 528 | + tool_args: dict, | |
| 529 | + context: str, | |
| 530 | + ) -> ConfidenceAssessment: | |
| 531 | + raise AssertionError("Confidence should not run here") | |
| 532 | + | |
| 533 | + async def verify_action( | |
| 534 | + tool_name: str, | |
| 535 | + tool_args: dict, | |
| 536 | + result: str, | |
| 537 | + expected: str = "", | |
| 538 | + ) -> ActionVerification: | |
| 539 | + raise AssertionError("Verification should not run here") | |
| 540 | + | |
| 541 | + nginx_chapters = temp_dir / "nginx" / "chapters" | |
| 542 | + nginx_chapters.mkdir(parents=True) | |
| 543 | + nginx_index = temp_dir / "nginx" / "index.html" | |
| 544 | + nginx_index.write_text( | |
| 545 | + "<h2>Table of Contents</h2>\n" | |
| 546 | + "<ul>\n" | |
| 547 | + ' <li><a href="chapters/01_getting_started.html">Getting Started with NGINX</a></li>\n' | |
| 548 | + ' <li><a href="chapters/02_installation.html">Installation</a></li>\n' | |
| 549 | + "</ul>\n" | |
| 550 | + ) | |
| 551 | + (nginx_chapters / "01_getting_started.html").write_text( | |
| 552 | + "<h1>Getting Started with NGINX</h1>\n" | |
| 553 | + ) | |
| 554 | + | |
| 555 | + context = build_context( | |
| 556 | + temp_dir=temp_dir, | |
| 557 | + messages=[ | |
| 558 | + Message( | |
| 559 | + role=Role.TOOL, | |
| 560 | + content=( | |
| 561 | + "Observation [read]: Result: " | |
| 562 | + f"{temp_dir / 'fortran' / 'index.html'}\n" | |
| 563 | + "Semantic verification preview: validated 12 toc links in index.html" | |
| 564 | + ), | |
| 565 | + ), | |
| 566 | + ], | |
| 567 | + assess_confidence=assess_confidence, | |
| 568 | + verify_action=verify_action, | |
| 569 | + ) | |
| 570 | + context.session.current_task = ( # type: ignore[attr-defined] | |
| 571 | + "Have a look at ~/Loader/guides/fortran and chapters/ within. Get a feel " | |
| 572 | + "for the structure and cadence of the guide. We are going to make an all " | |
| 573 | + "new equally thorough guide on how to use the nginx tool. It will live in " | |
| 574 | + "~/Loader/guides/nginx/index.html and ~/Loader/guides/nginx/chapters/." | |
| 575 | + ) | |
| 576 | + controller = ToolBatchRecoveryController(context) | |
| 577 | + tool_call = ToolCall( | |
| 578 | + id="edit-nginx", | |
| 579 | + name="edit", | |
| 580 | + arguments={ | |
| 581 | + "file_path": str(nginx_index), | |
| 582 | + "old_string": "<ul>\n</ul>", | |
| 583 | + "new_string": "<ul class=\"chapter-list\">\n</ul>", | |
| 584 | + }, | |
| 585 | + ) | |
| 586 | + outcome = tool_outcome( | |
| 587 | + tool_call=tool_call, | |
| 588 | + output=( | |
| 589 | + "Tool execution error: EditTool.execute() missing 1 required positional " | |
| 590 | + "argument: 'new_string'" | |
| 591 | + ), | |
| 592 | + is_error=True, | |
| 593 | + ) | |
| 594 | + | |
| 595 | + events: list[AgentEvent] = [] | |
| 596 | + | |
| 597 | + async def emit(event: AgentEvent) -> None: | |
| 598 | + events.append(event) | |
| 599 | + | |
| 600 | + follow_up = await controller.build_follow_up( | |
| 601 | + tool_call=tool_call, | |
| 602 | + outcome=outcome, | |
| 603 | + emit=emit, | |
| 604 | + ) | |
| 605 | + | |
| 606 | + assert follow_up is not None | |
| 607 | + assert ( | |
| 608 | + "Preferred next step: Update " | |
| 609 | + f"`{temp_dir / 'fortran' / 'index.html'}`" | |
| 610 | + ) not in follow_up.content | |
| 611 | + | |
| 612 | + | |
| 516 | 613 | @pytest.mark.asyncio |
| 517 | 614 | async def test_tool_batch_recovery_controller_reuses_context_for_related_missing_files( |
| 518 | 615 | temp_dir: Path, |
tests/test_tool_batches.pymodified@@ -28,6 +28,7 @@ from loader.runtime.reasoning_types import ( | ||
| 28 | 28 | ) |
| 29 | 29 | from loader.runtime.recovery import RecoveryContext |
| 30 | 30 | from loader.runtime.tool_batches import ToolBatchRunner |
| 31 | +from loader.runtime.workflow import sync_todos_to_definition_of_done | |
| 31 | 32 | from loader.tools.base import ToolResult as RegistryToolResult |
| 32 | 33 | from loader.tools.base import create_default_registry |
| 33 | 34 | from tests.helpers.runtime_harness import ScriptedBackend |
@@ -802,6 +803,9 @@ async def test_tool_batch_runner_marks_validated_html_toc_completion_after_succe | ||
| 802 | 803 | verify_action=verify_action, |
| 803 | 804 | auto_recover=False, |
| 804 | 805 | ) |
| 806 | + context.session.current_task = ( | |
| 807 | + "Update index.html so every chapter link and title matches the real HTML files in chapters/." | |
| 808 | + ) | |
| 805 | 809 | queued_messages: list[str] = [] |
| 806 | 810 | context.queue_steering_message_callback = queued_messages.append |
| 807 | 811 | runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) |
@@ -831,7 +835,9 @@ async def test_tool_batch_runner_marks_validated_html_toc_completion_after_succe | ||
| 831 | 835 | pending_tool_calls_seen=set(), |
| 832 | 836 | emit=_noop_emit, |
| 833 | 837 | summary=summary, |
| 834 | - dod=create_definition_of_done("Fix the chapter links"), | |
| 838 | + dod=create_definition_of_done( | |
| 839 | + "Update index.html so every chapter link and title matches the real HTML files in chapters/." | |
| 840 | + ), | |
| 835 | 841 | executor=executor, # type: ignore[arg-type] |
| 836 | 842 | on_confirmation=None, |
| 837 | 843 | on_user_question=None, |
@@ -845,8 +851,314 @@ async def test_tool_batch_runner_marks_validated_html_toc_completion_after_succe | ||
| 845 | 851 | for message in summary.tool_result_messages |
| 846 | 852 | ) |
| 847 | 853 | assert len(queued_messages) == 1 |
| 848 | - assert "already satisfies the verified chapter-link constraints" in queued_messages[0] | |
| 849 | - assert "Do not reread `index.html` or files in `chapters/`" in queued_messages[0] | |
| 854 | + assert "already satisfies the verified link/title constraints" in queued_messages[0] | |
| 855 | + assert f"`{index_path}`" in queued_messages[0] | |
| 856 | + assert f"`{chapters}`" in queued_messages[0] | |
| 857 | + | |
| 858 | + | |
| 859 | +@pytest.mark.asyncio | |
| 860 | +async def test_tool_batch_runner_does_not_apply_html_toc_handoff_to_reference_read( | |
| 861 | + temp_dir: Path, | |
| 862 | +) -> None: | |
| 863 | + async def assess_confidence( | |
| 864 | + tool_name: str, | |
| 865 | + tool_args: dict, | |
| 866 | + context: str, | |
| 867 | + ) -> ConfidenceAssessment: | |
| 868 | + raise AssertionError("Confidence scoring should be disabled in this scenario") | |
| 869 | + | |
| 870 | + async def verify_action( | |
| 871 | + tool_name: str, | |
| 872 | + tool_args: dict, | |
| 873 | + result: str, | |
| 874 | + expected: str = "", | |
| 875 | + ) -> ActionVerification: | |
| 876 | + raise AssertionError("Verification should not run for this scenario") | |
| 877 | + | |
| 878 | + chapters = temp_dir / "chapters" | |
| 879 | + chapters.mkdir() | |
| 880 | + (chapters / "01-introduction.html").write_text( | |
| 881 | + "<h1>Chapter 1: Introduction to Fortran</h1>\n" | |
| 882 | + ) | |
| 883 | + (chapters / "02-setup.html").write_text( | |
| 884 | + "<h1>Chapter 2: Setting Up Your Environment</h1>\n" | |
| 885 | + ) | |
| 886 | + index_path = temp_dir / "index.html" | |
| 887 | + index_path.write_text( | |
| 888 | + "<h2>Table of Contents</h2>\n" | |
| 889 | + '<ul class="chapter-list">\n' | |
| 890 | + ' <li><a href="chapters/01-introduction.html">Chapter 1: Introduction to Fortran</a></li>\n' | |
| 891 | + ' <li><a href="chapters/02-setup.html">Chapter 2: Setting Up Your Environment</a></li>\n' | |
| 892 | + "</ul>\n" | |
| 893 | + ) | |
| 894 | + | |
| 895 | + prompt = ( | |
| 896 | + "Have a look at ~/Loader/guides/fortran and chapters/ within. Get a feel " | |
| 897 | + "for the structure and cadence of the guide. We are going to make an all " | |
| 898 | + "new equally thorough guide on how to use the nginx tool." | |
| 899 | + ) | |
| 900 | + | |
| 901 | + context = build_context( | |
| 902 | + temp_dir=temp_dir, | |
| 903 | + messages=[], | |
| 904 | + safeguards=FakeSafeguards(), | |
| 905 | + assess_confidence=assess_confidence, | |
| 906 | + verify_action=verify_action, | |
| 907 | + auto_recover=False, | |
| 908 | + ) | |
| 909 | + context.session.current_task = prompt # type: ignore[attr-defined] | |
| 910 | + queued_messages: list[str] = [] | |
| 911 | + context.queue_steering_message_callback = queued_messages.append | |
| 912 | + runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) | |
| 913 | + tool_call = ToolCall( | |
| 914 | + id="read-index", | |
| 915 | + name="read", | |
| 916 | + arguments={"file_path": str(index_path)}, | |
| 917 | + ) | |
| 918 | + executor = FakeExecutor( | |
| 919 | + [ | |
| 920 | + tool_outcome( | |
| 921 | + tool_call=tool_call, | |
| 922 | + output=index_path.read_text(), | |
| 923 | + is_error=False, | |
| 924 | + ) | |
| 925 | + ] | |
| 926 | + ) | |
| 927 | + | |
| 928 | + summary = TurnSummary(final_response="") | |
| 929 | + await runner.execute_batch( | |
| 930 | + tool_calls=[tool_call], | |
| 931 | + tool_source="assistant", | |
| 932 | + pending_tool_calls_seen=set(), | |
| 933 | + emit=_noop_emit, | |
| 934 | + summary=summary, | |
| 935 | + dod=create_definition_of_done(prompt), | |
| 936 | + executor=executor, # type: ignore[arg-type] | |
| 937 | + on_confirmation=None, | |
| 938 | + on_user_question=None, | |
| 939 | + emit_confirmation=None, | |
| 940 | + consecutive_errors=0, | |
| 941 | + ) | |
| 942 | + | |
| 943 | + assert queued_messages == [] | |
| 944 | + assert all( | |
| 945 | + "Semantic verification preview:" not in message.content | |
| 946 | + for message in summary.tool_result_messages | |
| 947 | + ) | |
| 948 | + | |
| 949 | + | |
| 950 | +@pytest.mark.asyncio | |
| 951 | +async def test_tool_batch_runner_queues_next_pending_todo_after_discovery_progress( | |
| 952 | + temp_dir: Path, | |
| 953 | +) -> None: | |
| 954 | + async def assess_confidence( | |
| 955 | + tool_name: str, | |
| 956 | + tool_args: dict, | |
| 957 | + context: str, | |
| 958 | + ) -> ConfidenceAssessment: | |
| 959 | + raise AssertionError("Confidence scoring should be disabled in this scenario") | |
| 960 | + | |
| 961 | + async def verify_action( | |
| 962 | + tool_name: str, | |
| 963 | + tool_args: dict, | |
| 964 | + result: str, | |
| 965 | + expected: str = "", | |
| 966 | + ) -> ActionVerification: | |
| 967 | + raise AssertionError("Verification should not run for this scenario") | |
| 968 | + | |
| 969 | + reference = temp_dir / "fortran" / "index.html" | |
| 970 | + reference.parent.mkdir(parents=True) | |
| 971 | + reference.write_text("<h1>Fortran Beginner's Guide</h1>\n") | |
| 972 | + | |
| 973 | + context = build_context( | |
| 974 | + temp_dir=temp_dir, | |
| 975 | + messages=[], | |
| 976 | + safeguards=FakeSafeguards(), | |
| 977 | + assess_confidence=assess_confidence, | |
| 978 | + verify_action=verify_action, | |
| 979 | + auto_recover=False, | |
| 980 | + ) | |
| 981 | + queued_messages: list[str] = [] | |
| 982 | + context.queue_steering_message_callback = queued_messages.append | |
| 983 | + runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) | |
| 984 | + dod = create_definition_of_done("Create an equally thorough nginx guide.") | |
| 985 | + sync_todos_to_definition_of_done( | |
| 986 | + dod, | |
| 987 | + [ | |
| 988 | + { | |
| 989 | + "content": "Examine the existing Fortran guide structure to understand the cadence and format", | |
| 990 | + "active_form": "Working on: Examine the existing Fortran guide structure to understand the cadence and format", | |
| 991 | + "status": "pending", | |
| 992 | + }, | |
| 993 | + { | |
| 994 | + "content": "Create the nginx directory structure", | |
| 995 | + "active_form": "Working on: Create the nginx directory structure", | |
| 996 | + "status": "pending", | |
| 997 | + }, | |
| 998 | + { | |
| 999 | + "content": "Create the nginx index.html file", | |
| 1000 | + "active_form": "Working on: Create the nginx index.html file", | |
| 1001 | + "status": "pending", | |
| 1002 | + }, | |
| 1003 | + ], | |
| 1004 | + ) | |
| 1005 | + tool_call = ToolCall( | |
| 1006 | + id="read-reference", | |
| 1007 | + name="read", | |
| 1008 | + arguments={"file_path": str(reference)}, | |
| 1009 | + ) | |
| 1010 | + executor = FakeExecutor( | |
| 1011 | + [ | |
| 1012 | + tool_outcome( | |
| 1013 | + tool_call=tool_call, | |
| 1014 | + output="<h1>Fortran Beginner's Guide</h1>\n", | |
| 1015 | + is_error=False, | |
| 1016 | + ) | |
| 1017 | + ] | |
| 1018 | + ) | |
| 1019 | + | |
| 1020 | + summary = TurnSummary(final_response="") | |
| 1021 | + await runner.execute_batch( | |
| 1022 | + tool_calls=[tool_call], | |
| 1023 | + tool_source="assistant", | |
| 1024 | + pending_tool_calls_seen=set(), | |
| 1025 | + emit=_noop_emit, | |
| 1026 | + summary=summary, | |
| 1027 | + dod=dod, | |
| 1028 | + executor=executor, # type: ignore[arg-type] | |
| 1029 | + on_confirmation=None, | |
| 1030 | + on_user_question=None, | |
| 1031 | + emit_confirmation=None, | |
| 1032 | + consecutive_errors=0, | |
| 1033 | + ) | |
| 1034 | + | |
| 1035 | + assert ( | |
| 1036 | + "Examine the existing Fortran guide structure to understand the cadence and format" | |
| 1037 | + in dod.completed_items | |
| 1038 | + ) | |
| 1039 | + assert any( | |
| 1040 | + "Continue with the next pending item: `Create the nginx directory structure`" | |
| 1041 | + in message | |
| 1042 | + for message in queued_messages | |
| 1043 | + ) | |
| 1044 | + | |
| 1045 | + | |
| 1046 | +@pytest.mark.asyncio | |
| 1047 | +async def test_tool_batch_runner_duplicate_reference_read_prefers_next_pending_todo( | |
| 1048 | + temp_dir: Path, | |
| 1049 | +) -> None: | |
| 1050 | + async def assess_confidence( | |
| 1051 | + tool_name: str, | |
| 1052 | + tool_args: dict, | |
| 1053 | + context: str, | |
| 1054 | + ) -> ConfidenceAssessment: | |
| 1055 | + raise AssertionError("Confidence scoring should be disabled in this scenario") | |
| 1056 | + | |
| 1057 | + async def verify_action( | |
| 1058 | + tool_name: str, | |
| 1059 | + tool_args: dict, | |
| 1060 | + result: str, | |
| 1061 | + expected: str = "", | |
| 1062 | + ) -> ActionVerification: | |
| 1063 | + raise AssertionError("Verification should not run for this scenario") | |
| 1064 | + | |
| 1065 | + reference = temp_dir / "fortran" / "index.html" | |
| 1066 | + reference.parent.mkdir(parents=True) | |
| 1067 | + reference.write_text("<h1>Fortran Beginner's Guide</h1>\n") | |
| 1068 | + | |
| 1069 | + messages = [ | |
| 1070 | + Message( | |
| 1071 | + role=Role.TOOL, | |
| 1072 | + content=( | |
| 1073 | + "Observation [read]: Result: " | |
| 1074 | + "<h1>Fortran Beginner's Guide</h1>\n" | |
| 1075 | + ), | |
| 1076 | + ) | |
| 1077 | + ] | |
| 1078 | + context = build_context( | |
| 1079 | + temp_dir=temp_dir, | |
| 1080 | + messages=messages, | |
| 1081 | + safeguards=FakeSafeguards(), | |
| 1082 | + assess_confidence=assess_confidence, | |
| 1083 | + verify_action=verify_action, | |
| 1084 | + auto_recover=False, | |
| 1085 | + ) | |
| 1086 | + prompt = ( | |
| 1087 | + "Have a look at ~/Loader/guides/fortran and chapters/ within. Get a feel " | |
| 1088 | + "for the structure and cadence of the guide. We are going to make an all " | |
| 1089 | + "new equally thorough guide on how to use the nginx tool." | |
| 1090 | + ) | |
| 1091 | + context.session.current_task = prompt | |
| 1092 | + queued_messages: list[str] = [] | |
| 1093 | + context.queue_steering_message_callback = queued_messages.append | |
| 1094 | + runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) | |
| 1095 | + dod = create_definition_of_done(prompt) | |
| 1096 | + sync_todos_to_definition_of_done( | |
| 1097 | + dod, | |
| 1098 | + [ | |
| 1099 | + { | |
| 1100 | + "content": "Examine the existing Fortran guide structure to understand the cadence and format", | |
| 1101 | + "active_form": "Working on: Examine the existing Fortran guide structure to understand the cadence and format", | |
| 1102 | + "status": "completed", | |
| 1103 | + }, | |
| 1104 | + { | |
| 1105 | + "content": "Create the nginx directory structure", | |
| 1106 | + "active_form": "Working on: Create the nginx directory structure", | |
| 1107 | + "status": "pending", | |
| 1108 | + }, | |
| 1109 | + { | |
| 1110 | + "content": "Create the nginx index.html file", | |
| 1111 | + "active_form": "Working on: Create the nginx index.html file", | |
| 1112 | + "status": "pending", | |
| 1113 | + }, | |
| 1114 | + ], | |
| 1115 | + ) | |
| 1116 | + tool_call = ToolCall( | |
| 1117 | + id="read-dup", | |
| 1118 | + name="read", | |
| 1119 | + arguments={"file_path": str(reference)}, | |
| 1120 | + ) | |
| 1121 | + duplicate_message = ( | |
| 1122 | + "[Skipped - duplicate action: Already read " | |
| 1123 | + f"{reference} recently without any intervening changes; " | |
| 1124 | + "reuse the earlier read result instead of rereading]" | |
| 1125 | + ) | |
| 1126 | + executor = FakeExecutor( | |
| 1127 | + [ | |
| 1128 | + ToolExecutionOutcome( | |
| 1129 | + tool_call=tool_call, | |
| 1130 | + state=ToolExecutionState.DUPLICATE, | |
| 1131 | + message=Message.tool_result_message( | |
| 1132 | + tool_call_id=tool_call.id, | |
| 1133 | + display_content=duplicate_message, | |
| 1134 | + result_content=duplicate_message, | |
| 1135 | + ), | |
| 1136 | + event_content=duplicate_message, | |
| 1137 | + is_error=False, | |
| 1138 | + result_output=duplicate_message, | |
| 1139 | + ) | |
| 1140 | + ] | |
| 1141 | + ) | |
| 1142 | + | |
| 1143 | + summary = TurnSummary(final_response="") | |
| 1144 | + await runner.execute_batch( | |
| 1145 | + tool_calls=[tool_call], | |
| 1146 | + tool_source="assistant", | |
| 1147 | + pending_tool_calls_seen=set(), | |
| 1148 | + emit=_noop_emit, | |
| 1149 | + summary=summary, | |
| 1150 | + dod=dod, | |
| 1151 | + executor=executor, # type: ignore[arg-type] | |
| 1152 | + on_confirmation=None, | |
| 1153 | + on_user_question=None, | |
| 1154 | + emit_confirmation=None, | |
| 1155 | + consecutive_errors=0, | |
| 1156 | + ) | |
| 1157 | + | |
| 1158 | + assert len(queued_messages) == 1 | |
| 1159 | + assert "Reuse the earlier observation instead of repeating it." in queued_messages[0] | |
| 1160 | + assert "Continue with the next pending item: `Create the nginx directory structure`" in queued_messages[0] | |
| 1161 | + assert "Update `" not in queued_messages[0] | |
| 850 | 1162 | |
| 851 | 1163 | |
| 852 | 1164 | @pytest.mark.asyncio |
@@ -943,7 +1255,8 @@ async def test_tool_batch_runner_hands_off_noop_toc_edit_when_file_is_already_va | ||
| 943 | 1255 | |
| 944 | 1256 | assert len(queued_messages) == 1 |
| 945 | 1257 | assert "already matches the validated replacement block" in queued_messages[0] |
| 946 | - assert "validated 2 toc links in `index.html`" in queued_messages[0] | |
| 1258 | + assert "validated 2 linked entries" in queued_messages[0] | |
| 1259 | + assert f"`{index_path}`" in queued_messages[0] | |
| 947 | 1260 | assert "Do not call `edit`, `patch`, or reread the same TOC again" in queued_messages[0] |
| 948 | 1261 | |
| 949 | 1262 | |
tests/test_workflow.pymodified@@ -17,6 +17,8 @@ from loader.runtime.workflow import ( | ||
| 17 | 17 | build_execute_bridge, |
| 18 | 18 | enrich_clarify_brief_with_grounding, |
| 19 | 19 | extract_verification_commands_from_markdown, |
| 20 | + merge_refreshed_todos_with_existing_scope, | |
| 21 | + preserve_task_grounded_acceptance_criteria, | |
| 20 | 22 | sync_todos_to_definition_of_done, |
| 21 | 23 | ) |
| 22 | 24 | |
@@ -260,6 +262,115 @@ def test_extract_verification_commands_keeps_shell_pipelines_intact() -> None: | ||
| 260 | 262 | ] |
| 261 | 263 | |
| 262 | 264 | |
| 265 | +def test_preserve_task_grounded_acceptance_criteria_keeps_original_scope_on_refresh() -> None: | |
| 266 | + task = ( | |
| 267 | + "Create an equally thorough nginx guide with index.html plus chapter files " | |
| 268 | + "covering getting started, installation, first website setup, configs, and " | |
| 269 | + "advanced topics." | |
| 270 | + ) | |
| 271 | + | |
| 272 | + preserved = preserve_task_grounded_acceptance_criteria( | |
| 273 | + task, | |
| 274 | + existing_acceptance_criteria=[ | |
| 275 | + "All files are created in the correct locations with proper directory structure", | |
| 276 | + "Content covers all required topics: getting started, installation, first website, configuration basics, advanced configurations, and troubleshooting", | |
| 277 | + ], | |
| 278 | + refreshed_acceptance_criteria=[ | |
| 279 | + "At least one chapter file exists in ~/Loader/guides/nginx/chapters/", | |
| 280 | + "~/Loader/guides/nginx/index.html exists and contains proper table of contents", | |
| 281 | + ], | |
| 282 | + ) | |
| 283 | + | |
| 284 | + assert ( | |
| 285 | + "All files are created in the correct locations with proper directory structure" | |
| 286 | + in preserved | |
| 287 | + ) | |
| 288 | + assert ( | |
| 289 | + "Content covers all required topics: getting started, installation, first website, configuration basics, advanced configurations, and troubleshooting" | |
| 290 | + in preserved | |
| 291 | + ) | |
| 292 | + assert "At least one chapter file exists in ~/Loader/guides/nginx/chapters/" in preserved | |
| 293 | + | |
| 294 | + | |
| 295 | +def test_preserve_task_grounded_acceptance_criteria_drops_stale_plan_specific_scope() -> None: | |
| 296 | + task = ( | |
| 297 | + "Implement a persistent workflow artifact with planning artifacts, " | |
| 298 | + "verification commands, and plan refresh discipline." | |
| 299 | + ) | |
| 300 | + | |
| 301 | + preserved = preserve_task_grounded_acceptance_criteria( | |
| 302 | + task, | |
| 303 | + existing_acceptance_criteria=["planned.txt exists in the workspace root."], | |
| 304 | + refreshed_acceptance_criteria=["notes.txt exists in the workspace root."], | |
| 305 | + ) | |
| 306 | + | |
| 307 | + assert preserved == ["notes.txt exists in the workspace root."] | |
| 308 | + | |
| 309 | + | |
| 310 | +def test_planning_artifacts_with_acceptance_criteria_rewrites_verification_markdown() -> None: | |
| 311 | + artifacts = PlanningArtifacts.from_model_output( | |
| 312 | + "\n".join( | |
| 313 | + [ | |
| 314 | + "# Implementation Plan", | |
| 315 | + "", | |
| 316 | + "## Execution Order", | |
| 317 | + "1. Create the guide files.", | |
| 318 | + "", | |
| 319 | + "<<<VERIFICATION>>>", | |
| 320 | + "", | |
| 321 | + "# Verification Plan", | |
| 322 | + "", | |
| 323 | + "## Acceptance Criteria", | |
| 324 | + "- At least one chapter file exists.", | |
| 325 | + "", | |
| 326 | + "## Verification Commands", | |
| 327 | + "- `find chapters -name \"*.html\" | wc -l`", | |
| 328 | + ] | |
| 329 | + ), | |
| 330 | + task_statement="Create a thorough nginx guide.", | |
| 331 | + ) | |
| 332 | + | |
| 333 | + updated = artifacts.with_acceptance_criteria( | |
| 334 | + [ | |
| 335 | + "All files are created in the correct locations.", | |
| 336 | + "Content covers getting started, installation, and advanced topics.", | |
| 337 | + ] | |
| 338 | + ) | |
| 339 | + | |
| 340 | + assert "At least one chapter file exists." not in updated.verification_markdown | |
| 341 | + assert "All files are created in the correct locations." in updated.verification_markdown | |
| 342 | + assert ( | |
| 343 | + "Content covers getting started, installation, and advanced topics." | |
| 344 | + in updated.verification_markdown | |
| 345 | + ) | |
| 346 | + | |
| 347 | + | |
| 348 | +def test_merge_refreshed_todos_with_existing_scope_keeps_grounded_progress() -> None: | |
| 349 | + task = ( | |
| 350 | + "Create an equally thorough nginx guide with index.html plus chapter files " | |
| 351 | + "covering getting started, installation, first website setup, configs, and " | |
| 352 | + "advanced topics." | |
| 353 | + ) | |
| 354 | + | |
| 355 | + todos = merge_refreshed_todos_with_existing_scope( | |
| 356 | + task, | |
| 357 | + existing_pending_items=[ | |
| 358 | + "Create each chapter file in sequence, following the established pattern", | |
| 359 | + "Collect verification evidence", | |
| 360 | + ], | |
| 361 | + existing_completed_items=["Create directory structure for the new nginx guide"], | |
| 362 | + refreshed_steps=["Create sample chapter file to verify the structure works"], | |
| 363 | + ) | |
| 364 | + | |
| 365 | + assert todos[0]["content"] == "Create directory structure for the new nginx guide" | |
| 366 | + assert todos[0]["status"] == "completed" | |
| 367 | + assert any( | |
| 368 | + item["content"] == "Create each chapter file in sequence, following the established pattern" | |
| 369 | + and item["status"] == "pending" | |
| 370 | + for item in todos | |
| 371 | + ) | |
| 372 | + | |
| 373 | + | |
| 263 | 374 | def test_workflow_artifact_store_and_bridge_round_trip(tmp_path: Path) -> None: |
| 264 | 375 | store = WorkflowArtifactStore(tmp_path) |
| 265 | 376 | brief = ClarifyBrief.fallback( |