tenseleyflow/loader / df5639d

Browse files

Strengthen qwen recovery and repair flow

Authored by espadonne
SHA
df5639d514ff7b3f567a54307ba0f5d0bd3e03df
Parents
297e213
Tree
a3395ea

34 changed files

StatusFile+-
M src/loader/runtime/artifact_invalidation.py 14 2
M src/loader/runtime/compaction.py 3 206
M src/loader/runtime/dod.py 515 19
M src/loader/runtime/explore.py 1 0
M src/loader/runtime/finalization.py 379 30
M src/loader/runtime/hooks.py 593 6
M src/loader/runtime/repair.py 454 10
A src/loader/runtime/repair_focus.py 132 0
M src/loader/runtime/safeguard_services.py 197 353
M src/loader/runtime/tool_batch_recovery.py 338 51
M src/loader/runtime/tool_batches.py 666 252
M src/loader/runtime/turn_completion.py 3 4
M src/loader/runtime/turn_iteration.py 10 4
M src/loader/runtime/turn_loop.py 1 1
M src/loader/runtime/turn_preparation.py 1 0
M src/loader/runtime/workflow.py 376 7
M src/loader/runtime/workflow_lanes.py 51 2
M src/loader/runtime/workflow_recovery.py 85 1
M src/loader/tools/workflow_tools.py 24 0
M tests/test_artifact_invalidation.py 46 0
M tests/test_compaction.py 6 17
M tests/test_dod.py 168 0
M tests/test_finalization.py 299 2
M tests/test_permissions.py 1012 2
M tests/test_repair.py 567 0
M tests/test_runtime_harness.py 12 64
M tests/test_runtime_repair_flows.py 114 3
M tests/test_safeguard_services.py 107 94
M tests/test_tool_batch_policies.py 226 14
M tests/test_tool_batches.py 2066 117
M tests/test_turn_completion.py 97 0
M tests/test_workflow.py 530 0
A tests/test_workflow_recovery.py 20 0
M tests/test_workflow_tools.py 59 0
src/loader/runtime/artifact_invalidation.pymodified
@@ -34,6 +34,8 @@ class ArtifactInvalidationAssessor:
3434
         acceptance_criteria: list[str],
3535
         touched_files: list[str],
3636
         last_verification_result: str | None,
37
+        retry_count: int = 0,
38
+        planned_artifacts_complete: bool = False,
3739
     ) -> ArtifactFreshness:
3840
         """Return stale-artifact state and the recommended recovery strategy."""
3941
 
@@ -46,10 +48,12 @@ class ArtifactInvalidationAssessor:
4648
         reason_codes: list[str] = []
4749
         evidence: list[ArtifactEvidence] = []
4850
 
51
+        allow_repair_local_touchpoints = planned_artifacts_complete and retry_count > 0
4952
         unexpected_paths = [
5053
             name
5154
             for path in touched_files
52
-            if (name := _path_name(path)) and not _text_covers_path_reference(plan_text, path)
55
+            if (name := _path_name(path))
56
+            and not _text_covers_path_reference(plan_text, path)
5357
         ]
5458
         confirmed_touchpoints = [
5559
             name
@@ -86,13 +90,21 @@ class ArtifactInvalidationAssessor:
8690
                 f"Persisted artifacts still point at `{item}`.",
8791
             )
8892
 
89
-        if unexpected_paths:
93
+        if unexpected_paths and not allow_repair_local_touchpoints:
9094
             stale_plan = True
9195
             reason_codes.append("touched_files_outside_plan")
9296
             reasons.append(
9397
                 "Touched files outside the current plan: "
9498
                 + ", ".join(dict.fromkeys(unexpected_paths))
9599
             )
100
+        elif unexpected_paths:
101
+            for item in dict.fromkeys(unexpected_paths):
102
+                _append_evidence(
103
+                    evidence,
104
+                    ArtifactEvidenceKind.CONFIRMED_TOUCHPOINT,
105
+                    "Verification repair touched supplemental file "
106
+                    f"`{item}` after the originally planned artifacts were complete.",
107
+                )
96108
 
97109
         acceptance_anchors = [
98110
             item
src/loader/runtime/compaction.pymodified
@@ -8,7 +8,6 @@ from dataclasses import dataclass
88
 from pathlib import Path
99
 
1010
 from ..llm.base import Message, Role, ToolCall
11
-from .semantic_rules import html_toc as html_toc_rule
1211
 
1312
 DEFAULT_AUTO_COMPACTION_INPUT_TOKENS_THRESHOLD = 100_000
1413
 MIN_AUTO_COMPACTION_INPUT_TOKENS_THRESHOLD = 12_000
@@ -337,19 +336,7 @@ def infer_preferred_next_step(
337336
         current_task=current_task,
338337
         focus_path=focus_path,
339338
     )
340
-    has_confirmed_titles = _summarize_html_title_discovery(relevant_messages) is not None
341
-    verification_gap = _summarize_latest_html_verification_gap(relevant_messages)
342339
     if target_path:
343
-        if verification_gap:
344
-            return (
345
-                f"Update `{target_path}` to fix the specific verification failures "
346
-                f"({verification_gap}) instead of restarting discovery."
347
-            )
348
-        if has_confirmed_titles:
349
-            return (
350
-                f"Update `{target_path}` using the confirmed chapter file/title pairs "
351
-                "instead of rereading files."
352
-            )
353340
         return (
354341
             f"Update `{target_path}` using the confirmed findings instead of "
355342
             "restarting earlier discovery steps."
@@ -440,27 +427,6 @@ def _collect_confirmed_facts(messages: list[Message]) -> list[str]:
440427
     if explicit_mapping_fact:
441428
         facts.append(explicit_mapping_fact)
442429
 
443
-    verification_gap_fact = _collect_html_verification_gap_fact(
444
-        messages,
445
-        tool_calls_by_id=tool_calls_by_id,
446
-    )
447
-    if verification_gap_fact:
448
-        facts.append(verification_gap_fact)
449
-
450
-    title_fact = _summarize_html_title_discovery(
451
-        messages,
452
-        tool_calls_by_id=tool_calls_by_id,
453
-    )
454
-    if title_fact:
455
-        facts.append(title_fact)
456
-
457
-    file_fact = _collect_html_file_discovery_fact(
458
-        messages,
459
-        tool_calls_by_id=tool_calls_by_id,
460
-    )
461
-    if file_fact:
462
-        facts.append(file_fact)
463
-
464430
     return facts
465431
 
466432
 
@@ -529,162 +495,6 @@ def _summarize_html_mappings(payload: str) -> str | None:
529495
     return f"Filename mappings confirmed: {preview}"
530496
 
531497
 
532
-def _summarize_html_title_discovery(
533
-    messages: list[Message],
534
-    *,
535
-    max_pairs: int = 4,
536
-    tool_calls_by_id: dict[str, ToolCall] | None = None,
537
-) -> str | None:
538
-    if tool_calls_by_id is None:
539
-        tool_calls_by_id = {
540
-            tool_call.id: tool_call
541
-            for message in messages
542
-            for tool_call in message.tool_calls
543
-        }
544
-
545
-    confirmed_pairs: list[str] = []
546
-    for message in messages:
547
-        if message.role != Role.TOOL or _is_compacted_context_message(message.content):
548
-            continue
549
-        if any(result.is_error for result in message.tool_results):
550
-            continue
551
-
552
-        tool_call = next(
553
-            (
554
-                tool_calls_by_id.get(result.tool_call_id)
555
-                for result in message.tool_results
556
-                if result.tool_call_id in tool_calls_by_id
557
-            ),
558
-            None,
559
-        )
560
-        if tool_call is None or tool_call.name != "read":
561
-            continue
562
-
563
-        raw_path = tool_call.arguments.get("file_path")
564
-        if not isinstance(raw_path, str):
565
-            continue
566
-        normalized_path = _normalize_path_candidate(raw_path) or raw_path
567
-        if html_toc_rule.is_html_toc_index_path(normalized_path) or "/chapters/" not in normalized_path:
568
-            continue
569
-
570
-        payload = "\n".join(
571
-            result.content.strip()
572
-            for result in message.tool_results
573
-            if result.content.strip()
574
-        ) or message.content
575
-        title = html_toc_rule.extract_html_title_from_text(payload)
576
-        if not title:
577
-            continue
578
-
579
-        pair = f"{Path(normalized_path).name} = {title}"
580
-        if pair not in confirmed_pairs:
581
-            confirmed_pairs.append(pair)
582
-
583
-    if not confirmed_pairs:
584
-        return None
585
-
586
-    preview = ", ".join(confirmed_pairs[:max_pairs])
587
-    if len(confirmed_pairs) > max_pairs:
588
-        preview += ", ..."
589
-    return f"Chapter titles confirmed: {preview}"
590
-
591
-
592
-def _collect_html_file_discovery_fact(
593
-    messages: list[Message],
594
-    *,
595
-    tool_calls_by_id: dict[str, ToolCall],
596
-) -> str | None:
597
-    filenames: list[str] = []
598
-    for message in messages:
599
-        if message.role != Role.TOOL or _is_compacted_context_message(message.content):
600
-            continue
601
-        if any(result.is_error for result in message.tool_results):
602
-            continue
603
-
604
-        tool_name = _resolve_tool_name(
605
-            message,
606
-            tool_calls_by_id=tool_calls_by_id,
607
-        )
608
-        if tool_name not in {"glob", "bash"}:
609
-            continue
610
-
611
-        payload = "\n".join(
612
-            result.content.strip()
613
-            for result in message.tool_results
614
-            if result.content.strip()
615
-        ) or message.content
616
-        matches = re.findall(r"([A-Za-z0-9_.-]+\.html)", payload)
617
-        for name in matches:
618
-            if name not in filenames:
619
-                filenames.append(name)
620
-
621
-    if len(filenames) < 3:
622
-        return None
623
-
624
-    preview = ", ".join(filenames[:6])
625
-    if len(filenames) > 6:
626
-        preview += ", ..."
627
-    return f"Existing files include {preview}"
628
-
629
-
630
-def _collect_html_verification_gap_fact(
631
-    messages: list[Message],
632
-    *,
633
-    tool_calls_by_id: dict[str, ToolCall],
634
-) -> str | None:
635
-    gap = _summarize_latest_html_verification_gap(
636
-        messages,
637
-        tool_calls_by_id=tool_calls_by_id,
638
-    )
639
-    if not gap:
640
-        return None
641
-    return f"Verification gaps: {gap}"
642
-
643
-
644
-def _summarize_latest_html_verification_gap(
645
-    messages: list[Message],
646
-    *,
647
-    max_items: int = 2,
648
-    tool_calls_by_id: dict[str, ToolCall] | None = None,
649
-) -> str | None:
650
-    if tool_calls_by_id is None:
651
-        tool_calls_by_id = {
652
-            tool_call.id: tool_call
653
-            for message in messages
654
-            for tool_call in message.tool_calls
655
-        }
656
-
657
-    for message in reversed(messages):
658
-        if message.role != Role.TOOL or _is_compacted_context_message(message.content):
659
-            continue
660
-        if not any(result.is_error for result in message.tool_results):
661
-            continue
662
-        tool_name = _resolve_tool_name(
663
-            message,
664
-            tool_calls_by_id=tool_calls_by_id,
665
-        )
666
-        if tool_name != "bash":
667
-            continue
668
-
669
-        payload = "\n".join(
670
-            result.content.strip()
671
-            for result in message.tool_results
672
-            if result.content.strip()
673
-        ) or message.content
674
-        gap = html_toc_rule.summarize_html_toc_verification_gap(
675
-            payload,
676
-            max_items=max_items,
677
-        )
678
-        if gap:
679
-            return gap
680
-
681
-    return None
682
-
683
-
684
-def _summarize_html_file_discovery(payload: str) -> str | None:
685
-    return html_toc_rule.summarize_html_file_discovery(payload)
686
-
687
-
688498
 def _resolve_tool_name(
689499
     message: Message,
690500
     *,
@@ -710,9 +520,6 @@ def _choose_target_path(
710520
     if focus_path:
711521
         normalized_focus = _normalize_path_candidate(focus_path)
712522
         if normalized_focus:
713
-            resolved_focus = html_toc_rule.resolve_html_toc_index_path(normalized_focus)
714
-            if resolved_focus is not None:
715
-                return str(resolved_focus)
716523
             return normalized_focus
717524
 
718525
     candidates: Counter[str] = Counter()
@@ -727,9 +534,9 @@ def _choose_target_path(
727534
             if not normalized:
728535
                 continue
729536
             path_name = Path(normalized).name
730
-            if html_toc_rule.is_html_toc_index_path(normalized):
537
+            if path_name == "index.html":
731538
                 candidates[normalized] += 10
732
-            elif path_name.endswith(".html") and "/chapters/" not in normalized:
539
+            elif "." in path_name:
733540
                 candidates[normalized] += 4
734541
 
735542
     if candidates:
@@ -738,9 +545,6 @@ def _choose_target_path(
738545
     if not current_task:
739546
         return None
740547
     current_task_paths = extract_key_files([Message(role=Role.USER, content=current_task)], limit=3)
741
-    for path in current_task_paths:
742
-        if html_toc_rule.is_html_toc_index_path(path):
743
-            return path
744548
     return current_task_paths[0] if current_task_paths else None
745549
 
746550
 
@@ -770,14 +574,7 @@ def _focus_path_anchors(focus_path: str) -> tuple[str, ...]:
770574
     )
771575
     focus = Path(normalized_focus).expanduser()
772576
     anchors = {str(focus)}
773
-
774
-    resolved_index = html_toc_rule.resolve_html_toc_index_path(focus)
775
-    if resolved_index is not None:
776
-        anchors.add(str(resolved_index))
777
-        anchors.add(str(resolved_index.parent))
778
-        anchors.add(str(resolved_index.parent / "chapters"))
779
-    else:
780
-        anchors.add(str(focus.parent))
577
+    anchors.add(str(focus.parent))
781578
 
782579
     return tuple(anchor for anchor in anchors if anchor)
783580
 
src/loader/runtime/dod.pymodified
@@ -12,7 +12,6 @@ from typing import Any, Literal
1212
 
1313
 from ..llm.base import ToolCall
1414
 from ..tools.shell_tools import BashTool
15
-from .semantic_rules import html_toc as html_toc_rule
1615
 from .verification_observations import VerificationAttempt, verification_attempt_id
1716
 
1817
 TaskSize = Literal["small", "standard", "large"]
@@ -20,6 +19,38 @@ DoDStatus = Literal["draft", "in_progress", "verifying", "fixing", "done", "fail
2019
 VerificationConfidence = Literal["high", "medium", "low"]
2120
 VerificationKind = Literal["test", "typecheck", "lint", "build", "smoke", "runtime", "manual"]
2221
 
22
+_DIRECTORY_CONTENT_HINTS = (
23
+    "file",
24
+    "files",
25
+    "chapter",
26
+    "chapters",
27
+    "page",
28
+    "pages",
29
+    "test",
30
+    "tests",
31
+    "artifact",
32
+    "artifacts",
33
+    "document",
34
+    "documents",
35
+    "content",
36
+    "entry",
37
+    "entries",
38
+)
39
+_DIRECTORY_MUTATION_HINTS = (
40
+    "create",
41
+    "creating",
42
+    "generate",
43
+    "generating",
44
+    "write",
45
+    "writing",
46
+    "add",
47
+    "adding",
48
+    "build",
49
+    "building",
50
+    "populate",
51
+    "populating",
52
+)
53
+
2354
 
2455
 @dataclass
2556
 class VerificationEvidence:
@@ -213,10 +244,13 @@ def derive_verification_commands(
213244
     """Generate verification commands from execution history and project shape."""
214245
 
215246
     commands: list[str] = []
216
-    semantic_command = _derive_html_toc_verification_command(
247
+    html_link_command = _derive_local_html_link_verification_command(
248
+        dod,
249
+        project_root=project_root,
250
+    )
251
+    planned_artifact_targets = collect_planned_artifact_targets(
217252
         dod,
218253
         project_root=project_root,
219
-        task_statement=task_statement,
220254
     )
221255
 
222256
     explicit = [cmd for cmd in dod.successful_commands if _is_verification_command(cmd)]
@@ -230,8 +264,10 @@ def derive_verification_commands(
230264
             if path.suffix == ".py":
231265
                 _append_unique(commands, f"python {shlex.quote(path.name)}")
232266
 
233
-    if semantic_command:
234
-        _append_unique(commands, semantic_command)
267
+    if html_link_command:
268
+        _append_unique(commands, html_link_command)
269
+    for command in _build_planned_artifact_verification_commands(planned_artifact_targets):
270
+        _append_unique(commands, command)
235271
 
236272
     if commands:
237273
         return commands
@@ -512,30 +548,490 @@ def _extract_files_from_bash(command: str) -> list[str]:
512548
     return []
513549
 
514550
 
515
-def _derive_html_toc_verification_command(
551
+def _derive_local_html_link_verification_command(
516552
     dod: DefinitionOfDone,
517553
     *,
518554
     project_root: Path,
519
-    task_statement: str,
520555
 ) -> str | None:
521
-    task_hints = " ".join([task_statement, *dod.acceptance_criteria]).lower()
522
-    if not html_toc_rule.task_targets_html_toc(task_hints):
523
-        return None
524
-
556
+    html_paths: list[Path] = []
525557
     for path_str in dod.touched_files:
526558
         path = Path(path_str)
527559
         effective_path = path if path.is_absolute() else (project_root / path)
528
-        command = html_toc_rule.build_html_toc_verification_command(effective_path)
529
-        if command:
530
-            return command
560
+        if effective_path.suffix.lower() != ".html" or not effective_path.exists():
561
+            continue
562
+        html_paths.append(effective_path)
563
+
564
+    unique_paths = list(dict.fromkeys(str(path) for path in html_paths))
565
+    resolved_paths = [Path(path) for path in unique_paths]
566
+    if not resolved_paths:
567
+        return None
568
+    if not any(_html_file_contains_local_links(path) for path in resolved_paths):
569
+        return None
570
+    return _build_local_html_link_verification_command(resolved_paths)
571
+
572
+
573
+def collect_planned_artifact_targets(
574
+    dod: DefinitionOfDone,
575
+    *,
576
+    project_root: Path,
577
+    max_paths: int | None = None,
578
+) -> list[tuple[Path, bool]]:
579
+    if not dod.implementation_plan:
580
+        return []
581
+
582
+    plan_path = Path(dod.implementation_plan)
583
+    if not plan_path.exists():
584
+        return []
585
+
586
+    markdown = plan_path.read_text()
587
+    file_change_lines = _extract_markdown_section_lines(markdown, "File Changes")
588
+    candidates = _extract_planned_path_literals(file_change_lines or markdown.splitlines())
589
+    if not candidates:
590
+        confirmed_progress_lines = _extract_markdown_section_lines(
591
+            markdown,
592
+            "Confirmed Progress",
593
+        )
594
+        candidates = _extract_planned_path_literals(confirmed_progress_lines)
595
+    targets: list[tuple[Path, bool]] = []
596
+    seen: set[tuple[str, bool]] = set()
597
+
598
+    selected_candidates = candidates if max_paths is None else candidates[:max_paths]
599
+    for raw_path in selected_candidates:
600
+        effective_path = _resolve_planned_artifact_path(raw_path, project_root=project_root)
601
+        if effective_path is None:
602
+            continue
603
+        expect_directory = raw_path.endswith("/")
604
+        if not expect_directory and not effective_path.suffix:
605
+            continue
606
+        key = (str(effective_path), expect_directory)
607
+        if key in seen:
608
+            continue
609
+        seen.add(key)
610
+        targets.append((effective_path, expect_directory))
611
+    return targets
612
+
613
+
614
+def all_planned_artifacts_exist(
615
+    dod: DefinitionOfDone,
616
+    *,
617
+    project_root: Path,
618
+    max_paths: int | None = None,
619
+) -> bool:
620
+    targets = collect_planned_artifact_targets(
621
+        dod,
622
+        project_root=project_root,
623
+        max_paths=max_paths,
624
+    )
625
+    if not targets:
626
+        return False
627
+    if not all(
628
+        planned_artifact_target_satisfied(
629
+            dod,
630
+            target=target,
631
+            expect_directory=expect_directory,
632
+            project_root=project_root,
633
+        )
634
+        for target, expect_directory in targets
635
+    ):
636
+        return False
637
+    return not _planned_html_outputs_have_missing_local_links(
638
+        dod,
639
+        project_root=project_root,
640
+        targets=targets,
641
+    )
642
+
643
+
644
+def planned_artifact_target_satisfied(
645
+    dod: DefinitionOfDone,
646
+    *,
647
+    target: Path,
648
+    expect_directory: bool,
649
+    project_root: Path,
650
+) -> bool:
651
+    """Return whether one planned file or directory target is substantively satisfied."""
652
+
653
+    if not expect_directory:
654
+        return target.is_file()
655
+    if not target.is_dir():
656
+        return False
657
+    if not planned_directory_requires_generated_files(
658
+        dod,
659
+        target=target,
660
+        project_root=project_root,
661
+    ):
662
+        return True
663
+    return _directory_contains_files(target)
664
+
665
+
666
+def infer_next_declared_html_output_file(
667
+    *,
668
+    target: Path,
669
+    project_root: Path,
670
+) -> Path | None:
671
+    """Return the first missing HTML file already declared within an output directory."""
672
+
673
+    missing_targets = collect_missing_declared_html_output_files(
674
+        target=target,
675
+        project_root=project_root,
676
+    )
677
+    return missing_targets[0] if missing_targets else None
678
+
679
+
680
+def collect_missing_declared_html_output_files(
681
+    *,
682
+    target: Path,
683
+    project_root: Path,
684
+) -> tuple[Path, ...]:
685
+    """Return missing HTML outputs already declared within the current artifact graph."""
686
+
687
+    normalized_target = target.resolve(strict=False)
688
+    artifact_root = _resolve_declared_html_artifact_root(
689
+        normalized_target,
690
+        project_root=project_root.resolve(strict=False),
691
+    )
692
+    if artifact_root is None:
693
+        return ()
694
+
695
+    html_files = [path for path in sorted(artifact_root.rglob("*.html")) if path.is_file()]
696
+    if not html_files:
697
+        return ()
698
+
699
+    missing_targets: list[Path] = []
700
+    seen: set[str] = set()
701
+    for html_file in html_files:
702
+        try:
703
+            content = html_file.read_text()
704
+        except OSError:
705
+            continue
706
+        for resolved_target in _iter_local_html_targets(html_file, content):
707
+            if resolved_target.exists():
708
+                continue
709
+            if resolved_target.suffix.lower() not in {".html", ".htm"}:
710
+                continue
711
+            try:
712
+                resolved_target.relative_to(artifact_root)
713
+                resolved_target.relative_to(normalized_target)
714
+            except ValueError:
715
+                continue
716
+            key = str(resolved_target)
717
+            if key in seen:
718
+                continue
719
+            seen.add(key)
720
+            missing_targets.append(resolved_target)
721
+    return tuple(missing_targets)
722
+
723
+
724
def _build_planned_artifact_verification_commands(
    targets: list[tuple[Path, bool]],
) -> list[str]:
    """Turn planned (path, is_directory) targets into shell existence checks."""
    checks: list[str] = []
    for target_path, is_directory in targets:
        # Directories get `test -d`, files get `test -f`; paths are shell-quoted.
        flag = "-d" if is_directory else "-f"
        _append_unique(checks, f"test {flag} {shlex.quote(str(target_path))}")
    return checks
736
+
737
+
738
+def _extract_markdown_section_lines(markdown: str, heading: str) -> list[str]:
739
+    current_heading: str | None = None
740
+    collected: list[str] = []
741
+    for line in markdown.splitlines():
742
+        stripped = line.strip()
743
+        if stripped.startswith("## "):
744
+            current_heading = stripped[3:].strip().lower()
745
+            continue
746
+        if current_heading == heading.lower():
747
+            collected.append(line)
748
+    return collected
749
+
750
+
751
def _extract_planned_path_literals(lines: list[str]) -> list[str]:
    """Pull unique path-like literals from plan lines, preferring backticked spans.

    Lines without inline-code spans fall back to the bare line text, shorn of
    bullet/numbered-list markers. Order of first appearance is preserved.
    """
    trim_chars = "`'\",.:;()[]{}"
    found: list[str] = []
    known: set[str] = set()

    for raw_line in lines:
        spans = re.findall(r"`([^`]+)`", raw_line)
        if not spans:
            # No inline code spans: try the whole line once list markers are gone.
            bare = raw_line.strip()
            bare = re.sub(r"^[-*+]\s+", "", bare)
            bare = re.sub(r"^\d+[.)]\s+", "", bare)
            bare = bare.strip(trim_chars)
            spans = [bare] if _looks_like_path_literal(bare) else []
        for span in spans:
            cleaned = span.strip(trim_chars)
            if _looks_like_path_literal(cleaned) and cleaned not in known:
                known.add(cleaned)
                found.append(cleaned)
    return found
769
+    return paths
770
+
771
+
772
+def _resolve_declared_html_artifact_root(
773
+    target: Path,
774
+    *,
775
+    project_root: Path,
776
+) -> Path | None:
777
+    for candidate in [target, *target.parents]:
778
+        if (candidate / "index.html").is_file():
779
+            return candidate
780
+        if candidate == project_root or candidate == candidate.parent:
781
+            break
782
+
783
+    fallback = target if target.exists() else target.parent
784
+    if fallback.exists():
785
+        return fallback
531786
     return None
532787
 
533788
 
534
-def _build_html_toc_verification_command(index_path: Path) -> str:
535
-    command = html_toc_rule.build_html_toc_verification_command(index_path)
536
-    if command is None:
537
-        raise ValueError(f"{index_path} is not a valid HTML TOC target")
538
-    return command
789
+def _iter_local_html_targets(file_path: Path, content: str) -> list[Path]:
790
+    pattern = re.compile(r'href\s*=\s*["\']([^"\']+)["\']', re.IGNORECASE)
791
+    targets: list[Path] = []
792
+    seen: set[str] = set()
793
+    for href in pattern.findall(content):
794
+        candidate = href.strip()
795
+        if not _is_local_html_link_target(candidate):
796
+            continue
797
+        resolved = (file_path.parent / candidate).resolve(strict=False)
798
+        key = str(resolved)
799
+        if key in seen:
800
+            continue
801
+        seen.add(key)
802
+        targets.append(resolved)
803
+    return targets
804
+
805
+
806
+def _is_local_html_link_target(href: str) -> bool:
807
+    candidate = href.strip()
808
+    if not candidate or candidate.startswith(("#", "http://", "https://", "mailto:")):
809
+        return False
810
+    if "?" in candidate:
811
+        candidate = candidate.split("?", 1)[0]
812
+    if "#" in candidate:
813
+        candidate = candidate.split("#", 1)[0]
814
+    return Path(candidate).suffix.lower() in {".html", ".htm"}
815
+
816
+
817
+def _looks_like_path_literal(value: str) -> bool:
818
+    if not value or " " in value:
819
+        return False
820
+    if value.startswith(("http://", "https://")):
821
+        return False
822
+    return (
823
+        value.startswith(("~/", "./", "../", "/"))
824
+        or "/" in value
825
+        or value.endswith("/")
826
+    )
827
+
828
+
829
+def _resolve_planned_artifact_path(
830
+    raw_path: str,
831
+    *,
832
+    project_root: Path,
833
+) -> Path | None:
834
+    text = raw_path.strip()
835
+    if not text:
836
+        return None
837
+    path = Path(text).expanduser()
838
+    if path.is_absolute():
839
+        return path
840
+    return project_root / path
841
+
842
+
843
+def planned_directory_requires_generated_files(
844
+    dod: DefinitionOfDone,
845
+    *,
846
+    target: Path,
847
+    project_root: Path,
848
+) -> bool:
849
+    """Return whether a planned directory is expected to contain generated files."""
850
+
851
+    plan_path = Path(dod.implementation_plan) if dod.implementation_plan else None
852
+    if plan_path is not None and plan_path.exists():
853
+        markdown = plan_path.read_text()
854
+        file_change_lines = _extract_markdown_section_lines(markdown, "File Changes")
855
+        if any(
856
+            _line_describes_directory_contents(line, target=target, project_root=project_root)
857
+            for line in file_change_lines
858
+        ):
859
+            return True
860
+
861
+        execution_lines = _extract_markdown_section_lines(markdown, "Execution Order")
862
+        if any(
863
+            _line_mentions_directory_generation(line, target=target)
864
+            for line in execution_lines
865
+        ):
866
+            return True
867
+
868
+    todo_lines = [*dod.pending_items, *dod.completed_items]
869
+    return any(
870
+        _line_mentions_directory_generation(line, target=target)
871
+        for line in todo_lines
872
+    )
873
+
874
+
875
+def _line_describes_directory_contents(
876
+    line: str,
877
+    *,
878
+    target: Path,
879
+    project_root: Path,
880
+) -> bool:
881
+    lowered = line.lower()
882
+    if not any(hint in lowered for hint in _DIRECTORY_CONTENT_HINTS):
883
+        return False
884
+
885
+    target_text = str(target)
886
+    relative_target = str(target.relative_to(project_root)) if target.is_relative_to(project_root) else ""
887
+    if target_text in line or relative_target and relative_target in line:
888
+        return True
889
+    return _line_mentions_directory_generation(line, target=target)
890
+
891
+
892
+def _line_mentions_directory_generation(line: str, *, target: Path) -> bool:
893
+    lowered = line.lower()
894
+    if not any(hint in lowered for hint in _DIRECTORY_CONTENT_HINTS):
895
+        return False
896
+    if not any(hint in lowered for hint in _DIRECTORY_MUTATION_HINTS) and "directory for" not in lowered:
897
+        return False
898
+    directory_tokens = _directory_tokens(target)
899
+    return any(token in lowered for token in directory_tokens)
900
+
901
+
902
+def _directory_tokens(target: Path) -> set[str]:
903
+    tokens: set[str] = set()
904
+    for raw_token in re.split(r"[^a-z0-9]+", target.name.lower()):
905
+        token = raw_token.strip()
906
+        if len(token) < 2:
907
+            continue
908
+        tokens.add(token)
909
+        if token.endswith("ies") and len(token) > 3:
910
+            tokens.add(f"{token[:-3]}y")
911
+        elif token.endswith("s") and len(token) > 3:
912
+            tokens.add(token[:-1])
913
+    return tokens
914
+
915
+
916
+def _directory_contains_files(target: Path) -> bool:
917
+    try:
918
+        return any(child.is_file() for child in target.rglob("*"))
919
+    except OSError:
920
+        return False
921
+
922
+
923
+def _html_file_contains_local_links(path: Path) -> bool:
924
+    pattern = re.compile(r'href\s*=\s*["\']([^"\']+)["\']', re.IGNORECASE)
925
+    try:
926
+        text = path.read_text()
927
+    except OSError:
928
+        return False
929
+    return any(_is_local_html_link_target(href) for href in pattern.findall(text))
930
+
931
+
932
+def _planned_html_outputs_have_missing_local_links(
933
+    dod: DefinitionOfDone,
934
+    *,
935
+    project_root: Path,
936
+    targets: list[tuple[Path, bool]],
937
+) -> bool:
938
+    html_paths: list[Path] = []
939
+    for raw_path in dod.touched_files:
940
+        path = Path(raw_path)
941
+        effective_path = path if path.is_absolute() else (project_root / path)
942
+        if effective_path.suffix.lower() != ".html" or not effective_path.exists():
943
+            continue
944
+        html_paths.append(effective_path)
945
+
946
+    for target, expect_directory in targets:
947
+        if expect_directory or target.suffix.lower() != ".html" or not target.exists():
948
+            continue
949
+        html_paths.append(target)
950
+
951
+    seen: set[str] = set()
952
+    for path in html_paths:
953
+        normalized = str(path)
954
+        if normalized in seen:
955
+            continue
956
+        seen.add(normalized)
957
+        if _html_file_has_missing_local_links(path):
958
+            return True
959
+    return False
960
+
961
+
962
+def _html_file_has_missing_local_links(path: Path) -> bool:
963
+    pattern = re.compile(r'href\s*=\s*["\']([^"\']+)["\']', re.IGNORECASE)
964
+    try:
965
+        text = path.read_text()
966
+    except OSError:
967
+        return False
968
+    for href in pattern.findall(text):
969
+        target = href.strip()
970
+        if not _is_local_html_link_target(target):
971
+            continue
972
+        normalized = target.split("#", 1)[0].split("?", 1)[0].strip()
973
+        if not normalized:
974
+            continue
975
+        if not (path.parent / normalized).resolve().exists():
976
+            return True
977
+    return False
978
+
979
+
980
+def _is_local_html_link_target(href: str) -> bool:
981
+    target = href.strip()
982
+    if not target:
983
+        return False
984
+    if target.startswith(("#", "mailto:", "tel:", "javascript:")):
985
+        return False
986
+    if "://" in target:
987
+        return False
988
+    target = target.split("#", 1)[0].split("?", 1)[0].strip()
989
+    return bool(target)
990
+
991
+
992
+def _build_local_html_link_verification_command(paths: list[Path]) -> str:
993
+    serialized_paths = ", ".join(repr(str(path)) for path in paths)
994
+    return "\n".join(
995
+        [
996
+            "python3 - <<'PY'",
997
+            "from pathlib import Path",
998
+            "import re",
999
+            "",
1000
+            f"paths = [{serialized_paths}]",
1001
+            (
1002
+                r"pattern = re.compile(r'href\s*=\s*[\"\\\']([^\"\\\']+)[\"\\\']', "
1003
+                "re.IGNORECASE)"
1004
+            ),
1005
+            "checked = 0",
1006
+            "missing = []",
1007
+            "for raw_path in paths:",
1008
+            "    html_path = Path(raw_path)",
1009
+            "    if not html_path.exists():",
1010
+            "        continue",
1011
+            "    text = html_path.read_text()",
1012
+            "    for href in pattern.findall(text):",
1013
+            "        target = href.strip()",
1014
+            "        if not target:",
1015
+            "            continue",
1016
+            "        if target.startswith((\"#\", \"mailto:\", \"tel:\", \"javascript:\")):",
1017
+            "            continue",
1018
+            "        if \"://\" in target:",
1019
+            "            continue",
1020
+            "        target = target.split(\"#\", 1)[0].split(\"?\", 1)[0].strip()",
1021
+            "        if not target:",
1022
+            "            continue",
1023
+            "        checked += 1",
1024
+            "        resolved = (html_path.parent / target).resolve()",
1025
+            "        if not resolved.exists():",
1026
+            "            missing.append(f\"{html_path}:{href} -> {resolved}\")",
1027
+            "if missing:",
1028
+            "    print(\"Missing local HTML links:\")",
1029
+            "    print(\"\\n\".join(missing))",
1030
+            "    raise SystemExit(1)",
1031
+            "print(f\"Checked {checked} local HTML links across {len(paths)} file(s).\")",
1032
+            "PY",
1033
+        ]
1034
+    )
5391035
 
5401036
 
5411037
 def _first_non_empty_line(text: str) -> str:
src/loader/runtime/explore.pymodified
@@ -109,6 +109,7 @@ class ExploreRuntime:
109109
                 registry=self.registry,
110110
                 rollback_plan=None,
111111
                 workspace_root=self.context.project_root,
112
+                session=self.context.session,
112113
             ),
113114
         )
114115
 
src/loader/runtime/finalization.pymodified
@@ -14,6 +14,7 @@ from .dod import (
1414
     DefinitionOfDoneStore,
1515
     VerificationEvidence,
1616
     build_verification_summary,
17
+    collect_planned_artifact_targets,
1718
     derive_verification_commands,
1819
     ensure_active_verification_attempt,
1920
     synthesize_todo_items,
@@ -28,7 +29,6 @@ from .executor import ToolExecutor
2829
 from .logging import get_runtime_logger
2930
 from .memory import MemoryStore
3031
 from .policy_timeline import append_verification_timeline_entry
31
-from .semantic_rules import html_toc as html_toc_rule
3232
 from .session import normalize_usage
3333
 from .tracing import RuntimeTracer
3434
 from .verification_observations import (
@@ -41,6 +41,7 @@ from .workflow import (
4141
     WorkflowMode,
4242
     WorkflowTimelineEntry,
4343
     WorkflowTimelineEntryKind,
44
+    effective_pending_todo_items,
4445
     extract_verification_commands_from_markdown,
4546
 )
4647
 
@@ -98,13 +99,20 @@ class TurnFinalizer:
9899
         """Gate completion on DoD state and verification evidence."""
99100
 
100101
         implementation_item = "Complete the requested work"
101
-        if implementation_item in dod.pending_items:
102
-            dod.pending_items.remove(implementation_item)
103
-            dod.completed_items.append(implementation_item)
102
+        verification_item = "Collect verification evidence"
104103
 
105104
         tracked_pending_items = [
106
-            item for item in dod.pending_items if item != "Collect verification evidence"
105
+            item
106
+            for item in effective_pending_todo_items(
107
+                dod,
108
+                project_root=self.context.project_root,
109
+            )
110
+            if item not in {implementation_item, verification_item}
107111
         ]
112
+        missing_planned_artifacts = _missing_planned_artifact_labels(
113
+            dod,
114
+            project_root=self.context.project_root,
115
+        )
108116
 
109117
         mutating_paths = [path for path in dod.touched_files if path]
110118
         requires_verification = bool(mutating_paths or dod.mutating_actions)
@@ -115,6 +123,60 @@ class TurnFinalizer:
115123
             reason=f"files={mutating_paths[:3]}, actions={len(dod.mutating_actions)}"
116124
             if requires_verification else None,
117125
         )
126
+        if missing_planned_artifacts:
127
+            recovery_nudge = _build_missing_artifact_recovery_nudge(
128
+                _first_missing_planned_artifact(
129
+                    dod,
130
+                    project_root=self.context.project_root,
131
+                )
132
+            )
133
+            if recovery_nudge:
134
+                self.context.queue_steering_message(recovery_nudge)
135
+            missing_provenance = [
136
+                EvidenceProvenance(
137
+                    category="tracked_work",
138
+                    source="dod.implementation_plan",
139
+                    summary=f"planned artifact still missing: {label}",
140
+                    status=EvidenceProvenanceStatus.MISSING.value,
141
+                    subject=label,
142
+                )
143
+                for label in missing_planned_artifacts
144
+            ]
145
+            missing_text = "\n".join(
146
+                f"- {label}" for label in missing_planned_artifacts[:8]
147
+            )
148
+            pending_text = ""
149
+            if tracked_pending_items:
150
+                pending_text = (
151
+                    "\nRemaining tracked work:\n"
152
+                    + "\n".join(f"- {item}" for item in tracked_pending_items[:6])
153
+                )
154
+            self.dod_store.save(dod)
155
+            await self.emit_dod_status(emit, dod)
156
+            self.context.session.append(
157
+                Message(
158
+                    role=Role.USER,
159
+                    content=(
160
+                        "[PLANNED ARTIFACTS STILL MISSING]\n"
161
+                        "The explicit implementation plan is not complete yet. "
162
+                        "Do not move to verification or final confirmation.\n\n"
163
+                        "Missing planned artifacts:\n"
164
+                        f"{missing_text}"
165
+                        f"{pending_text}\n\n"
166
+                        "Continue by creating or updating the missing planned artifacts."
167
+                    ),
168
+                )
169
+            )
170
+            return CompletionGateResult(
171
+                should_continue=True,
172
+                reason_code="planned_artifacts_missing_continue",
173
+                reason_summary=(
174
+                    "continued because explicitly planned artifacts were still missing "
175
+                    "before verification"
176
+                ),
177
+                final_response="",
178
+                evidence_provenance=missing_provenance,
179
+            )
118180
         if tracked_pending_items and not requires_verification:
119181
             pending_provenance = [
120182
                 EvidenceProvenance(
@@ -149,6 +211,10 @@ class TurnFinalizer:
149211
             )
150212
 
151213
         if not requires_verification:
214
+            if implementation_item in dod.pending_items:
215
+                dod.pending_items.remove(implementation_item)
216
+            if implementation_item not in dod.completed_items:
217
+                dod.completed_items.append(implementation_item)
152218
             skip_provenance = [
153219
                 EvidenceProvenance(
154220
                     category="verification",
@@ -240,9 +306,15 @@ class TurnFinalizer:
240306
                 f"Task: {dod.task_statement}\n"
241307
                 "No new file changes were made since the last failed verification.\n\n"
242308
                 f"{build_verification_summary(dod.evidence)}\n\n"
243
-                f"{_build_verification_repair_guidance(dod)}\n\n"
309
+                f"{_build_verification_repair_guidance(dod, project_root=self.context.project_root)}\n\n"
244310
                 "Apply a concrete edit or patch before trying to finish again."
245311
             )
312
+            recovery_nudge = _build_verification_failure_recovery_nudge(
313
+                dod,
314
+                project_root=self.context.project_root,
315
+            )
316
+            if recovery_nudge:
317
+                self.context.queue_steering_message(recovery_nudge)
246318
             self.context.session.append(Message(role=Role.USER, content=repair_prompt))
247319
             return CompletionGateResult(
248320
                 should_continue=True,
@@ -407,6 +479,12 @@ class TurnFinalizer:
407479
         dod.confidence = "medium"
408480
         self.dod_store.save(dod)
409481
         await self.emit_dod_status(emit, dod)
482
+        recovery_nudge = _build_verification_failure_recovery_nudge(
483
+            dod,
484
+            project_root=self.context.project_root,
485
+        )
486
+        if recovery_nudge:
487
+            self.context.queue_steering_message(recovery_nudge)
410488
         await self.set_workflow_mode(
411489
             ModeDecision.transition(
412490
                 WorkflowMode.EXECUTE,
@@ -424,7 +502,7 @@ class TurnFinalizer:
424502
             f"Attempt: {dod.retry_count}/{dod.retry_budget}\n"
425503
             f"Pending items: {', '.join(dod.pending_items)}\n\n"
426504
             f"{build_verification_summary(dod.evidence)}\n\n"
427
-            f"{_build_verification_repair_guidance(dod)}\n\n"
505
+            f"{_build_verification_repair_guidance(dod, project_root=self.context.project_root)}\n\n"
428506
             "Fix the failures above, then finish the task again."
429507
         )
430508
         self.context.session.append(Message(role=Role.USER, content=failure_prompt))
@@ -710,6 +788,72 @@ def _verification_result_provenance(
710788
     return entries
711789
 
712790
 
791
+def _missing_planned_artifact_labels(
792
+    dod: DefinitionOfDone,
793
+    *,
794
+    project_root: Path,
795
+) -> list[str]:
796
+    labels: list[str] = []
797
+    for target, expect_directory in collect_planned_artifact_targets(
798
+        dod,
799
+        project_root=project_root,
800
+        max_paths=12,
801
+    ):
802
+        exists = target.is_dir() if expect_directory else target.is_file()
803
+        if exists:
804
+            continue
805
+        label = target.name or str(target)
806
+        if expect_directory and not label.endswith("/"):
807
+            label += "/"
808
+        labels.append(f"`{label}`")
809
+    return labels
810
+
811
+
812
+def _first_missing_planned_artifact(
813
+    dod: DefinitionOfDone,
814
+    *,
815
+    project_root: Path,
816
+) -> tuple[Path, bool] | None:
817
+    for target, expect_directory in collect_planned_artifact_targets(
818
+        dod,
819
+        project_root=project_root,
820
+        max_paths=12,
821
+    ):
822
+        exists = target.is_dir() if expect_directory else target.is_file()
823
+        if not exists:
824
+            return target, expect_directory
825
+    return None
826
+
827
+
828
+def _build_missing_artifact_recovery_nudge(
829
+    missing_artifact: tuple[Path, bool] | None,
830
+) -> str | None:
831
+    if missing_artifact is None:
832
+        return None
833
+
834
+    target, expect_directory = missing_artifact
835
+    label = target.name or str(target)
836
+    if expect_directory and not label.endswith("/"):
837
+        label += "/"
838
+
839
+    if expect_directory:
840
+        return (
841
+            "Your prior completion claim was incorrect because "
842
+            f"`{label}` does not exist yet. Do not summarize, mark completion, or "
843
+            "write bookkeeping notes yet. Your next response should be one concrete "
844
+            f"tool call that creates `{target}`. If a specific missing fact blocks "
845
+            "that step, ask one precise question."
846
+        )
847
+
848
+    return (
849
+        "Your prior completion claim was incorrect because "
850
+        f"`{label}` does not exist yet. Do not summarize, mark completion, or "
851
+        "write bookkeeping notes yet. Your next response should be one concrete "
852
+        f"`write` or `edit`-style tool call that creates or updates `{target}`. "
853
+        "If a specific missing fact blocks that step, ask one precise question."
854
+    )
855
+
856
+
713857
 def _verification_result_observations(
714858
     dod: DefinitionOfDone,
715859
     *,
@@ -938,49 +1082,254 @@ def _verification_state_signature(dod: DefinitionOfDone) -> str:
9381082
     )
9391083
 
9401084
 
941
-def _build_verification_repair_guidance(dod: DefinitionOfDone) -> str:
942
-    fixes = _extract_verification_repairs(dod.evidence)
943
-    if not fixes:
1085
+def _build_verification_repair_guidance(
1086
+    dod: DefinitionOfDone,
1087
+    *,
1088
+    project_root: Path,
1089
+) -> str:
1090
+    repair_targets = _extract_verification_repair_targets(dod.evidence)
1091
+    fixes = _extract_verification_repairs(
1092
+        dod.evidence,
1093
+        repair_targets=repair_targets,
1094
+    )
1095
+    repair_source_paths = _existing_repair_source_paths(
1096
+        dod,
1097
+        repair_targets=repair_targets,
1098
+        project_root=project_root,
1099
+    )
1100
+    if not fixes and not repair_targets:
9441101
         return (
9451102
             "Use the failed verification evidence directly, avoid rereading unrelated "
9461103
             "files, and fix the target file before retrying."
9471104
         )
9481105
 
949
-    return "\n".join(
950
-        [
951
-            "Repair focus:",
952
-            *[f"- {item}" for item in fixes],
953
-            "- Reuse these exact failures instead of restarting discovery from earlier chapters.",
954
-        ]
955
-    )
1106
+    lines = ["Repair focus:"]
1107
+    lines.extend(f"- {item}" for item in fixes)
1108
+    primary_target = repair_targets[0] if repair_targets else None
1109
+    if primary_target is not None:
1110
+        lines.extend(
1111
+            [
1112
+                f"- Immediate next step: edit `{primary_target.artifact_path}`.",
1113
+                "- If the broken reference should remain, create "
1114
+                f"`{primary_target.expected_path}`; otherwise remove or replace "
1115
+                f"`{primary_target.failing_reference}`.",
1116
+                *(
1117
+                    [
1118
+                        "- Use the existing artifact files as the source of truth while "
1119
+                        "repairing this file: "
1120
+                        + ", ".join(f"`{path}`" for path in repair_source_paths[:6])
1121
+                        + (", ..." if len(repair_source_paths) > 6 else "")
1122
+                    ]
1123
+                    if repair_source_paths
1124
+                    else []
1125
+                ),
1126
+                "- Do not reread unrelated reference materials or restart discovery "
1127
+                "while this concrete repair target is unresolved.",
1128
+            ]
1129
+        )
1130
+    else:
1131
+        lines.append(
1132
+            "- Reuse these exact failures instead of restarting discovery from earlier "
1133
+            "chapters."
1134
+        )
1135
+    return "\n".join(lines)
9561136
 
9571137
 
9581138
 def _extract_verification_repairs(
9591139
     evidence_items: list[VerificationEvidence],
1140
+    *,
1141
+    repair_targets: list[VerificationRepairTarget] | None = None,
9601142
 ) -> list[str]:
9611143
     fixes: list[str] = []
1144
+    target_map = {
1145
+        (target.artifact_path, target.failing_reference, target.expected_path): target
1146
+        for target in (repair_targets or _extract_verification_repair_targets(evidence_items))
1147
+    }
1148
+    for target in target_map.values():
1149
+        item = (
1150
+            f"Fix the broken local reference `{target.failing_reference}` in "
1151
+            f"`{target.artifact_path}`."
1152
+        )
1153
+        if item not in fixes:
1154
+            fixes.append(item)
9621155
     for evidence in evidence_items:
9631156
         for candidate in (evidence.stderr, evidence.output, evidence.stdout):
964
-            missing, mismatches = html_toc_rule.parse_html_toc_verification_failures(
965
-                str(candidate)
966
-            )
967
-            for href in missing:
968
-                item = (
969
-                    f"Fix the missing TOC href `{href}` in the target HTML "
970
-                    "table-of-contents page."
971
-                )
972
-                if item not in fixes:
973
-                    fixes.append(item)
974
-            for mismatch in mismatches:
1157
+            for problem in _extract_missing_local_html_links(str(candidate)):
1158
+                parsed = _parse_missing_local_html_link(problem)
1159
+                if parsed is not None:
1160
+                    key = (
1161
+                        parsed.artifact_path,
1162
+                        parsed.failing_reference,
1163
+                        parsed.expected_path,
1164
+                    )
1165
+                    if key in target_map:
1166
+                        continue
9751167
                 item = (
976
-                    f"Fix the TOC label mismatch `{mismatch}` in the target HTML "
977
-                    "table-of-contents page."
1168
+                    "Fix the missing local HTML link "
1169
+                    f"`{problem}` in the edited artifact set."
9781170
                 )
9791171
                 if item not in fixes:
9801172
                     fixes.append(item)
9811173
     return fixes
9821174
 
9831175
 
1176
+@dataclass(frozen=True)
1177
+class VerificationRepairTarget:
1178
+    """Structured repair target extracted from failed verification evidence."""
1179
+
1180
+    artifact_path: str
1181
+    failing_reference: str
1182
+    expected_path: str
1183
+
1184
+
1185
+def _build_verification_failure_recovery_nudge(
1186
+    dod: DefinitionOfDone,
1187
+    *,
1188
+    project_root: Path,
1189
+) -> str | None:
1190
+    repair_targets = _extract_verification_repair_targets(dod.evidence)
1191
+    repair_source_paths = _existing_repair_source_paths(
1192
+        dod,
1193
+        repair_targets=repair_targets,
1194
+        project_root=project_root,
1195
+    )
1196
+    if repair_targets:
1197
+        primary_target = repair_targets[0]
1198
+        source_hint = ""
1199
+        if repair_source_paths:
1200
+            preview = ", ".join(f"`{path}`" for path in repair_source_paths[:4])
1201
+            if len(repair_source_paths) > 4:
1202
+                preview += ", ..."
1203
+            source_hint = (
1204
+                " Use the existing artifact files already on disk as the source of truth: "
1205
+                f"{preview}."
1206
+            )
1207
+        return (
1208
+            "Verification already identified the concrete repair target. "
1209
+            "Do not restart discovery or reread unrelated references. "
1210
+            "Your next response should be one concrete `edit` or `write`-style tool "
1211
+            f"call that updates `{primary_target.artifact_path}` to repair "
1212
+            f"`{primary_target.failing_reference}`. "
1213
+            f"If that reference should stay, create `{primary_target.expected_path}`; "
1214
+            "otherwise remove or replace the broken local reference."
1215
+            f"{source_hint}"
1216
+        )
1217
+
1218
+    fixes = _extract_verification_repairs(dod.evidence, repair_targets=repair_targets)
1219
+    if not fixes:
1220
+        return None
1221
+    return (
1222
+        "Verification already identified a concrete failure in the active artifact set. "
1223
+        "Reuse that evidence directly, apply one concrete edit or patch, and do not "
1224
+        "restart discovery unless a specific missing fact blocks the repair."
1225
+    )
1226
+
1227
+
1228
+def _existing_repair_source_paths(
1229
+    dod: DefinitionOfDone,
1230
+    *,
1231
+    repair_targets: list[VerificationRepairTarget],
1232
+    project_root: Path,
1233
+) -> list[str]:
1234
+    if not repair_targets:
1235
+        return []
1236
+
1237
+    candidate_dirs = {
1238
+        Path(target.expected_path).parent.resolve(strict=False)
1239
+        for target in repair_targets
1240
+        if str(target.expected_path).strip()
1241
+    }
1242
+    candidate_dirs.update(
1243
+        Path(target.artifact_path).parent.resolve(strict=False)
1244
+        for target in repair_targets
1245
+        if str(target.artifact_path).strip()
1246
+    )
1247
+
1248
+    paths: list[str] = []
1249
+    seen: set[str] = set()
1250
+    for target, expect_directory in collect_planned_artifact_targets(
1251
+        dod,
1252
+        project_root=project_root,
1253
+        max_paths=24,
1254
+    ):
1255
+        if expect_directory or not target.is_file():
1256
+            continue
1257
+        resolved = target.resolve(strict=False)
1258
+        if resolved.parent not in candidate_dirs:
1259
+            continue
1260
+        normalized = str(resolved)
1261
+        if normalized in seen:
1262
+            continue
1263
+        seen.add(normalized)
1264
+        paths.append(normalized)
1265
+    return paths
1266
+
1267
+
1268
+def _extract_verification_repair_targets(
1269
+    evidence_items: list[VerificationEvidence],
1270
+) -> list[VerificationRepairTarget]:
1271
+    targets: list[VerificationRepairTarget] = []
1272
+    seen: set[tuple[str, str, str]] = set()
1273
+    for evidence in evidence_items:
1274
+        for candidate in (evidence.stderr, evidence.output, evidence.stdout):
1275
+            for problem in _extract_missing_local_html_links(str(candidate)):
1276
+                parsed = _parse_missing_local_html_link(problem)
1277
+                if parsed is None:
1278
+                    continue
1279
+                key = (
1280
+                    parsed.artifact_path,
1281
+                    parsed.failing_reference,
1282
+                    parsed.expected_path,
1283
+                )
1284
+                if key in seen:
1285
+                    continue
1286
+                seen.add(key)
1287
+                targets.append(parsed)
1288
+    return targets
1289
+
1290
+
1291
+def _parse_missing_local_html_link(problem: str) -> VerificationRepairTarget | None:
1292
+    if " -> " not in problem:
1293
+        return None
1294
+    broken_target, expected_path = problem.split(" -> ", 1)
1295
+    broken_target = broken_target.strip()
1296
+    expected_path = expected_path.strip()
1297
+    if not broken_target or not expected_path or ":" not in broken_target:
1298
+        return None
1299
+    artifact_path, failing_reference = broken_target.rsplit(":", 1)
1300
+    artifact_path = artifact_path.strip()
1301
+    failing_reference = failing_reference.strip()
1302
+    if not artifact_path or not failing_reference:
1303
+        return None
1304
+    return VerificationRepairTarget(
1305
+        artifact_path=artifact_path,
1306
+        failing_reference=failing_reference,
1307
+        expected_path=expected_path,
1308
+    )
1309
+
1310
+
1311
+def _extract_missing_local_html_links(text: str) -> list[str]:
1312
+    if "Missing local HTML links:" not in text:
1313
+        return []
1314
+
1315
+    problems: list[str] = []
1316
+    capture = False
1317
+    for raw_line in text.splitlines():
1318
+        line = raw_line.strip()
1319
+        if not line:
1320
+            continue
1321
+        if line == "Missing local HTML links:":
1322
+            capture = True
1323
+            continue
1324
+        if not capture:
1325
+            continue
1326
+        if " -> " not in line:
1327
+            continue
1328
+        if line not in problems:
1329
+            problems.append(line)
1330
+    return problems
1331
+
1332
+
9841333
 def _classify_verification_kind(command: str) -> str:
9851334
     """Classify the verification command into a summary kind."""
9861335
 
src/loader/runtime/hooks.pymodified
@@ -2,6 +2,7 @@
22
 
33
 from __future__ import annotations
44
 
5
+import shlex
56
 from collections.abc import Iterable
67
 from dataclasses import dataclass, field
78
 from enum import StrEnum
@@ -11,10 +12,27 @@ from typing import Any, Protocol
1112
 from ..llm.base import ToolCall
1213
 from ..tools.base import Tool, ToolRegistry
1314
 from ..tools.base import ToolResult as RegistryToolResult
15
+from .dod import (
16
+    DefinitionOfDoneStore,
17
+    all_planned_artifacts_exist,
18
+    collect_missing_declared_html_output_files,
19
+    collect_planned_artifact_targets,
20
+    planned_artifact_target_satisfied,
21
+)
1422
 from .memory import MemoryStore
1523
 from .permissions import PermissionOverride, PermissionPolicy
24
+from .repair_focus import (
25
+    extract_active_repair_context,
26
+    normalize_repair_path,
27
+    path_matches_allowed_paths,
28
+    path_within_allowed_roots,
29
+)
1630
 from .rollback import RollbackPlan, create_rollback_plan_for_action, is_destructive_tool
17
-from .safeguard_services import ActionTracker, PreActionValidator
31
+from .safeguard_services import (
32
+    ActionTracker,
33
+    PreActionValidator,
34
+    extract_shell_text_rewrite_target,
35
+)
1836
 
1937
 
2038
 class HookEvent(StrEnum):
@@ -204,13 +222,21 @@ class RelativePathContextHook(BaseToolHook):
204222
 
205223
         arguments = context.tool_call.arguments
206224
         raw_path = str(arguments.get(argument_key, "")).strip()
207
-        if not raw_path or raw_path.startswith(("/", "~")):
225
+        if not raw_path:
208226
             return HookResult()
209227
 
210
-        resolved = self._resolve_recent_context_path(
211
-            raw_path,
212
-            require_existing=True,
213
-        )
228
+        require_existing = context.tool_call.name in {"read", "glob", "grep", "edit", "patch"}
229
+        resolved: str | None = None
230
+        if raw_path.startswith("/"):
231
+            resolved = self._resolve_workspace_mirror_path(
232
+                raw_path,
233
+                require_existing=require_existing,
234
+            )
235
+        elif not raw_path.startswith("~"):
236
+            resolved = self._resolve_recent_context_path(
237
+                raw_path,
238
+                require_existing=require_existing,
239
+            )
214240
         if resolved is None:
215241
             return HookResult()
216242
 
@@ -245,6 +271,551 @@ class RelativePathContextHook(BaseToolHook):
245271
                 return str(candidate)
246272
         return None
247273
 
274
+    def _resolve_workspace_mirror_path(
275
+        self,
276
+        raw_path: str,
277
+        *,
278
+        require_existing: bool,
279
+    ) -> str | None:
280
+        candidate = Path(raw_path).expanduser()
281
+        try:
282
+            resolved = candidate.resolve(strict=False)
283
+        except Exception:
284
+            resolved = candidate
285
+
286
+        try:
287
+            relative = resolved.relative_to(self.workspace_root)
288
+        except ValueError:
289
+            return None
290
+        if not relative.parts:
291
+            return None
292
+
293
+        anchor = relative.parts[0]
294
+        for base_dir in self.action_tracker.recent_path_contexts():
295
+            base_path = Path(base_dir).expanduser()
296
+            try:
297
+                resolved_base = base_path.resolve(strict=False)
298
+            except Exception:
299
+                resolved_base = base_path
300
+            if resolved_base == self.workspace_root:
301
+                continue
302
+            try:
303
+                resolved_base.relative_to(self.workspace_root)
304
+                continue
305
+            except ValueError:
306
+                pass
307
+
308
+            try:
309
+                anchor_index = resolved_base.parts.index(anchor)
310
+            except ValueError:
311
+                continue
312
+            if anchor_index <= 0:
313
+                continue
314
+
315
+            anchor_root = Path(*resolved_base.parts[: anchor_index + 1])
316
+            remapped = Path(*resolved_base.parts[:anchor_index]).joinpath(*relative.parts)
317
+            if remapped == resolved:
318
+                continue
319
+            if require_existing:
320
+                if remapped.exists():
321
+                    return str(remapped)
322
+                continue
323
+            if remapped.exists() or remapped.parent.exists() or anchor_root.exists():
324
+                return str(remapped)
325
+        return None
326
+
327
+
328
+_OBSERVATION_TOOLS = frozenset({"read", "glob", "grep", "bash"})
329
+_MUTATION_TOOLS = frozenset({"write", "edit", "patch", "bash"})
330
+_READ_ONLY_BASH_PREFIXES = frozenset(
331
+    {"ls", "pwd", "find", "stat", "cat", "head", "tail", "rg", "grep"}
332
+)
333
+_MUTATING_BASH_FRAGMENTS = (
334
+    " >",
335
+    ">>",
336
+    "| tee",
337
+    "touch ",
338
+    "mkdir ",
339
+    "rm ",
340
+    "mv ",
341
+    "cp ",
342
+    "sed -i",
343
+    "perl -pi",
344
+    "git add",
345
+    "git commit",
346
+    "git apply",
347
+)
348
+
349
+
350
+def _extract_observation_paths(tool_call: ToolCall) -> list[str]:
351
+    arguments = tool_call.arguments
352
+    if tool_call.name == "read":
353
+        file_path = str(arguments.get("file_path", "")).strip()
354
+        return [file_path] if file_path else []
355
+
356
+    if tool_call.name in {"glob", "grep"}:
357
+        candidates: list[str] = []
358
+        search_path = str(arguments.get("path", "")).strip()
359
+        if search_path:
360
+            anchored_path = _derive_search_anchor(search_path, str(arguments.get("pattern", "")).strip())
361
+            candidates.append(anchored_path or search_path)
362
+        pattern = str(arguments.get("pattern", "")).strip()
363
+        if not search_path and pattern.startswith(("/", "~")):
364
+            candidates.append(str(Path(pattern).expanduser().parent))
365
+        return candidates
366
+
367
+    command = str(arguments.get("command", "")).strip()
368
+    if not _is_read_only_bash(command):
369
+        return []
370
+    return _extract_bash_paths(command)
371
+
372
+
373
+def _is_read_only_bash(command: str) -> bool:
374
+    normalized = " ".join(command.split())
375
+    if not normalized:
376
+        return False
377
+    if extract_shell_text_rewrite_target(normalized) is not None:
378
+        return False
379
+    if any(fragment in normalized for fragment in _MUTATING_BASH_FRAGMENTS):
380
+        return False
381
+    try:
382
+        argv = shlex.split(normalized)
383
+    except ValueError:
384
+        return False
385
+    if not argv:
386
+        return False
387
+    return argv[0] in _READ_ONLY_BASH_PREFIXES
388
+
389
+
390
+def _extract_bash_paths(command: str) -> list[str]:
391
+    try:
392
+        argv = shlex.split(command)
393
+    except ValueError:
394
+        return []
395
+    observed: list[str] = []
396
+    for token in argv[1:]:
397
+        candidate = token.strip()
398
+        if not candidate or candidate.startswith("-"):
399
+            continue
400
+        if candidate.startswith(("/", "~")):
401
+            observed.append(candidate)
402
+    return observed
403
+
404
+
405
+def _derive_search_anchor(search_path: str, pattern: str) -> str:
406
+    normalized_search_path = str(search_path or "").strip()
407
+    normalized_pattern = str(pattern or "").strip()
408
+    if not normalized_search_path or not normalized_pattern:
409
+        return normalized_search_path
410
+
411
+    literal_segments: list[str] = []
412
+    for segment in normalized_pattern.split("/"):
413
+        cleaned = segment.strip()
414
+        if not cleaned or cleaned == ".":
415
+            continue
416
+        if any(token in cleaned for token in ("*", "?", "[")):
417
+            continue
418
+        literal_segments.append(cleaned)
419
+
420
+    if not literal_segments:
421
+        return normalized_search_path
422
+
423
+    if "." in literal_segments[-1]:
424
+        literal_segments = literal_segments[:-1]
425
+    if not literal_segments:
426
+        return normalized_search_path
427
+
428
+    try:
429
+        anchored = Path(normalized_search_path).expanduser().joinpath(*literal_segments)
430
+    except (OSError, RuntimeError, ValueError):
431
+        return normalized_search_path
432
+    return str(anchored)
433
+
434
+
435
+def _extract_mutation_paths(tool_call: ToolCall) -> list[str]:
436
+    arguments = tool_call.arguments
437
+    if tool_call.name in {"write", "edit", "patch"}:
438
+        file_path = str(arguments.get("file_path", "")).strip()
439
+        return [file_path] if file_path else []
440
+
441
+    if tool_call.name != "bash":
442
+        return []
443
+
444
+    command = str(arguments.get("command", "")).strip()
445
+    if not command or not _is_mutating_bash(command):
446
+        return []
447
+    target = extract_shell_text_rewrite_target(command)
448
+    return [target] if target else []
449
+
450
+
451
def _is_mutating_bash(command: str) -> bool:
    """Heuristically decide whether a shell command mutates the filesystem."""
    collapsed = " ".join(command.split())
    if not collapsed:
        return False
    # A recognizable text-rewrite command always counts as a mutation.
    if extract_shell_text_rewrite_target(collapsed) is not None:
        return True
    # Known mutating substrings (e.g. redirections) also count.
    if any(marker in collapsed for marker in _MUTATING_BASH_FRAGMENTS):
        return True
    try:
        tokens = shlex.split(collapsed)
    except ValueError:
        # Unparseable quoting: treat as non-mutating rather than guess.
        return False
    if not tokens:
        return False
    return tokens[0] in {"touch", "mkdir", "rm", "mv", "cp", "chmod", "chown"}
466
+
467
+
468
def _repair_declared_output_paths(repair: Any, *, project_root: Path) -> set[str]:
    """Collect normalized paths of declared-but-missing HTML outputs.

    Walks every allowed root declared on the repair context and gathers the
    missing declared HTML output files beneath it, normalized for comparison
    against observed/mutated paths.
    """
    outputs: set[str] = set()
    roots = getattr(repair, "allowed_roots", ()) or ()
    for raw_root in roots:
        root = normalize_repair_path(raw_root)
        if not root:
            continue
        missing = collect_missing_declared_html_output_files(
            target=Path(root),
            project_root=project_root,
        )
        outputs.update(normalize_repair_path(str(item)) for item in missing)
    return outputs
480
+
481
+
482
class ActiveRepairScopeHook(BaseToolHook):
    """Keep fix-mode observations anchored to the active artifact set."""

    def __init__(
        self,
        *,
        dod_store: DefinitionOfDoneStore,
        project_root: Path,
        session: Any,
    ) -> None:
        # Store used to load the active Definition-of-Done document.
        self.dod_store = dod_store
        # Project root used when resolving declared sibling output files.
        self.project_root = project_root
        # Runtime session; read for `active_dod_path` and `messages`.
        self.session = session

    async def pre_tool_use(self, context: HookContext) -> HookResult:
        """Deny observation tool calls that stray outside the active repair scope.

        Only applies to observation tools while a repair is active; verification
        calls are always allowed through. Returns a DENY HookResult with a
        terminal "blocked" state when the observed paths fall outside the
        repair's allowed paths/roots and its declared sibling outputs.
        """
        # Only observation tools are in scope for this hook.
        if context.tool_call.name not in _OBSERVATION_TOOLS:
            return HookResult()
        # Verification-originated calls are exempt from scoping.
        if context.source == "verification":
            return HookResult()

        # No active repair (no DoD, or DoD already done) means nothing to enforce.
        repair = self._active_repair_context()
        if repair is None:
            return HookResult()

        observed_paths = _extract_observation_paths(context.tool_call)
        if not observed_paths:
            return HookResult()
        # Declared-but-missing sibling outputs are also considered in-scope.
        declared_output_paths = _repair_declared_output_paths(
            repair,
            project_root=self.project_root,
        )
        if repair.allowed_paths:
            # 1) Every observed path matches an explicitly allowed path.
            if all(path_matches_allowed_paths(path, repair.allowed_paths) for path in observed_paths):
                return HookResult()
            # 2) Every observed path is a declared sibling output of the repair set.
            if declared_output_paths and all(
                normalize_repair_path(path) in declared_output_paths
                for path in observed_paths
            ):
                return HookResult()
            # 3) Search/shell tools may roam within the allowed roots.
            if context.tool_call.name in {"glob", "grep", "bash"} and repair.allowed_roots:
                if all(path_within_allowed_roots(path, repair.allowed_roots) for path in observed_paths):
                    return HookResult()

            # Build a short preview (max 3 entries) of allowed paths for the message.
            allowed_preview = ", ".join(f"`{path}`" for path in repair.allowed_paths[:3])
            if len(repair.allowed_paths) > 3:
                allowed_preview += ", ..."
            # Preview the declared sibling outputs by file name (max 3 entries).
            declared_preview = ", ".join(
                f"`{Path(path).name or path}`"
                for path in sorted(declared_output_paths)[:3]
            )
            if len(declared_output_paths) > 3:
                declared_preview += ", ..."
            suggestion_suffix = (
                f" Declared sibling outputs currently allowed inside this repair set include: {declared_preview}."
                if declared_preview
                else ""
            )
            return HookResult(
                decision=HookDecision.DENY,
                message=(
                    "[Blocked - active repair scope: verification already identified "
                    f"`{repair.artifact_path}` as the current repair target. "
                    "Stay on the concrete repair files until that repair passes.] "
                    "Suggestion: inspect or edit only "
                    f"{allowed_preview} and do not reopen unrelated reference materials."
                    f"{suggestion_suffix}"
                ),
                terminal_state="blocked",
            )

        # No explicit allowed paths: fall back to root-based scoping, if any.
        if not repair.allowed_roots:
            return HookResult()
        if all(path_within_allowed_roots(path, repair.allowed_roots) for path in observed_paths):
            return HookResult()

        # Preview at most two roots in the deny message.
        roots_preview = ", ".join(f"`{root}`" for root in repair.allowed_roots[:2])
        if len(repair.allowed_roots) > 2:
            roots_preview += ", ..."
        return HookResult(
            decision=HookDecision.DENY,
            message=(
                "[Blocked - active repair scope: verification already identified "
                f"`{repair.artifact_path}` as the current repair target. "
                "Stay inside the current artifact set until that repair passes.] "
                "Suggestion: inspect or edit files under "
                f"{roots_preview} and do not reopen unrelated reference materials."
            ),
            terminal_state="blocked",
        )

    def _active_repair_context(self):
        """Return the active repair context, or None when no repair is in flight.

        A repair is active only when the session points at an existing DoD file
        whose status is not "done"; the context itself is recovered from the
        session's message history.
        """
        dod_path = getattr(self.session, "active_dod_path", None)
        if not dod_path:
            return None
        path = Path(str(dod_path))
        if not path.exists():
            return None
        dod = self.dod_store.load(path)
        if dod.status == "done":
            return None
        return extract_active_repair_context(getattr(self.session, "messages", []))
583
+
584
+
585
class ActiveRepairMutationScopeHook(BaseToolHook):
    """Keep repair-phase mutations pinned to the concrete repair targets."""

    def __init__(
        self,
        *,
        dod_store: DefinitionOfDoneStore,
        project_root: Path,
        session: Any,
    ) -> None:
        # Store used to load the active Definition-of-Done document.
        self.dod_store = dod_store
        # Project root used when resolving declared sibling output files.
        self.project_root = project_root
        # Runtime session; read for `active_dod_path` and `messages`.
        self.session = session

    async def pre_tool_use(self, context: HookContext) -> HookResult:
        """Deny mutation tool calls that target files outside the active repair set.

        Applies only to mutation tools while a repair with explicit allowed
        paths is active; verification calls pass through. Broad bash mutations
        whose target cannot be pinned to a single path are denied outright.
        """
        if context.tool_call.name not in _MUTATION_TOOLS:
            return HookResult()
        # Verification-originated calls are exempt from scoping.
        if context.source == "verification":
            return HookResult()

        # Only enforce when a repair is active AND it names explicit allowed paths.
        repair = self._active_repair_context()
        if repair is None or not repair.allowed_paths:
            return HookResult()
        allowed_paths = {normalize_repair_path(path) for path in repair.allowed_paths}

        mutation_paths = _extract_mutation_paths(context.tool_call)
        if not mutation_paths:
            # A mutating bash command whose target can't be extracted is too
            # broad to scope — deny and steer toward write/edit/patch.
            if context.tool_call.name == "bash" and _is_mutating_bash(
                str(context.tool_call.arguments.get("command", "")).strip()
            ):
                return HookResult(
                    decision=HookDecision.DENY,
                    message=(
                        "[Blocked - active repair mutation scope: the current repair already "
                        f"identifies `{repair.artifact_path}` as the concrete target.] "
                        "Suggestion: use write/edit/patch directly on one of the active repair "
                        "files instead of a broad shell mutation."
                    ),
                    terminal_state="blocked",
                )
            return HookResult()
        normalized_mutation_paths = [
            normalize_repair_path(path) for path in mutation_paths if str(path).strip()
        ]
        # Declared-but-missing sibling outputs are also mutable within this repair.
        allowed_declared_outputs = _repair_declared_output_paths(
            repair,
            project_root=self.project_root,
        )

        # NOTE(review): this first check is subsumed by the broader check just
        # below (allowed_paths OR declared outputs); kept as-is for fidelity.
        if normalized_mutation_paths and all(
            path in allowed_paths for path in normalized_mutation_paths
        ):
            return HookResult()
        if normalized_mutation_paths and all(
            path in allowed_paths or path in allowed_declared_outputs
            for path in normalized_mutation_paths
        ):
            return HookResult()

        # Build a short preview (max 3 entries) of allowed paths for the message.
        allowed_preview = ", ".join(f"`{path}`" for path in repair.allowed_paths[:3])
        if len(repair.allowed_paths) > 3:
            allowed_preview += ", ..."
        # Preview the declared sibling outputs by file name (max 3 entries).
        declared_preview = ", ".join(
            f"`{Path(path).name or path}`"
            for path in sorted(allowed_declared_outputs)[:3]
        )
        if len(allowed_declared_outputs) > 3:
            declared_preview += ", ..."
        suggestion_suffix = (
            f" Declared sibling outputs currently allowed inside this repair set include: {declared_preview}."
            if declared_preview
            else ""
        )
        return HookResult(
            decision=HookDecision.DENY,
            message=(
                "[Blocked - active repair mutation scope: verification already identified "
                f"`{repair.artifact_path}` as the current repair target.] Suggestion: keep "
                f"mutations on the active repair files only: {allowed_preview}."
                f"{suggestion_suffix}"
            ),
            terminal_state="blocked",
        )

    def _active_repair_context(self):
        """Return the active repair context, or None when no repair is in flight.

        Mirrors ActiveRepairScopeHook: requires an existing DoD file whose
        status is not "done", then recovers the repair context from the
        session's message history.
        """
        dod_path = getattr(self.session, "active_dod_path", None)
        if not dod_path:
            return None
        path = Path(str(dod_path))
        if not path.exists():
            return None
        dod = self.dod_store.load(path)
        if dod.status == "done":
            return None
        return extract_active_repair_context(getattr(self.session, "messages", []))
680
+
681
class LateReferenceDriftHook(BaseToolHook):
    """Block reopening old reference paths once planned artifacts are well underway."""

    # Minimum number of satisfied planned file targets before late-stage
    # drift enforcement kicks in.
    _MIN_COMPLETED_FILES = 3

    def __init__(self, *, dod_store: DefinitionOfDoneStore, project_root: Path, session: Any) -> None:
        # Store used to load the active Definition-of-Done document.
        self.dod_store = dod_store
        # Project root used to resolve planned artifact targets.
        self.project_root = project_root
        # Runtime session; read for `active_dod_path`.
        self.session = session

    async def pre_tool_use(self, context: HookContext) -> HookResult:
        """Deny observation calls that drift back to earlier reference materials.

        Two regimes, checked in order:
        1. All planned artifacts exist -> observations must stay within the
           completed output roots.
        2. Late stage (several planned files done, at least one missing) ->
           observations must stay within the planned roots until the next
           missing artifact is produced.
        """
        if context.tool_call.name not in _OBSERVATION_TOOLS:
            return HookResult()

        # Regime 1: every planned artifact already exists.
        completed_scope = self._completed_artifact_scope()
        if completed_scope is not None:
            observed_paths = _extract_observation_paths(context.tool_call)
            if not observed_paths:
                return HookResult()
            if all(path_within_allowed_roots(path, completed_scope) for path in observed_paths):
                return HookResult()

            # Preview at most two output roots in the deny message.
            roots_preview = ", ".join(f"`{root}`" for root in completed_scope[:2])
            if len(completed_scope) > 2:
                roots_preview += ", ..."
            return HookResult(
                decision=HookDecision.DENY,
                message=(
                    "[Blocked - completed artifact set scope: all explicitly planned artifacts "
                    "already exist.] Suggestion: stay within the current output roots under "
                    f"{roots_preview} and use those files as the source of truth instead of "
                    "reopening earlier reference materials."
                ),
                terminal_state="blocked",
            )

        # Regime 2: enough planned files done, but at least one still missing.
        late_stage = self._late_stage_missing_artifact()
        if late_stage is None:
            return HookResult()
        missing_artifact, planned_roots = late_stage
        observed_paths = _extract_observation_paths(context.tool_call)
        if not observed_paths:
            return HookResult()
        if all(path_within_allowed_roots(path, planned_roots) for path in observed_paths):
            return HookResult()

        roots_preview = ", ".join(f"`{root}`" for root in planned_roots[:2])
        if len(planned_roots) > 2:
            roots_preview += ", ..."
        return HookResult(
            decision=HookDecision.DENY,
            message=(
                "[Blocked - late reference drift: several planned artifacts already exist and "
                f"`{missing_artifact}` is still missing.] Suggestion: finish the next missing "
                f"artifact inside {roots_preview} before reopening earlier reference materials."
            ),
            terminal_state="blocked",
        )

    def _late_stage_missing_artifact(self) -> tuple[str, tuple[str, ...]] | None:
        """Return (first missing artifact label, planned roots) in the late stage.

        Returns None unless there is an active (non-done) DoD with planned
        targets, at least one unsatisfied target, and at least
        _MIN_COMPLETED_FILES satisfied file targets.
        """
        dod_path = getattr(self.session, "active_dod_path", None)
        if not dod_path:
            return None
        path = Path(str(dod_path))
        if not path.exists():
            return None
        dod = self.dod_store.load(path)
        if dod.status == "done":
            return None

        planned_targets = collect_planned_artifact_targets(
            dod,
            project_root=self.project_root,
        )
        if not planned_targets:
            return None

        missing_label = ""
        completed_files = 0
        planned_roots: list[str] = []
        seen_roots: set[str] = set()
        for target, expect_directory in planned_targets:
            satisfied = planned_artifact_target_satisfied(
                dod,
                target=target,
                expect_directory=expect_directory,
                project_root=self.project_root,
            )
            if not expect_directory:
                # File targets count toward the completion threshold; the
                # first unsatisfied one becomes the "missing" label.
                if satisfied:
                    completed_files += 1
                elif not missing_label:
                    missing_label = str(target)
                root = str(target.parent)
            else:
                # Directory targets contribute a root but never the count.
                if not satisfied and not missing_label:
                    missing_label = str(target)
                root = str(target)
            # Deduplicate roots while preserving first-seen order.
            if root not in seen_roots:
                planned_roots.append(root)
                seen_roots.add(root)

        if not missing_label:
            return None
        if completed_files < self._MIN_COMPLETED_FILES:
            return None
        return missing_label, tuple(planned_roots)

    def _completed_artifact_scope(self) -> tuple[str, ...] | None:
        """Return the planned output roots once every planned artifact exists.

        Returns None when there is no active DoD, the DoD is done or fixing,
        nothing is planned, or any planned artifact is still missing.
        """
        dod_path = getattr(self.session, "active_dod_path", None)
        if not dod_path:
            return None
        path = Path(str(dod_path))
        if not path.exists():
            return None
        dod = self.dod_store.load(path)
        # "fixing" is excluded: repairs have their own scoping hooks.
        if dod.status in {"done", "fixing"}:
            return None

        planned_targets = collect_planned_artifact_targets(
            dod,
            project_root=self.project_root,
        )
        if not planned_targets:
            return None
        if not all_planned_artifacts_exist(dod, project_root=self.project_root):
            return None

        # Deduplicate roots while preserving first-seen order.
        planned_roots: list[str] = []
        seen_roots: set[str] = set()
        for target, expect_directory in planned_targets:
            root = str(target if expect_directory else target.parent)
            if root in seen_roots:
                continue
            seen_roots.add(root)
            planned_roots.append(root)
        return tuple(planned_roots)
818
+
248819
 
249820
 class HookManager:
250821
     """Runs tool hooks across Loader's three lifecycle events."""
@@ -437,6 +1008,7 @@ def build_default_tool_hooks(
4371008
     registry: ToolRegistry,
4381009
     rollback_plan: RollbackPlan | None,
4391010
     workspace_root: Path,
1011
+    session: Any,
4401012
 ) -> HookManager:
4411013
     """Build Loader's default tool hook stack for one runtime turn."""
4421014
 
@@ -445,6 +1017,21 @@ def build_default_tool_hooks(
4451017
             FilePathAliasHook(),
4461018
             SearchPathAliasHook(),
4471019
             RelativePathContextHook(action_tracker, workspace_root),
1020
+            ActiveRepairScopeHook(
1021
+                dod_store=DefinitionOfDoneStore(workspace_root),
1022
+                project_root=workspace_root,
1023
+                session=session,
1024
+            ),
1025
+            ActiveRepairMutationScopeHook(
1026
+                dod_store=DefinitionOfDoneStore(workspace_root),
1027
+                project_root=workspace_root,
1028
+                session=session,
1029
+            ),
1030
+            LateReferenceDriftHook(
1031
+                dod_store=DefinitionOfDoneStore(workspace_root),
1032
+                project_root=workspace_root,
1033
+                session=session,
1034
+            ),
4481035
             DuplicateActionHook(action_tracker),
4491036
             ActionValidationHook(validator),
4501037
             RollbackTrackingHook(registry, rollback_plan),
src/loader/runtime/repair.pymodified
@@ -2,11 +2,67 @@
22
 
33
 from __future__ import annotations
44
 
5
+import re
56
 from dataclasses import dataclass, field
7
+from pathlib import Path
68
 
79
 from ..llm.base import ToolCall
810
 from .context import RuntimeContext
11
+from .dod import (
12
+    DefinitionOfDone,
13
+    collect_planned_artifact_targets,
14
+    infer_next_declared_html_output_file,
15
+    planned_artifact_target_satisfied,
16
+)
917
 from .parsing import parse_tool_calls
18
+from .workflow import effective_pending_todo_items, reconcile_aggregate_completion_steps
19
+
20
# DoD checklist entries managed by the runtime itself; progress summaries
# filter these out so retry prompts only surface user-visible work items.
_SPECIAL_DOD_ITEMS = {
    "Complete the requested work",
    "Collect verification evidence",
}
# Extra empty-response retries granted in the late stage (several planned
# artifacts already exist but at least one is still missing).
_LATE_STAGE_EMPTY_RETRY_EXTRA = 2
# Notepad tool names whose output counts as a "working note".
_WORKING_NOTE_TOOL_NAMES = (
    "notepad_write_working",
    "notepad_append",
    "notepad_write_priority",
    "notepad_write_manual",
)
# Keywords (base + -ing forms) that mark a todo item as a mutation step.
_MUTATION_TODO_HINTS = (
    "create",
    "creating",
    "update",
    "updating",
    "edit",
    "editing",
    "write",
    "writing",
    "fix",
    "fixing",
    "modify",
    "modifying",
    "change",
    "changing",
    "patch",
    "patching",
    "replace",
    "replacing",
    "correct",
    "correcting",
    "rewrite",
    "rewriting",
)
# Phrases that mark a todo item as a cross-file consistency review step.
_CONSISTENCY_REVIEW_HINTS = (
    "consistent",
    "consistently",
    "formatted",
    "link",
    "linked",
    "navigation",
    "work properly",
    "all files",
    "every file",
)
1066
 
1167
 
1268
 @dataclass(slots=True)
@@ -52,29 +108,37 @@ class ResponseRepairer:
52108
         original_task: str | None,
53109
         empty_retry_count: int,
54110
         max_empty_retries: int,
111
+        dod: DefinitionOfDone | None = None,
55112
     ) -> EmptyResponseDecision:
56113
         """Return the next action when the assistant responds with empty content."""
57114
 
58
-        _ = task, original_task, max_empty_retries
59
-        if empty_retry_count == 1:
115
+        _ = task, original_task
116
+        effective_max_empty_retries = self._effective_max_empty_retries(
117
+            dod,
118
+            base_max_empty_retries=max_empty_retries,
119
+        )
120
+        if empty_retry_count <= effective_max_empty_retries:
60121
             return EmptyResponseDecision(
61122
                 should_continue=True,
62123
                 reason_code="empty_response_retry",
63
-                reason_summary="retried after the assistant returned an empty response",
64
-                retry_message=(
65
-                    "[EMPTY ASSISTANT RESPONSE]\n"
66
-                    "Your last response was empty. Respond directly to the task "
67
-                    "or call tools if needed. Do not return an empty response."
124
+                reason_summary=(
125
+                    "retried after the assistant returned an empty response"
126
+                ),
127
+                retry_message=self._build_empty_response_retry_message(
128
+                    dod,
129
+                    retry_number=empty_retry_count,
130
+                    max_empty_retries=effective_max_empty_retries,
68131
                 ),
69132
             )
70133
 
71134
         return EmptyResponseDecision(
72135
             should_continue=False,
73136
             reason_code="empty_response_retry_exhausted",
74
-            reason_summary="stopped after the assistant returned empty responses twice",
137
+            reason_summary="stopped after the assistant returned empty responses repeatedly",
75138
             final_response=(
76
-                "I didn't get a usable response from the model after retrying once. "
77
-                "Please try again or switch to a different backend/model."
139
+                "I didn't get a usable response from the model after "
140
+                f"retrying {effective_max_empty_retries} times. Please try again or "
141
+                "switch to a different backend/model."
78142
             ),
79143
             failure="assistant returned empty output repeatedly",
80144
         )
@@ -167,3 +231,383 @@ class ResponseRepairer:
167231
             allowed_tool_names=allowed_tool_names,
168232
         )
169233
         return parsed.tool_calls
234
+
235
    def _build_empty_response_retry_message(
        self,
        dod: DefinitionOfDone | None,
        *,
        retry_number: int,
        max_empty_retries: int,
    ) -> str:
        """Build the retry prompt sent after an empty assistant response.

        When a DoD is available, the prompt embeds confirmed progress (latest
        working note, planned-artifact status, resume steps, touched files,
        completed/pending items) so the model continues instead of restarting.
        Without progress lines, a short generic retry message is returned.
        """
        progress_lines: list[str] = []
        if dod is not None:
            # Sync aggregate completion steps with on-disk state before
            # summarizing, so the progress lines reflect reality.
            reconcile_aggregate_completion_steps(
                dod,
                project_root=self.context.project_root,
            )
            latest_working_note = self._latest_working_note()
            if latest_working_note:
                progress_lines.append(
                    "Latest working note: " + latest_working_note
                )

            # Planned-artifact status, then the concrete resume instructions.
            planned_lines = self._planned_artifact_progress_lines(dod)
            progress_lines.extend(planned_lines)
            progress_lines.extend(
                self._next_step_resume_lines(
                    dod,
                    retry_number=retry_number,
                )
            )

            # Show at most the last three touched files, by file name.
            touched = [
                f"`{Path(path).name or path}`"
                for path in dod.touched_files[-3:]
                if str(path).strip()
            ]
            if touched:
                progress_lines.append(
                    "Confirmed touched files: " + ", ".join(touched)
                )

            # Last two completed user-visible items (runtime-managed ones excluded).
            completed = [
                item
                for item in dod.completed_items
                if item not in _SPECIAL_DOD_ITEMS
            ]
            if completed:
                progress_lines.append(
                    "Confirmed completed work: " + "; ".join(completed[-2:])
                )

            # First pending user-visible item, if any.
            next_pending = next(
                (
                    item
                    for item in dod.pending_items
                    if item not in _SPECIAL_DOD_ITEMS
                ),
                None,
            )
            if next_pending:
                progress_lines.append(f"Next pending item: {next_pending}")
            todo_refresh = self._todo_refresh_retry_line(dod)
            if todo_refresh:
                progress_lines.append(todo_refresh)

        # No usable progress: fall back to the generic retry message.
        if not progress_lines:
            return (
                "[EMPTY ASSISTANT RESPONSE]\n"
                f"Your last response was empty (retry {retry_number}/{max_empty_retries}). "
                "Respond directly to the task "
                "or call tools if needed. Do not return an empty response."
            )

        return "\n".join(
            [
                "[EMPTY ASSISTANT RESPONSE]",
                (
                    "Your last response was empty "
                    f"(retry {retry_number}/{max_empty_retries}). Continue from the "
                    "confirmed progress below instead of restarting."
                ),
                *[f"- {line}" for line in progress_lines],
                "",
                "Respond directly to the task or call tools if needed. Do not return an empty response.",
            ]
        )
318
+
319
+    def _todo_refresh_retry_line(self, dod: DefinitionOfDone) -> str | None:
320
+        non_special_pending = [
321
+            item for item in dod.pending_items if item not in _SPECIAL_DOD_ITEMS
322
+        ]
323
+        non_special_completed = [
324
+            item for item in dod.completed_items if item not in _SPECIAL_DOD_ITEMS
325
+        ]
326
+        if len(dod.touched_files) < 2 and (len(non_special_pending) + len(non_special_completed)) < 3:
327
+            return None
328
+        return (
329
+            "If the tracked steps are stale, refresh `TodoWrite` alongside the next "
330
+            "concrete mutation instead of spending a full turn on bookkeeping alone."
331
+        )
332
+
333
+    def _effective_max_empty_retries(
334
+        self,
335
+        dod: DefinitionOfDone | None,
336
+        *,
337
+        base_max_empty_retries: int,
338
+    ) -> int:
339
+        if dod is None:
340
+            return base_max_empty_retries
341
+        completed_artifacts, missing_artifacts = self._planned_artifact_counts(dod)
342
+        if completed_artifacts < 3 or missing_artifacts == 0:
343
+            return base_max_empty_retries
344
+        return base_max_empty_retries + _LATE_STAGE_EMPTY_RETRY_EXTRA
345
+
346
+    def _planned_artifact_counts(self, dod: DefinitionOfDone) -> tuple[int, int]:
347
+        completed = 0
348
+        missing = 0
349
+        for target, expect_directory in collect_planned_artifact_targets(
350
+            dod,
351
+            project_root=self.context.project_root,
352
+            max_paths=12,
353
+        ):
354
+            if planned_artifact_target_satisfied(
355
+                dod,
356
+                target=target,
357
+                expect_directory=expect_directory,
358
+                project_root=self.context.project_root,
359
+            ):
360
+                completed += 1
361
+            else:
362
+                missing += 1
363
+        return completed, missing
364
+
365
+    def _planned_artifact_progress_lines(self, dod: DefinitionOfDone) -> list[str]:
366
+        targets = collect_planned_artifact_targets(
367
+            dod,
368
+            project_root=self.context.project_root,
369
+            max_paths=12,
370
+        )
371
+        if not targets:
372
+            return []
373
+
374
+        missing_labels = [
375
+            self._format_artifact_label(target, expect_directory=expect_directory)
376
+            for target, expect_directory in targets
377
+            if not planned_artifact_target_satisfied(
378
+                dod,
379
+                target=target,
380
+                expect_directory=expect_directory,
381
+                project_root=self.context.project_root,
382
+            )
383
+        ]
384
+        if not missing_labels:
385
+            return []
386
+
387
+        lines = [f"Next missing planned artifact: {missing_labels[0]}"]
388
+        first_missing_target, first_missing_is_directory = next(
389
+            (
390
+                (target, expect_directory)
391
+                for target, expect_directory in targets
392
+                if not planned_artifact_target_satisfied(
393
+                    dod,
394
+                    target=target,
395
+                    expect_directory=expect_directory,
396
+                    project_root=self.context.project_root,
397
+                )
398
+            ),
399
+            (None, False),
400
+        )
401
+        if first_missing_target is not None and first_missing_is_directory:
402
+            next_output_file = infer_next_declared_html_output_file(
403
+                target=first_missing_target,
404
+                project_root=self.context.project_root,
405
+            )
406
+            if next_output_file is not None:
407
+                lines.append(
408
+                    "Next declared output under "
409
+                    f"{self._format_artifact_label(first_missing_target, expect_directory=True)}: "
410
+                    f"{self._format_artifact_label(next_output_file, expect_directory=False)}"
411
+                )
412
+        if len(missing_labels) > 1:
413
+            preview = ", ".join(missing_labels[:3])
414
+            if len(missing_labels) > 3:
415
+                preview += ", ..."
416
+            lines.append("Remaining planned artifacts: " + preview)
417
+        return lines
418
+
419
+    def _next_step_resume_lines(
420
+        self,
421
+        dod: DefinitionOfDone,
422
+        *,
423
+        retry_number: int,
424
+    ) -> list[str]:
425
+        completed_artifacts, _ = self._planned_artifact_counts(dod)
426
+        next_pending = next(
427
+            (
428
+                item
429
+                for item in effective_pending_todo_items(
430
+                    dod,
431
+                    project_root=self.context.project_root,
432
+                )
433
+                if item not in _SPECIAL_DOD_ITEMS
434
+            ),
435
+            None,
436
+        )
437
+        if (
438
+            completed_artifacts == 0
439
+            and next_pending
440
+            and not _todo_is_mutation_step(next_pending)
441
+            and not _todo_is_consistency_review_step(next_pending)
442
+        ):
443
+            lines = [f"Resume with this exact next step: advance `{next_pending}`."]
444
+            lines.append(
445
+                "Make the next response one concrete evidence-gathering tool call that "
446
+                "directly advances that step."
447
+            )
448
+            lines.append(
449
+                "Do not jump ahead to later artifact creation, verification, or a "
450
+                "completion summary until that discovery step is satisfied."
451
+            )
452
+            if retry_number >= 2:
453
+                lines.append(
454
+                    "Do not restart from scratch or return another working note; emit the "
455
+                    "next evidence-gathering tool call now."
456
+                )
457
+            else:
458
+                lines.append(
459
+                    "Do not restart from scratch unless one specific missing fact blocks "
460
+                    "that discovery step."
461
+                )
462
+            return lines
463
+
464
+        for target, expect_directory in collect_planned_artifact_targets(
465
+            dod,
466
+            project_root=self.context.project_root,
467
+            max_paths=12,
468
+        ):
469
+            if planned_artifact_target_satisfied(
470
+                dod,
471
+                target=target,
472
+                expect_directory=expect_directory,
473
+                project_root=self.context.project_root,
474
+            ):
475
+                continue
476
+            label = self._format_artifact_label(
477
+                target,
478
+                expect_directory=expect_directory,
479
+            )
480
+            if expect_directory:
481
+                next_output_file = infer_next_declared_html_output_file(
482
+                    target=target,
483
+                    project_root=self.context.project_root,
484
+                )
485
+                if next_output_file is not None:
486
+                    next_output_label = self._format_artifact_label(
487
+                        next_output_file,
488
+                        expect_directory=False,
489
+                    )
490
+                    if next_pending and _todo_is_mutation_step(next_pending):
491
+                        lines = [
492
+                            "Resume with this exact next step: continue "
493
+                            f"`{next_pending}` by creating {next_output_label}."
494
+                        ]
495
+                    else:
496
+                        lines = [
497
+                            "Resume with this exact next step: create "
498
+                            f"{next_output_label}."
499
+                        ]
500
+                    lines.append(
501
+                        f"It is the next missing declared output under {label}."
502
+                    )
503
+                    lines.append(
504
+                        f"Prefer one `write` call for `{next_output_file}` before more research."
505
+                    )
506
+                    if not next_output_file.parent.exists():
507
+                        lines.append(
508
+                            "The `write` tool can create that file's parent directories "
509
+                            "automatically, so do the write in one step instead of stopping "
510
+                            "for a separate mkdir."
511
+                        )
512
+                    if retry_number >= 2:
513
+                        lines.append(
514
+                            "Do not restart discovery; emit the next mutation tool call now."
515
+                        )
516
+                    else:
517
+                        lines.append(
518
+                            "Do not restart discovery unless one specific missing fact blocks this step."
519
+                        )
520
+                    return lines
521
+            if expect_directory and target.is_dir():
522
+                if next_pending and _todo_is_mutation_step(next_pending):
523
+                    lines = [
524
+                        "Resume with this exact next step: continue "
525
+                        f"`{next_pending}` by creating the next output file under {label}."
526
+                    ]
527
+                else:
528
+                    lines = [
529
+                        "Resume with this exact next step: create the next output file "
530
+                        f"under {label}."
531
+                    ]
532
+                lines.append(
533
+                    f"Prefer one concrete `write` call for a file inside `{target}` before more research."
534
+                )
535
+            else:
536
+                lines = [f"Resume with this exact next step: create {label}."]
537
+            if expect_directory and not target.is_dir():
538
+                lines.append(
539
+                    f"Prefer one concrete directory-creation step for `{target}` before more research."
540
+                )
541
+            elif not expect_directory:
542
+                lines.append(
543
+                    f"Prefer one `write` call for `{target}` before any more reference reads."
544
+                )
545
+                if not target.parent.exists():
546
+                    lines.append(
547
+                        "The `write` tool can create that file's parent directories "
548
+                        "automatically, so do the write in one step instead of stopping "
549
+                        "for a separate mkdir."
550
+                    )
551
+                lines.append(
552
+                    "Shape the next response as one concrete `write(file_path=..., "
553
+                    "content=...)` tool call for that exact path."
554
+                )
555
+            if completed_artifacts >= 3:
556
+                lines.append(
557
+                    "Follow the same one-file-at-a-time mutation pattern that already "
558
+                    "created the confirmed planned artifacts."
559
+                )
560
+            lines.append(
561
+                "Your next response should be the concrete mutation tool call itself, "
562
+                "not TodoWrite alone, verification, or a completion summary."
563
+            )
564
+            if retry_number >= 2:
565
+                lines.append(
566
+                    "Do not restart discovery; emit the next mutation tool call now."
567
+                )
568
+            else:
569
+                lines.append(
570
+                    "Do not restart discovery unless one specific missing fact blocks this step."
571
+                )
572
+            return lines
573
+        return []
574
+
575
+    @staticmethod
576
+    def _format_artifact_label(path: Path, *, expect_directory: bool) -> str:
577
+        label = path.name or str(path)
578
+        if expect_directory and not label.endswith("/"):
579
+            label += "/"
580
+        return f"`{label}`"
581
+
582
def _latest_working_note(self) -> str | None:
    """Return the headline of the most recent working-note observation.

    Scans session messages newest-first for an ``Observation [<tool>]:
    Result:`` marker from any working-note tool, takes the first non-blank
    line after the marker, and strips a leading ``- [status]`` todo prefix.
    Returns ``None`` when no usable note exists (or the stripped headline
    is empty).
    """
    history = list(getattr(self.context.session, "messages", []) or [])
    for entry in reversed(history):
        text = str(getattr(entry, "content", "") or "").strip()
        if not text:
            continue
        for tool in _WORKING_NOTE_TOOL_NAMES:
            marker = f"Observation [{tool}]: Result:"
            if marker not in text:
                continue
            body = text.split(marker, 1)[1].strip()
            if not body:
                continue
            headline = ""
            for candidate in body.splitlines():
                stripped = candidate.strip()
                if stripped:
                    headline = stripped
                    break
            if not headline:
                continue
            # Drop a leading todo marker such as "- [in_progress] ".
            headline = re.sub(r"^-\s*\[[^\]]+\]\s*", "", headline).strip()
            return headline or None
    return None
604
+
605
+
606
def _todo_is_mutation_step(label: str) -> bool:
    """Return True when the todo label contains a mutation-step hint token."""
    haystack = label.lower()
    for hint in _MUTATION_TODO_HINTS:
        if hint in haystack:
            return True
    return False
609
+
610
+
611
def _todo_is_consistency_review_step(label: str) -> bool:
    """Return True when the todo label contains a consistency-review hint token."""
    haystack = label.lower()
    for hint in _CONSISTENCY_REVIEW_HINTS:
        if hint in haystack:
            return True
    return False
src/loader/runtime/repair_focus.pyadded
@@ -0,0 +1,132 @@
1
+"""Shared helpers for extracting and enforcing active repair focus."""
2
+
3
+from __future__ import annotations
4
+
5
+import re
6
+from dataclasses import dataclass
7
+from os import sep
8
+from pathlib import Path
9
+
10
+from ..llm.base import Message
11
+
12
+
13
@dataclass(frozen=True)
class ActiveRepairContext:
    """Concrete repair focus extracted from recent verification feedback.

    Produced by :func:`extract_active_repair_context`; every path is a
    normalized absolute string (see ``normalize_repair_path``).
    """

    # Primary file named by an "Immediate next step: edit `...`" bullet
    # ("" when the feedback named no single next file).
    artifact_path: str
    # Captured "- ..." bullet lines from the "Repair focus:" section.
    # NOTE(review): a list field makes this frozen dataclass unhashable at
    # hash() time — confirm instances never land in sets/dict keys, or
    # consider tuple[str, ...] here.
    repair_lines: list[str]
    # Every backtick-quoted absolute path from the bullets, with paths that
    # exist on disk sorted first.
    allowed_paths: tuple[str, ...]
    # Parent directories of the allowed paths, with nested roots collapsed.
    allowed_roots: tuple[str, ...]
21
+
22
+
23
def extract_active_repair_context(
    messages: list[Message],
) -> ActiveRepairContext | None:
    """Return the most recent concrete repair target from session history.

    Walks *messages* newest-first, finds the latest one containing a
    ``Repair focus:`` section, and captures its ``- `` bullet lines.  From
    those bullets it extracts the "Immediate next step: edit `...`" file
    (if any) plus every backtick-quoted absolute/``~`` path, each
    normalized via ``normalize_repair_path``.  Returns ``None`` when no
    message carries a usable section.
    """

    for message in reversed(messages):
        content = str(getattr(message, "content", "") or "")
        if "Repair focus:" not in content:
            continue

        repair_lines: list[str] = []
        artifact_path = ""
        absolute_paths: list[str] = []
        capture = False
        for raw_line in content.splitlines():
            line = raw_line.strip()
            if not capture:
                # Ignore everything until the exact "Repair focus:" header.
                if line == "Repair focus:":
                    capture = True
                continue
            if not line:
                # A blank line terminates the bullet run once at least one
                # bullet was captured; before that it is just padding.
                if repair_lines:
                    break
                continue
            if not line.startswith("- "):
                # Same rule for non-bullet text: terminator after bullets,
                # ignorable filler before them.
                if repair_lines:
                    break
                continue

            repair_lines.append(line)
            if not artifact_path:
                # The first "Immediate next step: edit `...`" bullet names
                # the primary artifact to touch next.
                match = re.search(r"Immediate next step: edit `([^`]+)`", line)
                if match:
                    artifact_path = normalize_repair_path(match.group(1))

            # Collect every backtick-quoted absolute (or ~-prefixed) path
            # mentioned in the bullet, de-duplicated in first-seen order.
            for candidate in re.findall(r"`([^`]+)`", line):
                if not candidate.startswith(("/", "~")):
                    continue
                normalized = normalize_repair_path(candidate)
                if normalized not in absolute_paths:
                    absolute_paths.append(normalized)

        if repair_lines:
            if artifact_path:
                if artifact_path not in absolute_paths:
                    absolute_paths.insert(0, artifact_path)
            # Existing files sort ahead of not-yet-created ones, then
            # lexically, so callers see real targets first.
            allowed_paths = tuple(
                sorted(
                    absolute_paths,
                    key=lambda item: (not Path(item).exists(), item),
                )
            )
            allowed_roots = _collapse_roots(_path_roots(set(absolute_paths)))
            return ActiveRepairContext(
                artifact_path=artifact_path,
                repair_lines=repair_lines,
                allowed_paths=allowed_paths,
                allowed_roots=allowed_roots,
            )
    return None
83
+
84
+
85
def path_within_allowed_roots(path: str, allowed_roots: tuple[str, ...]) -> bool:
    """Return True when the normalized path equals or lives under any root.

    Blank roots are ignored; both the candidate and every root pass through
    ``normalize_repair_path`` first, and containment is a plain string-prefix
    test using the OS path separator.
    """

    target = normalize_repair_path(path)
    for raw_root in allowed_roots:
        if not str(raw_root).strip():
            continue
        root = normalize_repair_path(raw_root)
        if target == root or target.startswith(root + sep):
            return True
    return False
96
+
97
+
98
def path_matches_allowed_paths(path: str, allowed_paths: tuple[str, ...]) -> bool:
    """Return True when the path normalizes to one concrete repair file.

    Blank candidates in *allowed_paths* are ignored; comparison happens on
    ``normalize_repair_path`` output for both sides.
    """

    target = normalize_repair_path(path)
    candidates = {
        normalize_repair_path(entry)
        for entry in allowed_paths
        if str(entry).strip()
    }
    return target in candidates
106
+
107
+
108
def normalize_repair_path(raw_path: str) -> str:
    """Normalize a path string for comparisons: strip, expand ``~``, resolve.

    Returns ``""`` for blank/None input.  Resolution is non-strict, so the
    path need not exist; if resolving fails outright, the user-expanded but
    unresolved path is returned instead.
    """
    cleaned = str(raw_path or "").strip()
    if not cleaned:
        return ""
    try:
        return str(Path(cleaned).expanduser().resolve(strict=False))
    except (OSError, RuntimeError, ValueError):
        return str(Path(cleaned).expanduser())
116
+
117
+
118
+def _path_roots(paths: set[str]) -> set[str]:
119
+    roots: set[str] = set()
120
+    for raw_path in paths:
121
+        path = Path(raw_path)
122
+        roots.add(str(path.parent))
123
+    return roots
124
+
125
+
126
+def _collapse_roots(roots: set[str]) -> tuple[str, ...]:
127
+    collapsed: list[str] = []
128
+    for root in sorted(roots, key=lambda item: (len(item), item)):
129
+        if any(root == candidate or root.startswith(f"{candidate}{sep}") for candidate in collapsed):
130
+            continue
131
+        collapsed.append(root)
132
+    return tuple(collapsed)
src/loader/runtime/safeguard_services.pymodified
@@ -8,8 +8,6 @@ from dataclasses import dataclass
88
 from difflib import get_close_matches
99
 from pathlib import Path
1010
 
11
-from .semantic_rules import html_toc as html_toc_rule
12
-
1311
 TEXT_REWRITE_SUFFIXES = frozenset(
1412
     {
1513
         ".c",
@@ -145,7 +143,6 @@ class ActionTracker:
145143
     READ_REPEAT_THRESHOLD = 3
146144
     SEARCH_REPEAT_THRESHOLD = 2
147145
     BASH_OBSERVATION_REPEAT_THRESHOLD = 2
148
-    HTML_CHAPTER_EVIDENCE_THRESHOLD = 3
149146
     RECENT_PATH_CONTEXT_LIMIT = 12
150147
 
151148
     def __init__(self) -> None:
@@ -160,10 +157,7 @@ class ActionTracker:
160157
         self._recent_reads: dict[str, tuple[int, int, int]] = {}
161158
         self._recent_searches: dict[str, tuple[int, int, int]] = {}
162159
         self._recent_bash_observations: dict[str, tuple[int, int, int]] = {}
163
-        self._recent_html_directory_reads: dict[str, tuple[int, set[str]]] = {}
164160
         self._recent_path_contexts: list[str] = []
165
-        self._validated_html_tocs: dict[str, int] = {}
166
-        self._verified_html_inventory_dirs: set[str] = set()
167161
 
168162
     def reset(self) -> None:
169163
         self._file_writes.clear()
@@ -177,10 +171,7 @@ class ActionTracker:
177171
         self._recent_reads.clear()
178172
         self._recent_searches.clear()
179173
         self._recent_bash_observations.clear()
180
-        self._recent_html_directory_reads.clear()
181174
         self._recent_path_contexts.clear()
182
-        self._validated_html_tocs.clear()
183
-        self._verified_html_inventory_dirs.clear()
184175
 
185176
     def _normalize_path(self, path: str) -> str:
186177
         expanded = Path(path).expanduser()
@@ -250,22 +241,6 @@ class ActionTracker:
250241
     def recent_path_contexts(self) -> list[str]:
251242
         return list(self._recent_path_contexts)
252243
 
253
-    def note_validated_html_toc(self, index_path: str) -> None:
254
-        """Record that one index currently satisfies the semantic chapter-link check."""
255
-
256
-        normalized = self._normalize_path(index_path)
257
-        if not html_toc_rule.is_html_toc_index_path(normalized):
258
-            return
259
-        self._validated_html_tocs[normalized] = self._mutation_epoch
260
-
261
-    def note_verified_html_inventory(self, index_path: str) -> None:
262
-        """Record that one sibling chapter inventory is already known exactly."""
263
-
264
-        normalized = self._normalize_path(index_path)
265
-        path = Path(normalized)
266
-        chapters_dir = path if html_toc_rule.is_html_toc_chapters_dir(path) else path.parent / "chapters"
267
-        self._verified_html_inventory_dirs.add(self._normalize_path(str(chapters_dir)))
268
-
269244
     def check_tool_call(self, tool_name: str, arguments: dict) -> tuple[bool, str]:
270245
         if tool_name == "write":
271246
             file_path = arguments.get("file_path", "")
@@ -291,28 +266,8 @@ class ActionTracker:
291266
                     return True, f"Same patch already applied to: {file_path}"
292267
 
293268
         elif tool_name == "read":
294
-            inventory_duplicate, inventory_reason = self._check_verified_html_inventory_observation(
295
-                tool_name,
296
-                arguments,
297
-            )
298
-            if inventory_duplicate:
299
-                return True, inventory_reason
300
-            validated_duplicate, validated_reason = self._check_validated_html_toc_observation(
301
-                tool_name,
302
-                arguments,
303
-            )
304
-            if validated_duplicate:
305
-                return True, validated_reason
306269
             read_key = self._make_read_key(arguments)
307270
             if read_key:
308
-                sufficiency_duplicate, sufficiency_reason = (
309
-                    self._check_html_observation_sufficiency(
310
-                        tool_name,
311
-                        arguments,
312
-                    )
313
-                )
314
-                if sufficiency_duplicate:
315
-                    return True, sufficiency_reason
316271
                 duplicate, reason = self._check_recent_observation(
317272
                     self._recent_reads,
318273
                     read_key,
@@ -328,28 +283,8 @@ class ActionTracker:
328283
                     return True, reason
329284
 
330285
         elif tool_name in {"glob", "grep"}:
331
-            inventory_duplicate, inventory_reason = self._check_verified_html_inventory_observation(
332
-                tool_name,
333
-                arguments,
334
-            )
335
-            if inventory_duplicate:
336
-                return True, inventory_reason
337
-            validated_duplicate, validated_reason = self._check_validated_html_toc_observation(
338
-                tool_name,
339
-                arguments,
340
-            )
341
-            if validated_duplicate:
342
-                return True, validated_reason
343286
             observation_key = self._make_search_key(tool_name, arguments)
344287
             if observation_key:
345
-                sufficiency_duplicate, sufficiency_reason = (
346
-                    self._check_html_observation_sufficiency(
347
-                        tool_name,
348
-                        arguments,
349
-                    )
350
-                )
351
-                if sufficiency_duplicate:
352
-                    return True, sufficiency_reason
353288
                 duplicate, reason = self._check_recent_observation(
354289
                     self._recent_searches,
355290
                     observation_key,
@@ -365,18 +300,6 @@ class ActionTracker:
365300
         elif tool_name == "bash":
366301
             command = str(arguments.get("command", "")).strip()
367302
             if self._is_observational_bash(command):
368
-                inventory_duplicate, inventory_reason = self._check_verified_html_inventory_observation(
369
-                    tool_name,
370
-                    arguments,
371
-                )
372
-                if inventory_duplicate:
373
-                    return True, inventory_reason
374
-                validated_duplicate, validated_reason = self._check_validated_html_toc_observation(
375
-                    tool_name,
376
-                    arguments,
377
-                )
378
-                if validated_duplicate:
379
-                    return True, validated_reason
380303
                 duplicate, reason = self._check_recent_observation(
381304
                     self._recent_bash_observations,
382305
                     self._normalize_command(command),
@@ -406,7 +329,6 @@ class ActionTracker:
406329
             if file_path:
407330
                 self.record_file_create(file_path, content)
408331
                 self._record_path_context(file_path)
409
-                self._clear_verified_html_inventory_for_path(file_path)
410332
                 self._note_mutation()
411333
 
412334
         elif tool_name == "edit":
@@ -416,7 +338,6 @@ class ActionTracker:
416338
             if file_path:
417339
                 self.record_edit(file_path, old_string, new_string)
418340
                 self._record_path_context(file_path)
419
-                self._clear_verified_html_inventory_for_path(file_path)
420341
                 self._note_mutation()
421342
 
422343
         elif tool_name == "patch":
@@ -429,7 +350,6 @@ class ActionTracker:
429350
                 elif isinstance(raw_patch, str) and raw_patch.strip():
430351
                     self.record_edit(file_path, raw_patch, "raw_patch")
431352
                 self._record_path_context(file_path)
432
-                self._clear_verified_html_inventory_for_path(file_path)
433353
                 self._note_mutation()
434354
 
435355
         elif tool_name == "read":
@@ -442,7 +362,6 @@ class ActionTracker:
442362
             file_path = str(arguments.get("file_path", "")).strip()
443363
             if file_path:
444364
                 self._record_path_context(file_path)
445
-            self._record_html_directory_read(arguments)
446365
 
447366
         elif tool_name in {"glob", "grep"}:
448367
             observation_key = self._make_search_key(tool_name, arguments)
@@ -460,9 +379,6 @@ class ActionTracker:
460379
             if command:
461380
                 self.record_command(command)
462381
                 if self._is_mutating_bash(command):
463
-                    target = extract_shell_text_rewrite_target(command)
464
-                    if target:
465
-                        self._clear_verified_html_inventory_for_path(target)
466382
                     self._note_mutation()
467383
                 elif self._is_observational_bash(command):
468384
                     self._record_observation(
@@ -678,230 +594,6 @@ class ActionTracker:
678594
         if len(self._recent_path_contexts) > self.RECENT_PATH_CONTEXT_LIMIT:
679595
             del self._recent_path_contexts[self.RECENT_PATH_CONTEXT_LIMIT :]
680596
 
681
-    def _record_html_directory_read(self, arguments: dict) -> None:
682
-        file_path = str(arguments.get("file_path", "")).strip()
683
-        if not file_path:
684
-            return
685
-        normalized_path = self._normalize_path(file_path)
686
-        path = Path(normalized_path)
687
-        if not html_toc_rule.is_html_toc_chapter_file(path):
688
-            return
689
-
690
-        directory = str(path.parent)
691
-        last_seen = self._recent_html_directory_reads.get(directory)
692
-        if last_seen is None or last_seen[0] != self._mutation_epoch:
693
-            self._recent_html_directory_reads[directory] = (
694
-                self._mutation_epoch,
695
-                {path.name},
696
-            )
697
-            return
698
-
699
-        _, seen_files = last_seen
700
-        updated = set(seen_files)
701
-        updated.add(path.name)
702
-        self._recent_html_directory_reads[directory] = (
703
-            self._mutation_epoch,
704
-            updated,
705
-        )
706
-
707
-    def _check_html_observation_sufficiency(
708
-        self,
709
-        tool_name: str,
710
-        arguments: dict,
711
-    ) -> tuple[bool, str]:
712
-        if tool_name == "read":
713
-            file_path = str(arguments.get("file_path", "")).strip()
714
-            if not file_path:
715
-                return False, ""
716
-            normalized_path = self._normalize_path(file_path)
717
-            path = Path(normalized_path)
718
-            if not html_toc_rule.is_html_toc_index_path(path):
719
-                return False, ""
720
-            chapters_dir = str(path.parent / "chapters")
721
-            chapter_count = self._chapter_evidence_count(chapters_dir)
722
-            if chapter_count < self.HTML_CHAPTER_EVIDENCE_THRESHOLD:
723
-                return False, ""
724
-            read_key = self._make_read_key(arguments)
725
-            if read_key is None:
726
-                return False, ""
727
-            last_seen = self._recent_reads.get(read_key)
728
-            if last_seen is None:
729
-                return False, ""
730
-            _, _, repeat_count = last_seen
731
-            if repeat_count < 2:
732
-                return False, ""
733
-            return (
734
-                True,
735
-                "Already confirmed multiple linked chapter files in "
736
-                f"{html_toc_rule.describe_html_toc_chapters_dir(path)}; reuse that file/title "
737
-                f"evidence and update {html_toc_rule.describe_html_toc_target(path)} instead of "
738
-                "rereading it",
739
-            )
740
-
741
-        if tool_name in {"glob", "grep"}:
742
-            search_path = str(arguments.get("path", "")).strip()
743
-            if not search_path:
744
-                return False, ""
745
-            normalized_path = self._normalize_path(search_path)
746
-            path = Path(normalized_path)
747
-            if not html_toc_rule.is_html_toc_chapters_dir(path):
748
-                return False, ""
749
-            chapter_count = self._chapter_evidence_count(str(path))
750
-            if chapter_count < self.HTML_CHAPTER_EVIDENCE_THRESHOLD:
751
-                return False, ""
752
-            observation_key = self._make_search_key(tool_name, arguments)
753
-            if observation_key is None or observation_key not in self._recent_searches:
754
-                return False, ""
755
-            return (
756
-                True,
757
-                "Already confirmed multiple linked chapter files in "
758
-                f"{html_toc_rule.describe_html_toc_chapters_dir(path)}; reuse that filename/title "
759
-                f"evidence and update {html_toc_rule.describe_html_toc_target(path)} instead of "
760
-                "rerunning the directory search",
761
-            )
762
-
763
-        return False, ""
764
-
765
-    def _chapter_evidence_count(self, directory: str) -> int:
766
-        last_seen = self._recent_html_directory_reads.get(directory)
767
-        if last_seen is None:
768
-            return 0
769
-        last_epoch, seen_files = last_seen
770
-        if last_epoch != self._mutation_epoch:
771
-            return 0
772
-        return len(seen_files)
773
-
774
-    def _check_validated_html_toc_observation(
775
-        self,
776
-        tool_name: str,
777
-        arguments: dict,
778
-    ) -> tuple[bool, str]:
779
-        related_paths = self._validated_html_related_paths(tool_name, arguments)
780
-        if not related_paths:
781
-            return False, ""
782
-
783
-        for path in related_paths:
784
-            if self._matches_validated_html_toc(path):
785
-                return (
786
-                    True,
787
-                    html_toc_rule.build_validated_html_toc_observation_reason(path),
788
-                )
789
-        return False, ""
790
-
791
-    def _check_verified_html_inventory_observation(
792
-        self,
793
-        tool_name: str,
794
-        arguments: dict,
795
-    ) -> tuple[bool, str]:
796
-        related_paths = self._verified_inventory_related_paths(tool_name, arguments)
797
-        if not related_paths:
798
-            return False, ""
799
-
800
-        for path in related_paths:
801
-            if self._matches_verified_html_inventory(path):
802
-                return (
803
-                    True,
804
-                    html_toc_rule.build_verified_html_inventory_observation_reason(path),
805
-                )
806
-        return False, ""
807
-
808
-    def _validated_html_related_paths(
809
-        self,
810
-        tool_name: str,
811
-        arguments: dict,
812
-    ) -> list[str]:
813
-        if tool_name == "read":
814
-            file_path = str(arguments.get("file_path", "")).strip()
815
-            return [self._normalize_path(file_path)] if file_path else []
816
-
817
-        if tool_name in {"glob", "grep"}:
818
-            search_path = str(arguments.get("path", "")).strip()
819
-            return [self._normalize_path(search_path)] if search_path else []
820
-
821
-        if tool_name == "bash":
822
-            command = str(arguments.get("command", "")).strip()
823
-            if not command:
824
-                return []
825
-            return self._extract_observational_bash_paths(command)
826
-
827
-        return []
828
-
829
-    def _verified_inventory_related_paths(
830
-        self,
831
-        tool_name: str,
832
-        arguments: dict,
833
-    ) -> list[str]:
834
-        if tool_name == "read":
835
-            file_path = str(arguments.get("file_path", "")).strip()
836
-            return [self._normalize_path(file_path)] if file_path else []
837
-
838
-        if tool_name in {"glob", "grep"}:
839
-            search_path = str(arguments.get("path", "")).strip()
840
-            return [self._normalize_path(search_path)] if search_path else []
841
-
842
-        if tool_name == "bash":
843
-            command = str(arguments.get("command", "")).strip()
844
-            if not command:
845
-                return []
846
-            return self._extract_observational_bash_paths(command)
847
-
848
-        return []
849
-
850
-    def _matches_validated_html_toc(self, path: str) -> bool:
851
-        normalized = self._normalize_path(path)
852
-        candidate = Path(normalized)
853
-        for index_path, epoch in self._validated_html_tocs.items():
854
-            if epoch != self._mutation_epoch:
855
-                continue
856
-            index = Path(index_path)
857
-            chapters = Path(self._normalize_path(str(index.parent / "chapters")))
858
-            if candidate == index or candidate == chapters:
859
-                return True
860
-            if candidate.parent == chapters:
861
-                return True
862
-        return False
863
-
864
-    def _matches_verified_html_inventory(self, path: str) -> bool:
865
-        normalized = self._normalize_path(path)
866
-        candidate = Path(normalized)
867
-        for directory in self._verified_html_inventory_dirs:
868
-            chapters = Path(directory)
869
-            if candidate == chapters or candidate.parent == chapters:
870
-                return True
871
-        return False
872
-
873
-    def _clear_verified_html_inventory_for_path(self, path_value: str) -> None:
874
-        normalized = self._normalize_path(path_value)
875
-        candidate = Path(normalized)
876
-        stale: set[str] = set()
877
-        for directory in self._verified_html_inventory_dirs:
878
-            chapters = Path(directory)
879
-            if candidate == chapters or candidate.parent == chapters:
880
-                stale.add(directory)
881
-        self._verified_html_inventory_dirs.difference_update(stale)
882
-
883
-    def _extract_observational_bash_paths(self, command: str) -> list[str]:
884
-        norm_cmd = self._normalize_command(command)
885
-        try:
886
-            argv = shlex.split(norm_cmd)
887
-        except ValueError:
888
-            return []
889
-        if not argv:
890
-            return []
891
-
892
-        paths: list[str] = []
893
-        for token in argv[1:]:
894
-            candidate = _strip_shell_token(token)
895
-            if not candidate or candidate.startswith("-"):
896
-                continue
897
-            if any(marker in candidate for marker in ("/", "~")) or Path(candidate).suffix == ".html":
898
-                paths.append(self._normalize_path(candidate))
899
-                continue
900
-            if candidate.rstrip("/").endswith("chapters"):
901
-                paths.append(self._normalize_path(candidate))
902
-        return paths
903
-
904
-
905597
 @dataclass
906598
 class ValidationResult:
907599
     """Result of pre-action validation."""
@@ -1023,6 +715,10 @@ class PreActionValidator:
1023715
         if not path_result.valid:
1024716
             return path_result
1025717
 
718
+        sibling_result = self._validate_numbered_sibling_conflict(str(file_path))
719
+        if not sibling_result.valid:
720
+            return sibling_result
721
+
1026722
         if content is None or (isinstance(content, str) and not content.strip()):
1027723
             return ValidationResult(
1028724
                 valid=True,
@@ -1040,6 +736,13 @@ class PreActionValidator:
1040736
                     severity="block",
1041737
                 )
1042738
 
739
+        html_declared_target_result = self._validate_html_declared_target_set(
740
+            str(file_path),
741
+            str(content),
742
+        )
743
+        if not html_declared_target_result.valid:
744
+            return html_declared_target_result
745
+
1043746
         return ValidationResult(valid=True)
1044747
 
1045748
     def _validate_edit(self, arguments: dict) -> ValidationResult:
@@ -1087,6 +790,13 @@ class PreActionValidator:
1087790
         if not html_index_result.valid:
1088791
             return html_index_result
1089792
 
793
+        html_declared_target_result = self._validate_html_declared_target_set(
794
+            str(file_path),
795
+            str(new_string),
796
+        )
797
+        if not html_declared_target_result.valid:
798
+            return html_declared_target_result
799
+
1090800
         return ValidationResult(valid=True)
1091801
 
1092802
     def _validate_patch(self, arguments: dict) -> ValidationResult:
@@ -1106,6 +816,10 @@ class PreActionValidator:
1106816
         if not path_result.valid:
1107817
             return path_result
1108818
 
819
+        sibling_result = self._validate_numbered_sibling_conflict(str(file_path))
820
+        if not sibling_result.valid:
821
+            return sibling_result
822
+
1109823
         has_hunks = isinstance(hunks, list) and bool(hunks)
1110824
         has_raw_patch = isinstance(raw_patch, str) and bool(raw_patch.strip())
1111825
         if not has_hunks and not has_raw_patch:
@@ -1118,6 +832,42 @@ class PreActionValidator:
1118832
 
1119833
         return ValidationResult(valid=True)
1120834
 
835
+    def _validate_numbered_sibling_conflict(self, file_path: str) -> ValidationResult:
836
+        path = Path(file_path).expanduser()
837
+        if path.exists() or not path.suffix or not path.parent.exists():
838
+            return ValidationResult(valid=True)
839
+
840
+        prefix_match = re.match(r"^(\d+)[-_]", path.name)
841
+        if prefix_match is None:
842
+            return ValidationResult(valid=True)
843
+
844
+        prefix = prefix_match.group(1)
845
+        siblings = sorted(
846
+            candidate
847
+            for candidate in path.parent.iterdir()
848
+            if (
849
+                candidate.is_file()
850
+                and candidate.suffix == path.suffix
851
+                and candidate.name != path.name
852
+                and re.match(rf"^{re.escape(prefix)}[-_]", candidate.name)
853
+            )
854
+        )
855
+        if not siblings:
856
+            return ValidationResult(valid=True)
857
+
858
+        preview = ", ".join(candidate.name for candidate in siblings[:3])
859
+        if len(siblings) > 3:
860
+            preview += ", ..."
861
+        return ValidationResult(
862
+            valid=False,
863
+            reason="New file conflicts with an existing numbered sibling",
864
+            suggestion=(
865
+                f"Reuse the confirmed numbered file in `{path.parent}` instead of "
866
+                f"creating an alternate filename for step {prefix}, for example: {preview}"
867
+            ),
868
+            severity="error",
869
+        )
870
+
1121871
     def _validate_read(self, arguments: dict) -> ValidationResult:
1122872
         file_path = arguments.get("file_path", "")
1123873
 
@@ -1129,7 +879,19 @@ class PreActionValidator:
1129879
                 severity="error",
1130880
             )
1131881
 
1132
-        return self._validate_path(file_path)
882
+        path_result = self._validate_path(file_path)
883
+        if not path_result.valid:
884
+            return path_result
885
+
886
+        sibling_result = self._validate_numbered_sibling_conflict(str(file_path))
887
+        if not sibling_result.valid:
888
+            return ValidationResult(
889
+                valid=False,
890
+                reason="Read target conflicts with an existing numbered sibling",
891
+                suggestion=sibling_result.suggestion,
892
+                severity="error",
893
+            )
894
+        return path_result
1133895
 
1134896
     def _validate_search(self, tool_name: str, arguments: dict) -> ValidationResult:
1135897
         pattern = arguments.get("pattern", "")
@@ -1150,7 +912,7 @@ class PreActionValidator:
1150912
         content: str,
1151913
     ) -> ValidationResult:
1152914
         normalized = Path(file_path).expanduser()
1153
-        if not html_toc_rule.is_html_toc_index_path(normalized) or "<a " not in content:
915
+        if normalized.suffix.lower() != ".html" or "<a " not in content:
1154916
             return ValidationResult(valid=True)
1155917
 
1156918
         link_pairs = re.findall(r'<a\s+href="([^"]+)">([^<]+)</a>', content)
@@ -1159,65 +921,147 @@ class PreActionValidator:
1159921
 
1160922
         root = normalized.parent
1161923
         missing: list[str] = []
1162
-        mismatched: list[str] = []
1163
-        for href, label in link_pairs:
924
+        for href, _label in link_pairs:
925
+            target_text = href.strip()
926
+            if not target_text or target_text.startswith(("#", "mailto:", "tel:", "javascript:")):
927
+                continue
928
+            if "://" in target_text:
929
+                continue
1164930
             target = (root / href).resolve(strict=False)
1165931
             if not target.exists():
1166932
                 if href not in missing:
1167933
                     missing.append(href)
1168
-                continue
1169
-
1170
-            title = html_toc_rule.read_html_title(target)
1171
-            if title and label.strip() != title:
1172
-                if href not in mismatched:
1173
-                    mismatched.append(href)
1174934
 
1175935
         if missing:
1176
-            suggestions = self._suggest_existing_html_targets(root, missing)
1177
-            preview_items = [
1178
-                html_toc_rule.format_html_inventory_entry(root, root / suggestion)
1179
-                for suggestion in suggestions
1180
-            ]
1181
-            if not preview_items:
1182
-                preview_items = missing
1183
-            preview = ", ".join(preview_items[:3])
1184
-            if len(preview_items) > 3:
936
+            preview = ", ".join(missing[:3])
937
+            if len(missing) > 3:
1185938
                 preview += ", ..."
1186939
             return ValidationResult(
1187940
                 valid=False,
1188
-                reason="Edited TOC references chapter files that do not exist",
1189
-                suggestion=(
1190
-                    f"Use only existing chapter href/title pairs from beside "
1191
-                    f"{html_toc_rule.describe_html_toc_target(normalized)}, for example: "
1192
-                    f"{preview}"
1193
-                ),
1194
-                severity="error",
1195
-            )
1196
-
1197
-        if mismatched:
1198
-            exact_entries = [
1199
-                html_toc_rule.format_html_inventory_entry(root, (root / href).resolve(strict=False))
1200
-                for href in mismatched
1201
-                if (root / href).resolve(strict=False).exists()
1202
-            ]
1203
-            if not exact_entries:
1204
-                exact_entries = mismatched
1205
-            preview = "; ".join(exact_entries[:2])
1206
-            if len(exact_entries) > 2:
1207
-                preview += "; ..."
1208
-            return ValidationResult(
1209
-                valid=False,
1210
-                reason="Edited TOC labels do not match the linked chapter titles",
941
+                reason="Edited HTML links point to files that do not exist",
1211942
                 suggestion=(
1212
-                    f"Copy the exact href/title pair from the linked HTML file for "
1213
-                    f"{html_toc_rule.describe_html_toc_target(normalized)}, for example: "
1214
-                    f"{preview}"
943
+                    "Use only existing local targets for href values and avoid "
944
+                    f"introducing missing links, for example fix: {preview}"
1215945
                 ),
1216946
                 severity="error",
1217947
             )
1218948
 
1219949
         return ValidationResult(valid=True)
1220950
 
951
+    def _validate_html_declared_target_set(
952
+        self,
953
+        file_path: str,
954
+        content: str,
955
+    ) -> ValidationResult:
956
+        normalized = Path(file_path).expanduser()
957
+        if normalized.suffix.lower() != ".html" or normalized.name.lower() == "index.html":
958
+            return ValidationResult(valid=True)
959
+
960
+        local_targets = self._collect_local_html_targets(normalized, content)
961
+        if not local_targets:
962
+            return ValidationResult(valid=True)
963
+
964
+        root = self._resolve_html_artifact_root(normalized)
965
+        existing_html_files = [
966
+            path
967
+            for path in root.rglob("*.html")
968
+            if path.is_file() and path != normalized
969
+        ]
970
+        if not existing_html_files:
971
+            return ValidationResult(valid=True)
972
+
973
+        declared_targets = self._collect_declared_html_targets(root, existing_html_files)
974
+        undeclared_missing: list[str] = []
975
+        for href, resolved in local_targets:
976
+            if resolved.exists():
977
+                continue
978
+            relative_target = self._relative_html_target(root, resolved)
979
+            if relative_target is None:
980
+                continue
981
+            if relative_target not in declared_targets and href not in undeclared_missing:
982
+                undeclared_missing.append(href)
983
+
984
+        if not undeclared_missing:
985
+            return ValidationResult(valid=True)
986
+
987
+        preview = ", ".join(undeclared_missing[:3])
988
+        if len(undeclared_missing) > 3:
989
+            preview += ", ..."
990
+        declared_preview = ", ".join(sorted(declared_targets)[:3])
991
+        suggestion = (
992
+            "Keep non-root HTML pages within the current declared local-link set and "
993
+            f"avoid introducing new missing sibling targets, for example fix: {preview}"
994
+        )
995
+        if declared_preview:
996
+            suggestion += f". Already-declared local targets include: {declared_preview}"
997
+        return ValidationResult(
998
+            valid=False,
999
+            reason="HTML page introduces new local targets outside the current declared artifact set",
1000
+            suggestion=suggestion,
1001
+            severity="error",
1002
+        )
1003
+
1004
+    def _collect_local_html_targets(
1005
+        self,
1006
+        file_path: Path,
1007
+        content: str,
1008
+    ) -> list[tuple[str, Path]]:
1009
+        pattern = re.compile(r'href\s*=\s*["\']([^"\']+)["\']', re.IGNORECASE)
1010
+        targets: list[tuple[str, Path]] = []
1011
+        seen: set[str] = set()
1012
+        for href in pattern.findall(content):
1013
+            target_text = href.strip()
1014
+            if not self._is_local_html_link_target(target_text):
1015
+                continue
1016
+            resolved = (file_path.parent / target_text).resolve(strict=False)
1017
+            key = f"{target_text}::{resolved}"
1018
+            if key in seen:
1019
+                continue
1020
+            seen.add(key)
1021
+            targets.append((target_text, resolved))
1022
+        return targets
1023
+
1024
+    def _collect_declared_html_targets(
1025
+        self,
1026
+        root: Path,
1027
+        html_files: list[Path],
1028
+    ) -> set[str]:
1029
+        declared: set[str] = set()
1030
+        for html_file in html_files:
1031
+            try:
1032
+                text = html_file.read_text()
1033
+            except OSError:
1034
+                continue
1035
+            for _href, resolved in self._collect_local_html_targets(html_file, text):
1036
+                relative_target = self._relative_html_target(root, resolved)
1037
+                if relative_target is not None:
1038
+                    declared.add(relative_target)
1039
+        return declared
1040
+
1041
+    def _resolve_html_artifact_root(self, file_path: Path) -> Path:
1042
+        for candidate in [file_path.parent, *file_path.parents]:
1043
+            if (candidate / "index.html").exists():
1044
+                return candidate
1045
+        return file_path.parent
1046
+
1047
+    def _relative_html_target(self, root: Path, target: Path) -> str | None:
1048
+        try:
1049
+            return str(target.relative_to(root))
1050
+        except ValueError:
1051
+            return None
1052
+
1053
+    @staticmethod
1054
+    def _is_local_html_link_target(href: str) -> bool:
1055
+        target = href.strip()
1056
+        if not target:
1057
+            return False
1058
+        if target.startswith(("#", "mailto:", "tel:", "javascript:")):
1059
+            return False
1060
+        if "://" in target:
1061
+            return False
1062
+        normalized = target.split("#", 1)[0].split("?", 1)[0].strip().lower()
1063
+        return normalized.endswith(".html")
1064
+
12211065
     def _suggest_existing_html_targets(self, root: Path, missing: list[str]) -> list[str]:
12221066
         available_by_directory: dict[Path, list[str]] = {}
12231067
         suggestions: list[str] = []
src/loader/runtime/tool_batch_recovery.pymodified
@@ -6,6 +6,7 @@ import re
66
 from collections.abc import Awaitable, Callable
77
 from difflib import SequenceMatcher
88
 from pathlib import Path
9
+from typing import Any
910
 
1011
 from ..llm.base import Message, Role, ToolCall
1112
 from .compaction import (
@@ -17,7 +18,7 @@ from .context import RuntimeContext
1718
 from .events import AgentEvent
1819
 from .executor import ToolExecutionOutcome
1920
 from .recovery import RecoveryContext, format_failure_message, format_recovery_prompt
20
-from .semantic_rules import html_toc as html_toc_rule
21
+from .repair_focus import ActiveRepairContext, extract_active_repair_context
2122
 
2223
 EventSink = Callable[[AgentEvent], Awaitable[None]]
2324
 
@@ -59,7 +60,9 @@ class ToolBatchRecoveryController:
5960
                     type="error",
6061
                     content=(
6162
                         "Loop detected: already tried a similar command. "
62
-                        "Try a DIFFERENT approach (e.g., read a config file first)."
63
+                        "Try a different next step using the files and facts you already have "
64
+                        "(for example, make the specific edit, verify the current result, or "
65
+                        "inspect one concrete unresolved target)."
6366
                     ),
6467
                     tool_name=tool_call.name,
6568
                 )
@@ -129,21 +132,71 @@ class ToolBatchRecoveryController:
129132
 
130133
         session = self.context.session
131134
         current_task = getattr(session, "current_task", None)
132
-        focus_path = self._preferred_focus_path(
133
-            tool_call=tool_call,
134
-            current_task=current_task,
135
-        )
135
+        active_repair = self._active_repair_context()
136
+        effective_task = current_task
137
+        if active_repair is not None and active_repair.artifact_path:
138
+            effective_task = (
139
+                "Repair the current artifact using the failed verification evidence: "
140
+                f"{active_repair.artifact_path}"
141
+            )
142
+            focus_path = active_repair.artifact_path
143
+            preferred_next_step = (
144
+                f"Update `{active_repair.artifact_path}` to resolve the current "
145
+                "verification failures."
146
+            )
147
+        else:
148
+            focus_path = self._preferred_focus_path(
149
+                tool_call=tool_call,
150
+                current_task=current_task,
151
+            )
152
+            preferred_next_step = infer_preferred_next_step(
153
+                session.messages,
154
+                current_task=effective_task,
155
+                focus_path=focus_path or None,
156
+            )
136157
         confirmed_facts = summarize_confirmed_facts(session.messages)
137
-        preferred_next_step = infer_preferred_next_step(
138
-            session.messages,
139
-            current_task=current_task,
140
-            focus_path=focus_path or None,
141
-        )
142
-        actionable_known_state = bool(confirmed_facts and preferred_next_step)
143158
         lines = [prompt]
144
-        if confirmed_facts or preferred_next_step or current_task:
159
+        candidate_lines = self._file_not_found_candidate_lines(
160
+            tool_call,
161
+            outcome,
162
+            active_repair=active_repair,
163
+        )
164
+        actionable_known_state = bool(
165
+            active_repair or current_task or confirmed_facts or preferred_next_step or candidate_lines
166
+        )
167
+        if active_repair is not None:
168
+            lines.extend(["", "## ACTIVE REPAIR TARGET"])
169
+            lines.append(
170
+                "- Verification already failed on the current artifact set. "
171
+                "Stay on this repair until the broken local references are fixed."
172
+            )
173
+            lines.extend(active_repair.repair_lines)
174
+            drifted_path = self._canonicalize_path(
175
+                str(
176
+                    tool_call.arguments.get("file_path")
177
+                    or tool_call.arguments.get("path")
178
+                    or ""
179
+                ).strip()
180
+            )
181
+            if (
182
+                drifted_path
183
+                and active_repair.artifact_path
184
+                and drifted_path != active_repair.artifact_path
185
+            ):
186
+                lines.append(
187
+                    f"- The failed tool call drifted to `{drifted_path}`. "
188
+                    f"Return to `{active_repair.artifact_path}` instead of reopening "
189
+                    "the original discovery task."
190
+                )
191
+            lines.append(
192
+                "- Treat this repair as higher priority than the original discovery "
193
+                "prompt until verification passes."
194
+            )
195
+        if active_repair or confirmed_facts or preferred_next_step or current_task:
145196
             lines.extend(["", "## CONTINUE FROM KNOWN STATE"])
146
-            if current_task:
197
+            if active_repair is not None and active_repair.artifact_path:
198
+                lines.append(f"- Active repair target: `{active_repair.artifact_path}`")
199
+            elif current_task:
147200
                 lines.append(f"- Current task: {current_task}")
148201
             if confirmed_facts:
149202
                 lines.append(f"- Confirmed facts: {confirmed_facts}")
@@ -153,18 +206,28 @@ class ToolBatchRecoveryController:
153206
                 "- Preserve progress: do not restart by rereading already-confirmed files "
154207
                 "unless you need genuinely new evidence."
155208
             )
209
+            if active_repair is not None:
210
+                lines.append(
211
+                    "- Do not go back to the original reference guide or invent alternate "
212
+                    "paths while this repair target is unresolved."
213
+                )
156214
             if actionable_known_state:
215
+                target_line = (
216
+                    f"- Prefer edit/write/patch on `{active_repair.artifact_path}` over "
217
+                    "rereading the same files."
218
+                    if active_repair is not None and active_repair.artifact_path
219
+                    else "- Prefer edit/write/patch on the target file over rereading the same files."
220
+                )
157221
                 lines.extend(
158222
                     [
159223
                         "",
160224
                         "## ACTION BIAS FOR THIS RECOVERY",
161225
                         "- The confirmed findings above are already enough to keep moving.",
162
-                        "- Prefer edit/write/patch on the target file over rereading the same files.",
226
+                        target_line,
163227
                         "- Only inspect one more file if a specific filename, href, or title is still unknown.",
164228
                         "- Treat the preferred next step as the default path forward.",
165229
                     ]
166230
                 )
167
-        candidate_lines = self._file_not_found_candidate_lines(tool_call, outcome)
168231
         if candidate_lines:
169232
             lines.extend(["", "## LIKELY FILE CANDIDATES", *candidate_lines])
170233
         target_excerpt_lines = self._target_excerpt_lines(tool_call)
@@ -229,6 +292,8 @@ class ToolBatchRecoveryController:
229292
         self,
230293
         tool_call: ToolCall,
231294
         outcome: ToolExecutionOutcome,
295
+        *,
296
+        active_repair: ActiveRepairContext | None = None,
232297
     ) -> list[str]:
233298
         if tool_call.name not in {"read", "write", "edit", "patch"}:
234299
             return []
@@ -247,14 +312,26 @@ class ToolBatchRecoveryController:
247312
 
248313
         candidates = self._rank_known_file_candidates(missing_path)
249314
         if not candidates:
315
+            if active_repair is not None and active_repair.artifact_path:
316
+                return [
317
+                    f"- Requested file does not exist: `{missing_path}`",
318
+                    f"- Active repair target is `{active_repair.artifact_path}`.",
319
+                    "- Repair the known target instead of inventing a new path.",
320
+                ]
250321
             return []
251322
 
252323
         names = ", ".join(self._describe_candidate(candidate) for candidate in candidates[:3])
253
-        return [
324
+        lines = [
254325
             f"- Requested file does not exist: `{missing_path}`",
255326
             f"- Closest known files in the same directory: {names}",
256327
             "- Prefer one of those exact filenames instead of retrying the missing path.",
257328
         ]
329
+        if active_repair is not None and active_repair.artifact_path:
330
+            lines.append(
331
+                f"- Keep the repair centered on `{active_repair.artifact_path}` rather than "
332
+                "switching back to broad discovery."
333
+            )
334
+        return lines
258335
 
259336
     def _rank_known_file_candidates(self, missing_path: str) -> list[str]:
260337
         missing_parent = str(Path(missing_path).parent)
@@ -316,51 +393,261 @@ class ToolBatchRecoveryController:
316393
 
317394
     def _describe_candidate(self, candidate: str) -> str:
318395
         path = Path(candidate)
319
-        label = f"`{path.name}`"
320
-        if path.suffix == ".html":
321
-            title = html_toc_rule.read_html_title(path)
322
-            if title:
323
-                return f"{label} = {title}"
324
-        return label
396
+        return f"`{path.name}`"
325397
 
326398
     def _target_excerpt_lines(self, tool_call: ToolCall) -> list[str]:
327
-        file_path = str(
399
+        if tool_call.name not in {"edit", "patch"}:
400
+            return []
401
+
402
+        raw_path = str(
328403
             tool_call.arguments.get("file_path")
329404
             or tool_call.arguments.get("path")
330405
             or ""
331406
         ).strip()
332
-        if not file_path:
407
+        target_path = self._canonicalize_path(raw_path)
408
+        if not target_path:
333409
             return []
334
-        current_task = getattr(self.context.session, "current_task", None)
335
-        if not html_toc_rule.task_targets_html_toc(current_task):
410
+
411
+        path = Path(target_path)
412
+        if not path.is_file():
336413
             return []
337414
 
338
-        inventory = html_toc_rule.summarize_html_inventory(file_path, limit=12)
339
-        excerpt = html_toc_rule.extract_html_toc_excerpt(file_path)
340
-        if not inventory and not excerpt:
415
+        try:
416
+            content = path.read_text()
417
+        except Exception:
341418
             return []
342419
 
343
-        lines: list[str] = []
344
-        if inventory:
345
-            lines.append(f"- Verified chapter inventory: {inventory}")
346
-        if excerpt:
347
-            lines.append("- Current TOC block:")
348
-            lines.extend(f"  {line}" for line in excerpt.splitlines())
349
-        replacement = html_toc_rule.build_html_toc_replacement_block(file_path)
350
-        if replacement:
351
-            lines.append("- Suggested replacement block:")
352
-            lines.extend(f"  {line}" for line in replacement.splitlines())
353
-        if excerpt and replacement:
354
-            lines.append("- Exact edit guidance:")
355
-            lines.append(f"  file_path: {file_path}")
356
-            lines.append("  old_string: use the Current TOC block above exactly")
357
-            lines.append("  new_string: use the Suggested replacement block above exactly")
358
-            lines.append("  Do not rewrite the whole file.")
359
-        edit_template = html_toc_rule.build_html_toc_edit_call_template(file_path)
360
-        if edit_template:
361
-            lines.append("- Suggested edit call:")
362
-            lines.extend(f"  {line}" for line in edit_template.splitlines())
363
-        return lines
420
+        file_lines = content.splitlines()
421
+        if not file_lines:
422
+            return [
423
+                f"- Target file: `{target_path}`",
424
+                "- The file is currently empty.",
425
+                "- Use the exact on-disk state above when preparing the next mutation.",
426
+            ]
427
+
428
+        start, end, label = self._excerpt_window_for_tool_call(
429
+            file_lines=file_lines,
430
+            content=content,
431
+            tool_call=tool_call,
432
+        )
433
+        excerpt = self._format_excerpt_lines(file_lines, start, end)
434
+        if not excerpt:
435
+            return []
436
+
437
+        return [
438
+            f"- Target file: `{target_path}`",
439
+            f"- {label}",
440
+            *excerpt,
441
+            "- Use the exact on-disk text above when preparing the next mutation.",
442
+            "- If several adjacent lines are wrong, replace the containing block in one edit instead of retrying a smaller substitution.",
443
+        ]
444
+
445
+    def _excerpt_window_for_tool_call(
446
+        self,
447
+        *,
448
+        file_lines: list[str],
449
+        content: str,
450
+        tool_call: ToolCall,
451
+    ) -> tuple[int, int, str]:
452
+        if tool_call.name == "edit":
453
+            window = self._edit_excerpt_window(
454
+                file_lines=file_lines,
455
+                content=content,
456
+                arguments=tool_call.arguments,
457
+            )
458
+            if window is not None:
459
+                return window
460
+        if tool_call.name == "patch":
461
+            window = self._patch_excerpt_window(
462
+                file_lines=file_lines,
463
+                arguments=tool_call.arguments,
464
+            )
465
+            if window is not None:
466
+                return window
467
+        return self._bounded_window(
468
+            file_lines=file_lines,
469
+            start=0,
470
+            length=min(10, len(file_lines)),
471
+            label="Current file contents:",
472
+        )
473
+
474
+    def _edit_excerpt_window(
475
+        self,
476
+        *,
477
+        file_lines: list[str],
478
+        content: str,
479
+        arguments: dict[str, Any],
480
+    ) -> tuple[int, int, str] | None:
481
+        old_string = str(arguments.get("old_string") or "")
482
+        new_string = str(arguments.get("new_string") or "")
483
+
484
+        if old_string:
485
+            exact_window = self._exact_string_window(
486
+                content=content,
487
+                file_lines=file_lines,
488
+                needle=old_string,
489
+                label="Current file contents for the requested edit:",
490
+            )
491
+            if exact_window is not None:
492
+                return exact_window
493
+
494
+        anchor = old_string or new_string
495
+        approximate_window = self._approximate_string_window(
496
+            file_lines=file_lines,
497
+            needle=anchor,
498
+            label="Closest on-disk block to the requested edit:",
499
+        )
500
+        if approximate_window is not None:
501
+            return approximate_window
502
+        return None
503
+
504
+    def _patch_excerpt_window(
505
+        self,
506
+        *,
507
+        file_lines: list[str],
508
+        arguments: dict[str, Any],
509
+    ) -> tuple[int, int, str] | None:
510
+        hunks = arguments.get("hunks")
511
+        if not isinstance(hunks, list) or not hunks:
512
+            return None
513
+
514
+        first_hunk = hunks[0]
515
+        if not isinstance(first_hunk, dict):
516
+            return None
517
+
518
+        anchor_lines: list[str] = []
519
+        raw_lines = first_hunk.get("lines")
520
+        if isinstance(raw_lines, list):
521
+            for raw_line in raw_lines:
522
+                if not isinstance(raw_line, str) or not raw_line:
523
+                    continue
524
+                if raw_line[0] in {" ", "-"}:
525
+                    anchor_lines.append(raw_line[1:])
526
+
527
+        anchor = "\n".join(anchor_lines).strip()
528
+        approximate_window = self._approximate_string_window(
529
+            file_lines=file_lines,
530
+            needle=anchor,
531
+            label="Closest on-disk block to the requested patch:",
532
+        )
533
+        if approximate_window is not None:
534
+            return approximate_window
535
+
536
+        old_start = first_hunk.get("old_start", 1)
537
+        old_lines = first_hunk.get("old_lines", len(anchor_lines) or 1)
538
+        try:
539
+            start = max(0, int(old_start) - 1)
540
+        except (TypeError, ValueError):
541
+            start = 0
542
+        try:
543
+            length = max(1, int(old_lines))
544
+        except (TypeError, ValueError):
545
+            length = max(1, len(anchor_lines) or 1)
546
+        return self._bounded_window(
547
+            file_lines=file_lines,
548
+            start=start,
549
+            length=length,
550
+            label="Current file contents near the requested patch location:",
551
+        )
552
+
553
+    def _exact_string_window(
554
+        self,
555
+        *,
556
+        content: str,
557
+        file_lines: list[str],
558
+        needle: str,
559
+        label: str,
560
+    ) -> tuple[int, int, str] | None:
561
+        if not needle:
562
+            return None
563
+        index = content.find(needle)
564
+        if index == -1:
565
+            return None
566
+        start_line = content[:index].count("\n")
567
+        block_length = max(1, len(needle.splitlines()))
568
+        return self._bounded_window(
569
+            file_lines=file_lines,
570
+            start=start_line,
571
+            length=block_length,
572
+            label=label,
573
+        )
574
+
575
+    def _approximate_string_window(
576
+        self,
577
+        *,
578
+        file_lines: list[str],
579
+        needle: str,
580
+        label: str,
581
+    ) -> tuple[int, int, str] | None:
582
+        normalized_needle = self._normalize_match_text(needle)
583
+        if not normalized_needle:
584
+            return None
585
+
586
+        needle_lines = [line for line in needle.splitlines() if line.strip()]
587
+        if not needle_lines:
588
+            needle_lines = [needle.strip()]
589
+
590
+        min_window = 1
591
+        max_window = min(len(file_lines), max(1, len(needle_lines) + 2))
592
+        best_score = 0.0
593
+        best_start = 0
594
+        best_length = min(max_window, max(1, len(needle_lines)))
595
+        for window_length in range(min_window, max_window + 1):
596
+            for start in range(0, len(file_lines) - window_length + 1):
597
+                candidate = "\n".join(file_lines[start : start + window_length])
598
+                score = SequenceMatcher(
599
+                    None,
600
+                    normalized_needle,
601
+                    self._normalize_match_text(candidate),
602
+                ).ratio()
603
+                if score > best_score:
604
+                    best_score = score
605
+                    best_start = start
606
+                    best_length = window_length
607
+
608
+        if best_score < 0.25:
609
+            return None
610
+
611
+        return self._bounded_window(
612
+            file_lines=file_lines,
613
+            start=best_start,
614
+            length=best_length,
615
+            label=label,
616
+        )
617
+
618
+    def _bounded_window(
619
+        self,
620
+        *,
621
+        file_lines: list[str],
622
+        start: int,
623
+        length: int,
624
+        label: str,
625
+    ) -> tuple[int, int, str]:
626
+        context_before = 2
627
+        context_after = 2
628
+        start_index = max(0, start - context_before)
629
+        end_index = min(len(file_lines), start + max(1, length) + context_after)
630
+        return start_index, end_index, label
631
+
632
+    def _format_excerpt_lines(
633
+        self,
634
+        file_lines: list[str],
635
+        start: int,
636
+        end: int,
637
+    ) -> list[str]:
638
+        if start >= end:
639
+            return []
640
+        width = len(str(end))
641
+        return [
642
+            f"  {line_number:>{width}} | {file_lines[line_number - 1]}"
643
+            for line_number in range(start + 1, end + 1)
644
+        ]
645
+
646
+    def _normalize_match_text(self, text: str) -> str:
647
+        return " ".join(str(text or "").split())
648
+
649
+    def _active_repair_context(self) -> ActiveRepairContext | None:
650
+        return extract_active_repair_context(self.context.session.messages)
364651
 
365652
     def _canonicalize_path(self, raw_path: str) -> str:
366653
         if not raw_path:
src/loader/runtime/tool_batches.pymodified
1089 lines changed — click to load
@@ -7,16 +7,20 @@ from dataclasses import dataclass, field
77
 from pathlib import Path
88
 from typing import Any
99
 
10
-from ..llm.base import Role, ToolCall
10
+from ..llm.base import ToolCall
1111
 from .compaction import infer_preferred_next_step, summarize_confirmed_facts
1212
 from .context import RuntimeContext
1313
 from .dod import (
1414
     DefinitionOfDone,
1515
     DefinitionOfDoneStore,
16
+    all_planned_artifacts_exist,
1617
     begin_new_verification_attempt,
18
+    collect_planned_artifact_targets,
1719
     derive_verification_commands,
1820
     ensure_active_verification_attempt,
21
+    infer_next_declared_html_output_file,
1922
     is_state_mutating_tool_call,
23
+    planned_artifact_target_satisfied,
2024
     record_successful_tool_call,
2125
     synthesize_todo_items,
2226
 )
@@ -25,15 +29,20 @@ from .evidence_provenance import EvidenceProvenance, EvidenceProvenanceStatus
2529
 from .executor import ToolExecutionState, ToolExecutor
2630
 from .logging import get_runtime_logger
2731
 from .policy_timeline import append_verification_timeline_entry
32
+from .repair_focus import extract_active_repair_context
2833
 from .safeguard_services import extract_shell_text_rewrite_target
29
-from .semantic_rules import html_toc as html_toc_rule
3034
 from .tool_batch_checks import ToolBatchConfidenceGate, ToolBatchVerificationGate
3135
 from .tool_batch_recovery import ToolBatchRecoveryController
3236
 from .verification_observations import (
3337
     VerificationObservation,
3438
     VerificationObservationStatus,
3539
 )
36
-from .workflow import advance_todos_from_tool_call, sync_todos_to_definition_of_done
40
+from .workflow import (
41
+    advance_todos_from_tool_call,
42
+    effective_pending_todo_items,
43
+    reconcile_aggregate_completion_steps,
44
+    sync_todos_to_definition_of_done,
45
+)
3746
 
3847
 EventSink = Callable[[AgentEvent], Awaitable[None]]
3948
 ConfirmationHandler = (
@@ -48,17 +57,46 @@ _TODO_NUDGE_EXCLUDED_ITEMS = {
4857
 }
4958
 _MUTATION_TODO_HINTS = (
5059
     "create",
60
+    "creating",
5161
     "update",
62
+    "updating",
5263
     "edit",
64
+    "editing",
5365
     "write",
66
+    "writing",
5467
     "fix",
68
+    "fixing",
5569
     "modify",
70
+    "modifying",
5671
     "change",
72
+    "changing",
5773
     "patch",
74
+    "patching",
5875
     "replace",
76
+    "replacing",
5977
     "correct",
78
+    "correcting",
6079
     "rewrite",
80
+    "rewriting",
81
+)
82
+_CONSISTENCY_REVIEW_HINTS = (
83
+    "consistent",
84
+    "consistently",
85
+    "formatted",
86
+    "link",
87
+    "linked",
88
+    "navigation",
89
+    "work properly",
90
+    "all files",
91
+    "every file",
92
+    "ensure",
6193
 )
94
+_BOOKKEEPING_NOTE_TOOL_NAMES = {
95
+    "notepad_write_working",
96
+    "notepad_append",
97
+    "notepad_write_priority",
98
+    "notepad_write_manual",
99
+}
62100
 
63101
 
64102
 @dataclass
@@ -88,7 +126,6 @@ class ToolBatchRunner:
88126
         self.confidence_gate = confidence_gate or ToolBatchConfidenceGate(context)
89127
         self.recovery_controller = recovery_controller or ToolBatchRecoveryController(context)
90128
         self.verification_gate = verification_gate or ToolBatchVerificationGate(context)
91
-        self._inventory_hint_targets: set[str] = set()
92129
 
93130
     async def execute_batch(
94131
         self,
@@ -205,10 +242,6 @@ class ToolBatchRunner:
205242
                 if label:
206243
                     completed_labels.append(label)
207244
                 await _emit_batch_todos()
208
-                self._annotate_verified_html_inventory(executed_tool_call, outcome)
209
-                self._queue_verified_html_inventory_nudge(executed_tool_call)
210
-                self._annotate_validated_html_toc_completion(executed_tool_call, outcome)
211
-                self._queue_validated_html_toc_completion_nudge(executed_tool_call)
212245
                 if loop_response is not None:
213246
                     result.halted = True
214247
                     result.final_response = loop_response
@@ -244,6 +277,16 @@ class ToolBatchRunner:
244277
             if outcome.state == ToolExecutionState.DUPLICATE:
245278
                 self._queue_duplicate_observation_nudge(tool_call, dod=dod)
246279
             elif outcome.state == ToolExecutionState.BLOCKED:
280
+                self._queue_blocked_active_repair_nudge(outcome.event_content)
281
+                self._queue_blocked_active_repair_mutation_nudge(outcome.event_content)
282
+                self._queue_blocked_completed_artifact_scope_nudge(
283
+                    outcome.event_content,
284
+                    dod=dod,
285
+                )
286
+                self._queue_blocked_late_reference_drift_nudge(
287
+                    outcome.event_content,
288
+                    dod=dod,
289
+                )
247290
                 self._queue_blocked_shell_rewrite_nudge(tool_call)
248291
                 self._queue_blocked_html_edit_nudge(tool_call, outcome.event_content)
249292
 
@@ -290,10 +333,17 @@ class ToolBatchRunner:
290333
             return
291334
 
292335
         current_task = getattr(self.context.session, "current_task", None)
336
+        missing_artifact = _next_missing_planned_artifact(
337
+            dod,
338
+            project_root=self.context.project_root,
339
+        )
293340
         next_pending = next(
294341
             (
295342
                 item
296
-                for item in dod.pending_items
343
+                for item in effective_pending_todo_items(
344
+                    dod,
345
+                    project_root=self.context.project_root,
346
+                )
297347
                 if item not in _TODO_NUDGE_EXCLUDED_ITEMS
298348
             ),
299349
             None,
@@ -302,13 +352,35 @@ class ToolBatchRunner:
302352
             self.context.session.messages,
303353
             max_items=2,
304354
         )
305
-        if next_pending and not html_toc_rule.task_targets_html_toc(current_task):
355
+        if _should_prioritize_missing_artifact(
356
+            next_pending=next_pending,
357
+            missing_artifact=missing_artifact,
358
+        ):
359
+            prefix = "Reuse the earlier observation instead of repeating it. "
360
+            if confirmed_facts:
361
+                prefix += f"Confirmed facts: {confirmed_facts}. "
362
+            self.context.queue_steering_message(
363
+                prefix
364
+                + "An explicitly planned artifact is still missing."
365
+                + _missing_artifact_resume_suffix(
366
+                    missing_artifact,
367
+                    project_root=self.context.project_root,
368
+                )
369
+                + " Do not switch into review or consistency-check mode until the missing artifact exists."
370
+            )
371
+            return
372
+        if next_pending:
306373
             mutation_suffix = ""
307374
             if _todo_is_mutation_step(next_pending):
308
-                mutation_suffix = (
309
-                    " You already have enough evidence for that step, so stop gathering "
310
-                    "more reference material and perform the change now."
375
+                mutation_suffix = _missing_artifact_resume_suffix(
376
+                    missing_artifact,
377
+                    project_root=self.context.project_root,
311378
                 )
379
+                if not mutation_suffix:
380
+                    mutation_suffix = (
381
+                        " You already have enough evidence for that step, so stop gathering "
382
+                        "more reference material and perform the change now."
383
+                    )
312384
             if confirmed_facts:
313385
                 self.context.queue_steering_message(
314386
                     "Reuse the earlier observation instead of repeating it. "
@@ -326,6 +398,37 @@ class ToolBatchRunner:
326398
                 )
327399
             return
328400
 
401
+        if missing_artifact is not None:
402
+            self.context.queue_steering_message(
403
+                "Reuse the earlier observation instead of repeating it. "
404
+                + _missing_artifact_resume_suffix(
405
+                    missing_artifact,
406
+                    project_root=self.context.project_root,
407
+                ).strip()
408
+            )
409
+            return
410
+
411
+        if all_planned_artifacts_exist(dod, project_root=self.context.project_root):
412
+            verification_commands = dod.verification_commands or derive_verification_commands(
413
+                dod,
414
+                project_root=self.context.project_root,
415
+                task_statement=current_task,
416
+                supplement_existing=True,
417
+            )
418
+            verification_suffix = (
419
+                "Move to verification or final confirmation using the files already on disk."
420
+                if verification_commands
421
+                else "Finish the current review using the files already on disk."
422
+            )
423
+            self.context.queue_steering_message(
424
+                "Reuse the earlier observation instead of repeating it. "
425
+                "All explicitly planned artifacts already exist. "
426
+                "Use the current task artifacts as the source of truth and do not reopen "
427
+                "reference materials unless one specific gap is still unknown. "
428
+                + verification_suffix
429
+            )
430
+            return
431
+
329432
         preferred_next_step = infer_preferred_next_step(
330433
             self.context.session.messages,
331434
             current_task=current_task,
@@ -401,277 +504,159 @@ class ToolBatchRunner:
401504
             f"Apply the change to `{target}` with edit/patch/write."
402505
         )
403506
 
404
-    def _queue_blocked_html_edit_nudge(self, tool_call: ToolCall, event_content: str) -> None:
405
-        """Steer blocked TOC edits back to the confirmed chapter inventory."""
406
-
407
-        if tool_call.name not in {"edit", "patch"}:
408
-            return
409
-        if not self._targets_html_toc_task():
410
-            return
411
-
412
-        target_path = str(tool_call.arguments.get("file_path", "")).strip()
413
-        if not html_toc_rule.is_html_toc_index_path(target_path):
414
-            return
507
+    def _queue_blocked_active_repair_nudge(self, event_content: str) -> None:
508
+        """Reinforce active repair focus after an out-of-scope blocked observation."""
415509
 
416
-        validation = html_toc_rule.validate_html_toc(target_path)
417
-        if (
418
-            "old_string and new_string are identical" in event_content
419
-            and validation is not None
420
-            and validation.valid
421
-        ):
422
-            action_tracker = getattr(self.context.safeguards, "action_tracker", None)
423
-            note_validated = getattr(action_tracker, "note_validated_html_toc", None)
424
-            if callable(note_validated):
425
-                note_validated(target_path)
426
-            target_label = html_toc_rule.describe_html_toc_target(target_path)
427
-            self.context.queue_steering_message(
428
-                f"The HTML table-of-contents target {target_label} already matches the "
429
-                "validated replacement block. "
430
-                f"Semantic verification preview: validated {validation.link_count} linked "
431
-                "entries. "
432
-                "Do not call `edit`, `patch`, or reread the same TOC again. Briefly state "
433
-                f"that {target_label} is already updated so Loader can continue the "
434
-                "verification gate or finish the task."
435
-            )
510
+        if "[Blocked - active repair scope:" not in event_content:
436511
             return
437512
 
438
-        current_task = getattr(self.context.session, "current_task", None)
439
-        confirmed_facts = summarize_confirmed_facts(
440
-            self.context.session.messages,
441
-            max_items=2,
442
-            focus_path=target_path,
443
-        )
444
-        preferred_next_step = infer_preferred_next_step(
445
-            self.context.session.messages,
446
-            current_task=current_task,
447
-            focus_path=target_path,
448
-        )
449
-        verified_inventory = html_toc_rule.summarize_html_inventory(target_path, limit=12)
450
-        current_excerpt = html_toc_rule.extract_html_toc_excerpt(target_path)
451
-        suggested_replacement = html_toc_rule.build_html_toc_replacement_block(target_path)
452
-        suggested_call = html_toc_rule.build_html_toc_edit_call_template(target_path)
453
-        target_label = html_toc_rule.describe_html_toc_target(target_path)
454
-        excerpt_suffix = (
455
-            f"\nCurrent TOC block:\n{current_excerpt}"
456
-            if current_excerpt
457
-            else ""
458
-        )
459
-        replacement_suffix = (
460
-            f"\nSuggested replacement block:\n{suggested_replacement}"
461
-            if suggested_replacement
462
-            else ""
463
-        )
464
-        call_suffix = (
465
-            f"\nSuggested edit call:\n{suggested_call}"
466
-            if suggested_call
467
-            else ""
468
-        )
469
-
470
-        if preferred_next_step and confirmed_facts and verified_inventory:
471
-            self.context.queue_steering_message(
472
-                f"Use the current TOC target contents plus the verified sibling inventory for "
473
-                f"{target_label} instead of guessing. "
474
-                f"Confirmed facts: {confirmed_facts}. "
475
-                f"Known chapter inventory: {verified_inventory}. "
476
-                f"{preferred_next_step} "
477
-                f"Apply those exact href/title pairs in {target_label}. "
478
-                "Do not rewrite the whole document. For `edit`, set `old_string` to the "
479
-                "current TOC block above exactly and set `new_string` to the suggested "
480
-                "replacement block below exactly."
481
-                f"{excerpt_suffix}"
482
-                f"{replacement_suffix}"
483
-                f"{call_suffix}"
484
-            )
513
+        repair = extract_active_repair_context(self.context.session.messages)
514
+        if repair is None:
485515
             return
486516
 
487
-        if verified_inventory:
517
+        if repair.allowed_paths:
518
+            allowed_preview = ", ".join(f"`{path}`" for path in repair.allowed_paths[:3])
519
+            if len(repair.allowed_paths) > 3:
520
+                allowed_preview += ", ..."
488521
             self.context.queue_steering_message(
489
-                f"Use the current TOC target contents plus the verified sibling inventory for "
490
-                f"{target_label} instead of guessing. "
491
-                f"Known chapter inventory: {verified_inventory}. "
492
-                f"Apply those exact href/title pairs in {target_label}. "
493
-                "Do not rewrite the whole document. For `edit`, set `old_string` to the "
494
-                "current TOC block above exactly and set `new_string` to the suggested "
495
-                "replacement block below exactly."
496
-                f"{excerpt_suffix}"
497
-                f"{replacement_suffix}"
498
-                f"{call_suffix}"
522
+                "Verification already identified the active repair target. "
523
+                f"Stay on the concrete repair files {allowed_preview} "
524
+                f"and repair `{repair.artifact_path}` directly. "
525
+                "Do not reopen unrelated reference materials while this repair target is unresolved."
499526
             )
500527
             return
501528
 
529
+        roots_preview = ", ".join(f"`{root}`" for root in repair.allowed_roots[:2])
530
+        if len(repair.allowed_roots) > 2:
531
+            roots_preview += ", ..."
502532
         self.context.queue_steering_message(
503
-            f"Use the current TOC target contents when retrying the edit for {target_label} "
504
-            "instead of guessing. "
505
-            f"{excerpt_suffix}".strip()
533
+            "Verification already identified the active repair target. "
534
+            f"Stay within the current artifact set under {roots_preview} "
535
+            f"and repair `{repair.artifact_path}` directly. "
536
+            "Do not reopen unrelated reference materials while this repair target is unresolved."
506537
         )
507538
 
508
-    def _queue_verified_html_inventory_nudge(self, tool_call: ToolCall) -> None:
509
-        """Proactively hand off verified chapter inventory after sibling discovery."""
510
-
511
-        if tool_call.name != "glob":
512
-            return
513
-
514
-        chapters_path = str(tool_call.arguments.get("path", "")).strip()
515
-        if not chapters_path.endswith("chapters"):
516
-            return
517
-
518
-        index_path = str(Path(chapters_path).expanduser().parent / "index.html")
519
-        if index_path in self._inventory_hint_targets:
520
-            return
539
+    def _queue_blocked_active_repair_mutation_nudge(self, event_content: str) -> None:
540
+        """Keep repair-phase mutations pinned to the named repair files."""
521541
 
522
-        if not self._targets_html_toc_task():
542
+        if "[Blocked - active repair mutation scope:" not in event_content:
523543
             return
524544
 
525
-        verified_inventory = html_toc_rule.summarize_html_inventory(index_path, limit=12)
526
-        if not verified_inventory:
545
+        repair = extract_active_repair_context(self.context.session.messages)
546
+        if repair is None or not repair.allowed_paths:
527547
             return
528548
 
529
-        self._inventory_hint_targets.add(index_path)
530
-        target_label = html_toc_rule.describe_html_toc_target(index_path)
531
-        chapters_label = html_toc_rule.describe_html_toc_chapters_dir(index_path)
549
+        allowed_preview = ", ".join(f"`{path}`" for path in repair.allowed_paths[:3])
550
+        if len(repair.allowed_paths) > 3:
551
+            allowed_preview += ", ..."
532552
         self.context.queue_steering_message(
533
-            f"You already have the verified sibling inventory needed for {target_label}. "
534
-            f"Known chapter inventory: {verified_inventory}. "
535
-            f"Update {target_label} using those exact href/title pairs instead of rereading "
536
-            f"files in {chapters_label} unless one specific title is still unknown."
553
+            "Verification already identified the concrete repair files. "
554
+            f"Keep mutations pinned to {allowed_preview} "
555
+            f"and repair `{repair.artifact_path}` before widening the change set."
537556
         )
538557
 
539
-    def _annotate_verified_html_inventory(self, tool_call: ToolCall, outcome) -> None:
540
-        """Attach verified chapter inventory directly to a successful discovery result."""
541
-
542
-        if tool_call.name != "glob":
543
-            return
544
-
545
-        chapters_path = str(tool_call.arguments.get("path", "")).strip()
546
-        if not chapters_path.endswith("chapters"):
547
-            return
548
-
549
-        if not self._targets_html_toc_task():
550
-            return
551
-
552
-        index_path = str(Path(chapters_path).expanduser().parent / "index.html")
553
-        verified_inventory = html_toc_rule.summarize_html_inventory(index_path, limit=12)
554
-        if not verified_inventory:
555
-            return
556
-
557
-        action_tracker = getattr(self.context.safeguards, "action_tracker", None)
558
-        note_inventory = getattr(action_tracker, "note_verified_html_inventory", None)
559
-        if callable(note_inventory):
560
-            note_inventory(index_path)
561
-
562
-        note = f"Verified chapter inventory: {verified_inventory}"
563
-        merged_event = outcome.event_content
564
-        if note not in merged_event:
565
-            merged_event = f"{note}\n{merged_event}".strip()
566
-            outcome.event_content = merged_event
567
-            outcome.result_output = merged_event
568
-            outcome.message.content = f"{note}\n{outcome.message.content}".strip()
569
-            if outcome.message.tool_results:
570
-                outcome.message.tool_results[0].content = merged_event
571
-
572
-    def _annotate_validated_html_toc_completion(self, tool_call: ToolCall, outcome) -> None:
573
-        """Attach semantic TOC validation evidence to a successful mutating result."""
558
+    def _queue_blocked_late_reference_drift_nudge(
559
+        self,
560
+        event_content: str,
561
+        *,
562
+        dod: DefinitionOfDone,
563
+    ) -> None:
564
+        """Reinforce missing-artifact progress after late-stage reference drift is blocked."""
574565
 
575
-        if not self._targets_html_toc_task():
576
-            return
577
-        target_path = self._validated_html_toc_target(tool_call)
578
-        if target_path is None:
566
+        if "[Blocked - late reference drift:" not in event_content:
579567
             return
580568
 
581
-        validation = html_toc_rule.validate_html_toc(target_path)
582
-        if validation is None or not validation.valid:
569
+        missing_artifact = _next_missing_planned_artifact(
570
+            dod,
571
+            project_root=self.context.project_root,
572
+        )
573
+        if missing_artifact is None:
583574
             return
584575
 
585
-        action_tracker = getattr(self.context.safeguards, "action_tracker", None)
586
-        note_validated = getattr(action_tracker, "note_validated_html_toc", None)
587
-        if callable(note_validated):
588
-            note_validated(target_path)
576
+        planned_roots: list[str] = []
577
+        seen_roots: set[str] = set()
578
+        for target, expect_directory in collect_planned_artifact_targets(
579
+            dod,
580
+            project_root=self.context.project_root,
581
+        ):
582
+            root = str(target if expect_directory else target.parent)
583
+            if root in seen_roots:
584
+                continue
585
+            seen_roots.add(root)
586
+            planned_roots.append(root)
589587
 
590
-        note = (
591
-            "Semantic verification preview: "
592
-            f"validated {validation.link_count} toc links in {Path(target_path).name}"
588
+        roots_preview = ", ".join(f"`{root}`" for root in planned_roots[:2])
589
+        if len(planned_roots) > 2:
590
+            roots_preview += ", ..."
591
+        self.context.queue_steering_message(
592
+            "Late-stage reference rereads are no longer helping. "
593
+            "One explicitly planned artifact is still missing."
594
+            + _missing_artifact_resume_suffix(
595
+                missing_artifact,
596
+                project_root=self.context.project_root,
597
+            )
598
+            + f" Stay within the current output roots under {roots_preview}"
599
+            + " and finish that artifact before reopening older reference materials."
593600
         )
594
-        merged_event = outcome.event_content
595
-        if note not in merged_event:
596
-            merged_event = f"{merged_event}\n{note}".strip()
597
-            outcome.event_content = merged_event
598
-            outcome.result_output = merged_event
599
-            outcome.message.content = f"{outcome.message.content}\n{note}".strip()
600
-            if outcome.message.tool_results:
601
-                outcome.message.tool_results[0].content = merged_event
602601
 
603
-    def _queue_validated_html_toc_completion_nudge(self, tool_call: ToolCall) -> None:
604
-        """Push the next model turn toward finishing once the TOC already validates."""
602
+    def _queue_blocked_completed_artifact_scope_nudge(
603
+        self,
604
+        event_content: str,
605
+        *,
606
+        dod: DefinitionOfDone,
607
+    ) -> None:
608
+        """Keep post-build review anchored to the generated artifact set."""
605609
 
606
-        if not self._targets_html_toc_task():
607
-            return
608
-        target_path = self._validated_html_toc_target(tool_call)
609
-        if target_path is None:
610
+        if "[Blocked - completed artifact set scope:" not in event_content:
610611
             return
611612
 
612
-        validation = html_toc_rule.validate_html_toc(target_path)
613
-        if validation is None or not validation.valid:
614
-            return
613
+        planned_roots: list[str] = []
614
+        seen_roots: set[str] = set()
615
+        for target, expect_directory in collect_planned_artifact_targets(
616
+            dod,
617
+            project_root=self.context.project_root,
618
+        ):
619
+            root = str(target if expect_directory else target.parent)
620
+            if root in seen_roots:
621
+                continue
622
+            seen_roots.add(root)
623
+            planned_roots.append(root)
615624
 
616
-        if tool_call.name == "read":
617
-            target_label = html_toc_rule.describe_html_toc_target(target_path)
618
-            chapters_label = html_toc_rule.describe_html_toc_chapters_dir(target_path)
625
+        next_pending = next(
626
+            (
627
+                item
628
+                for item in effective_pending_todo_items(
629
+                    dod,
630
+                    project_root=self.context.project_root,
631
+                )
632
+                if item not in _TODO_NUDGE_EXCLUDED_ITEMS
633
+            ),
634
+            None,
635
+        )
636
+        roots_preview = ", ".join(f"`{root}`" for root in planned_roots[:2])
637
+        if len(planned_roots) > 2:
638
+            roots_preview += ", ..."
639
+        if next_pending and _todo_is_consistency_review_step(next_pending):
619640
             self.context.queue_steering_message(
620
-                f"The HTML table-of-contents target {target_label} already satisfies the "
621
-                "verified link/title constraints. "
622
-                f"Semantic verification preview: validated {validation.link_count} linked "
623
-                "entries. "
624
-                "No TOC edit is required unless you can point to one specific incorrect href or "
625
-                f"title. Do not reread {target_label} or files in {chapters_label} again. "
626
-                "Briefly state that the table of contents is already correct so Loader can "
627
-                "finish the task."
641
+                "All explicitly planned artifacts already exist. "
642
+                f"Stay within the current output roots under {roots_preview} and continue "
643
+                f"with `{next_pending}` using the generated files as the source of truth. "
644
+                "Do not reopen earlier reference materials."
628645
             )
629646
             return
630647
 
631
-        target_label = html_toc_rule.describe_html_toc_target(target_path)
632
-        chapters_label = html_toc_rule.describe_html_toc_chapters_dir(target_path)
633648
         self.context.queue_steering_message(
634
-            f"The HTML table-of-contents target {target_label} already satisfies the "
635
-            "verified link/title constraints. "
636
-            f"Semantic verification preview: validated {validation.link_count} linked "
637
-            "entries. "
638
-            f"Do not reread {target_label} or files in {chapters_label} unless a specific "
639
-            "href or title is still unresolved. Briefly state that the table of contents has "
640
-            "been updated so Loader can run the verification gate."
641
-        )
642
-
643
-    @staticmethod
644
-    def _validated_html_toc_target(tool_call: ToolCall) -> str | None:
645
-        """Return the index target for a validated HTML TOC action."""
646
-
647
-        target_path = ""
648
-        if tool_call.name in {"write", "edit", "patch", "read"}:
649
-            target_path = str(tool_call.arguments.get("file_path", "")).strip()
650
-        elif tool_call.name == "bash":
651
-            target_path = (
652
-                extract_shell_text_rewrite_target(
653
-                    str(tool_call.arguments.get("command", ""))
654
-                )
655
-                or ""
656
-            ).strip()
657
-
658
-        if not target_path:
659
-            return None
660
-        if not html_toc_rule.is_html_toc_index_path(target_path):
661
-            return None
662
-        return str(Path(target_path).expanduser())
663
-
664
-    def _targets_html_toc_task(self) -> bool:
665
-        current_task = str(getattr(self.context.session, "current_task", "") or "").lower()
666
-        if not current_task:
667
-            for message in reversed(getattr(self.context.session, "messages", [])):
668
-                if getattr(message, "role", None) != Role.USER:
669
-                    continue
670
-                content = str(getattr(message, "content", "") or "").strip().lower()
671
-                if content:
672
-                    current_task = content
673
-                    break
674
-        return html_toc_rule.task_targets_html_toc(current_task)
649
+            "All explicitly planned artifacts already exist. "
650
+            f"Stay within the current output roots under {roots_preview} "
651
+            "and move to verification or final confirmation using the generated files. "
652
+            "Do not reopen earlier reference materials."
653
+        )
654
+
655
+    def _queue_blocked_html_edit_nudge(self, tool_call: ToolCall, event_content: str) -> None:
656
+        """Keep blocked edit feedback generic; avoid task-class-specific steering."""
657
+
658
+        _ = tool_call, event_content
659
+        return
675660
 
676661
     async def _record_successful_execution(
677662
         self,
@@ -704,15 +689,36 @@ class ToolBatchRunner:
704689
         if tool_call.name == "TodoWrite" and outcome.registry_result is not None:
705690
             new_todos = outcome.registry_result.metadata.get("new_todos", [])
706691
             if isinstance(new_todos, list):
707
-                sync_todos_to_definition_of_done(dod, new_todos)
692
+                sync_todos_to_definition_of_done(
693
+                    dod,
694
+                    new_todos,
695
+                    project_root=self.context.project_root,
696
+                )
697
+            self._queue_todowrite_resume_nudge(dod=dod)
708698
         else:
709699
             pending_before = list(dod.pending_items)
710700
             if advance_todos_from_tool_call(dod, tool_call):
701
+                reconcile_aggregate_completion_steps(
702
+                    dod,
703
+                    project_root=self.context.project_root,
704
+                )
711705
                 self._queue_next_pending_todo_nudge(
712706
                     tool_call=tool_call,
713707
                     pending_before=pending_before,
714708
                     dod=dod,
715709
                 )
710
+            self._queue_bookkeeping_resume_nudge(
711
+                tool_call=tool_call,
712
+                dod=dod,
713
+            )
714
+            self._queue_missing_artifact_progress_nudge(
715
+                tool_call=tool_call,
716
+                dod=dod,
717
+            )
718
+            self._queue_planned_artifact_handoff_nudge(
719
+                tool_call=tool_call,
720
+                dod=dod,
721
+            )
716722
         self.dod_store.save(dod)
717723
         recovery_context = self.context.recovery_context
718724
         if recovery_context is not None:
@@ -765,7 +771,10 @@ class ToolBatchRunner:
765771
         next_pending = next(
766772
             (
767773
                 item
768
-                for item in dod.pending_items
774
+                for item in effective_pending_todo_items(
775
+                    dod,
776
+                    project_root=self.context.project_root,
777
+                )
769778
                 if item not in _TODO_NUDGE_EXCLUDED_ITEMS
770779
             ),
771780
             None,
@@ -773,12 +782,36 @@ class ToolBatchRunner:
773782
         if not completed_label or not next_pending or next_pending == completed_label:
774783
             return
775784
 
785
+        missing_artifact = _next_missing_planned_artifact(
786
+            dod,
787
+            project_root=self.context.project_root,
788
+        )
789
+        if _should_prioritize_missing_artifact(
790
+            next_pending=next_pending,
791
+            missing_artifact=missing_artifact,
792
+        ):
793
+            self.context.queue_steering_message(
794
+                f"Confirmed progress: `{completed_label}` is now satisfied by the successful "
795
+                f"`{tool_call.name}` result. One explicitly planned artifact is still missing."
796
+                + _missing_artifact_resume_suffix(
797
+                    missing_artifact,
798
+                    project_root=self.context.project_root,
799
+                )
800
+                + " Do not switch into review or consistency-check mode until the missing artifact exists."
801
+            )
802
+            return
803
+
776804
         mutation_suffix = ""
777805
         if _todo_is_mutation_step(next_pending):
778
-            mutation_suffix = (
779
-                " You already have enough evidence for that step, so stop gathering "
780
-                "more reference material and perform the change now."
806
+            mutation_suffix = _missing_artifact_resume_suffix(
807
+                missing_artifact,
808
+                project_root=self.context.project_root,
781809
             )
810
+            if not mutation_suffix:
811
+                mutation_suffix = (
812
+                    " You already have enough evidence for that step, so stop gathering "
813
+                    "more reference material and perform the change now."
814
+                )
782815
 
783816
         self.context.queue_steering_message(
784817
             f"Confirmed progress: `{completed_label}` is now satisfied by the successful "
@@ -786,6 +819,375 @@ class ToolBatchRunner:
786819
             f"`{next_pending}` instead of rereading the same evidence.{mutation_suffix}"
787820
         )
788821
 
822
+    def _queue_planned_artifact_handoff_nudge(
823
+        self,
824
+        *,
825
+        tool_call: ToolCall,
826
+        dod: DefinitionOfDone,
827
+    ) -> None:
828
+        if not is_state_mutating_tool_call(tool_call):
829
+            return
830
+        if not all_planned_artifacts_exist(dod, project_root=self.context.project_root):
831
+            return
832
+
833
+        next_pending = next(
834
+            (
835
+                item
836
+                for item in effective_pending_todo_items(
837
+                    dod,
838
+                    project_root=self.context.project_root,
839
+                )
840
+                if item not in _TODO_NUDGE_EXCLUDED_ITEMS
841
+            ),
842
+            None,
843
+        )
844
+        verification_commands = dod.verification_commands or derive_verification_commands(
845
+            dod,
846
+            project_root=self.context.project_root,
847
+            task_statement=getattr(self.context.session, "current_task", "") or "",
848
+            supplement_existing=True,
849
+        )
850
+
851
+        if next_pending and _todo_is_consistency_review_step(next_pending):
852
+            verification_suffix = (
853
+                " Move to verification once no specific mismatch remains."
854
+                if verification_commands
855
+                else " Avoid another full reread unless one specific inconsistency is still unknown."
856
+            )
857
+            self.context.queue_steering_message(
858
+                "All explicitly planned artifacts now exist. "
859
+                f"Continue with the next pending item: `{next_pending}`. "
860
+                "Use the files already on disk as the source of truth instead of restarting "
861
+                "discovery or inventing alternate filenames."
862
+                + verification_suffix
863
+            )
864
+            return
865
+
866
+        if verification_commands:
867
+            self.context.queue_steering_message(
868
+                "All explicitly planned artifacts now exist. "
869
+                "Do not expand the artifact set or restart discovery unless a specific gap is "
870
+                "still known. Move to verification or final confirmation using the files that "
871
+                "already exist."
872
+            )
873
+
874
+    def _queue_missing_artifact_progress_nudge(
875
+        self,
876
+        *,
877
+        tool_call: ToolCall,
878
+        dod: DefinitionOfDone,
879
+    ) -> None:
880
+        if not is_state_mutating_tool_call(tool_call):
881
+            return
882
+        missing_artifact = _next_missing_planned_artifact(
883
+            dod,
884
+            project_root=self.context.project_root,
885
+        )
886
+        if missing_artifact is None:
887
+            return
888
+
889
+        current_label = _current_mutation_label(tool_call)
890
+        todo_refresh = _todo_refresh_guidance(
891
+            dod,
892
+            project_root=self.context.project_root,
893
+        )
894
+        self.context.queue_steering_message(
895
+            f"Confirmed progress: {current_label} is now recorded."
896
+            " One explicitly planned artifact is still missing."
897
+            + _missing_artifact_resume_suffix(
898
+                missing_artifact,
899
+                project_root=self.context.project_root,
900
+            )
901
+            + todo_refresh
902
+            + " Do not move to verification, final confirmation, or TodoWrite-only "
903
+            "bookkeeping until that artifact exists."
904
+            + " Do not spend another turn on working notes or rediscovery alone."
905
+        )
906
+
907
+    def _queue_todowrite_resume_nudge(
908
+        self,
909
+        *,
910
+        dod: DefinitionOfDone,
911
+    ) -> None:
912
+        missing_artifact = _next_missing_planned_artifact(
913
+            dod,
914
+            project_root=self.context.project_root,
915
+        )
916
+        next_pending = next(
917
+            (
918
+                item
919
+                for item in effective_pending_todo_items(
920
+                    dod,
921
+                    project_root=self.context.project_root,
922
+                )
923
+                if item not in _TODO_NUDGE_EXCLUDED_ITEMS
924
+            ),
925
+            None,
926
+        )
927
+        if missing_artifact is None:
928
+            if next_pending and _todo_is_mutation_step(next_pending):
929
+                self.context.queue_steering_message(
930
+                    "Todo tracking is updated. Continue with the next pending item: "
931
+                    f"`{next_pending}`. Use the current output files as the source of "
932
+                    "truth, and do not reopen reference materials unless one specific "
933
+                    "fact required for that step is still unknown. Perform the mutation "
934
+                    "now instead of spending another turn on planning, rereads, or "
935
+                    "verification."
936
+                )
937
+                return
938
+
939
+            if (
940
+                next_pending
941
+                and _todo_is_consistency_review_step(next_pending)
942
+                and not all_planned_artifacts_exist(
943
+                    dod,
944
+                    project_root=self.context.project_root,
945
+                )
946
+            ):
947
+                self.context.queue_steering_message(
948
+                    "Todo tracking is updated. Continue with the next pending item: "
949
+                    f"`{next_pending}`. Use the current output files as the source of "
950
+                    "truth, and do not reopen reference materials unless one specific "
951
+                    "mismatch is still unknown."
952
+                )
953
+                return
954
+
955
+            if not all_planned_artifacts_exist(dod, project_root=self.context.project_root):
956
+                return
957
+
958
+            verification_commands = dod.verification_commands or derive_verification_commands(
959
+                dod,
960
+                project_root=self.context.project_root,
961
+                task_statement=getattr(self.context.session, "current_task", "") or "",
962
+                supplement_existing=True,
963
+            )
964
+            if next_pending and _todo_is_consistency_review_step(next_pending):
965
+                verification_suffix = (
966
+                    " Move to verification once no specific mismatch remains."
967
+                    if verification_commands
968
+                    else " Finish the targeted consistency pass without reopening reference materials."
969
+                )
970
+                self.context.queue_steering_message(
971
+                    "Todo tracking is updated. All explicitly planned artifacts now exist. "
972
+                    f"Continue with the next pending item: `{next_pending}`. "
973
+                    "Use the current output files as the source of truth, and do not restart "
974
+                    "early discovery or reopen reference materials."
975
+                    + verification_suffix
976
+                )
977
+                return
978
+
979
+            verification_suffix = (
980
+                " Move to verification or final confirmation using the files already on disk."
981
+                if verification_commands
982
+                else " Finish the task using the files already on disk."
983
+            )
984
+            self.context.queue_steering_message(
985
+                "Todo tracking is updated. All explicitly planned artifacts now exist. "
986
+                "Do not restart discovery, reopen reference materials, or spend another turn "
987
+                "on TodoWrite alone."
988
+                + verification_suffix
989
+            )
990
+            return
991
+
992
+        todo_refresh = _todo_refresh_guidance(
993
+            dod,
994
+            project_root=self.context.project_root,
995
+        )
996
+        next_pending_suffix = (
997
+            f" Continue with the next pending item: `{next_pending}`."
998
+            if next_pending
999
+            else ""
1000
+        )
1001
+        self.context.queue_steering_message(
1002
+            "Todo tracking is updated. An explicitly planned artifact is still missing."
1003
+            + next_pending_suffix
1004
+            + _missing_artifact_resume_suffix(
1005
+                missing_artifact,
1006
+                project_root=self.context.project_root,
1007
+            )
1008
+            + todo_refresh
1009
+            + " Do not spend the next turn on TodoWrite alone, bookkeeping notes, "
1010
+            "verification, or final confirmation until that artifact exists."
1011
+        )
1012
+
1013
+    def _queue_bookkeeping_resume_nudge(
1014
+        self,
1015
+        *,
1016
+        tool_call: ToolCall,
1017
+        dod: DefinitionOfDone,
1018
+    ) -> None:
1019
+        if tool_call.name not in _BOOKKEEPING_NOTE_TOOL_NAMES:
1020
+            return
1021
+
1022
+        missing_artifact = _next_missing_planned_artifact(
1023
+            dod,
1024
+            project_root=self.context.project_root,
1025
+        )
1026
+        if missing_artifact is None:
1027
+            return
1028
+
1029
+        next_pending = next(
1030
+            (
1031
+                item
1032
+                for item in effective_pending_todo_items(
1033
+                    dod,
1034
+                    project_root=self.context.project_root,
1035
+                )
1036
+                if item not in _TODO_NUDGE_EXCLUDED_ITEMS
1037
+            ),
1038
+            None,
1039
+        )
1040
+        todo_refresh = _todo_refresh_guidance(
1041
+            dod,
1042
+            project_root=self.context.project_root,
1043
+        )
1044
+        if (
1045
+            next_pending
1046
+            and not _todo_is_mutation_step(next_pending)
1047
+            and not _todo_is_consistency_review_step(next_pending)
1048
+        ):
1049
+            self.context.queue_steering_message(
1050
+                "Bookkeeping note is recorded. Continue with the next pending item: "
1051
+                f"`{next_pending}`. Make your next response one concrete evidence-gathering "
1052
+                "tool call that advances that step, not another bookkeeping-only turn."
1053
+                + todo_refresh
1054
+                + " Do not jump ahead to later artifact creation, verification, or final "
1055
+                "confirmation until that step is satisfied."
1056
+            )
1057
+            return
1058
+
1059
+        self.context.queue_steering_message(
1060
+            "Bookkeeping note is recorded. An explicitly planned artifact is still missing."
1061
+            + _missing_artifact_resume_suffix(
1062
+                missing_artifact,
1063
+                project_root=self.context.project_root,
1064
+            )
1065
+            + todo_refresh
1066
+            + " Do not spend the next turn on additional notes, rediscovery, "
1067
+            "verification, or final confirmation until that artifact exists."
1068
+        )
1069
+
1070
+
1071
+def _todo_is_consistency_review_step(item: str) -> bool:
1072
+    text = item.lower()
1073
+    return any(hint in text for hint in _CONSISTENCY_REVIEW_HINTS)
1074
+
1075
+
1076
+def _should_prioritize_missing_artifact(
1077
+    *,
1078
+    next_pending: str | None,
1079
+    missing_artifact: tuple[Path, bool] | None,
1080
+) -> bool:
1081
+    if missing_artifact is None:
1082
+        return False
1083
+    if not next_pending:
1084
+        return True
1085
+    if _todo_is_consistency_review_step(next_pending):
1086
+        return True
1087
+    return not _todo_is_mutation_step(next_pending)
1088
+
1089
+
1090
+def _next_missing_planned_artifact(
1091
+    dod: DefinitionOfDone,
1092
+    *,
1093
+    project_root: Path,
1094
+) -> tuple[Path, bool] | None:
1095
+    for target, expect_directory in collect_planned_artifact_targets(
1096
+        dod,
1097
+        project_root=project_root,
1098
+        max_paths=12,
1099
+    ):
1100
+        if not planned_artifact_target_satisfied(
1101
+            dod,
1102
+            target=target,
1103
+            expect_directory=expect_directory,
1104
+            project_root=project_root,
1105
+        ):
1106
+            return target, expect_directory
1107
+    return None
1108
+
1109
+
1110
+def _missing_artifact_resume_suffix(
1111
+    missing_artifact: tuple[Path, bool] | None,
1112
+    *,
1113
+    project_root: Path,
1114
+) -> str:
1115
+    if missing_artifact is None:
1116
+        return ""
1117
+
1118
+    target, expect_directory = missing_artifact
1119
+    label = target.name or str(target)
1120
+    if expect_directory and not label.endswith("/"):
1121
+        label += "/"
1122
+    if expect_directory:
1123
+        next_output_file = infer_next_declared_html_output_file(
1124
+            target=target,
1125
+            project_root=project_root,
1126
+        )
1127
+        if next_output_file is not None:
1128
+            guidance = (
1129
+                f" Resume by creating `{next_output_file.name}` now. It is the next missing "
1130
+                f"declared output under `{label}`. Prefer one `write` call for "
1131
+                f"`{next_output_file}` instead of more rereads."
1132
+            )
1133
+            if not next_output_file.parent.exists():
1134
+                guidance += (
1135
+                    " The `write` tool can create that file's parent directories automatically,"
1136
+                    " so do the write in one step instead of stopping for a separate mkdir."
1137
+                )
1138
+            guidance += (
1139
+                " Make your next response the concrete mutation tool call itself, not another"
1140
+                " bookkeeping-only turn."
1141
+            )
1142
+            return guidance
1143
+        if target.is_dir():
1144
+            return (
1145
+                f" Resume by creating the next output file under `{label}` now. Prefer one "
1146
+                f"concrete `write` call for a file inside `{target}` instead of more rereads."
1147
+                " Make your next response the concrete mutation tool call itself, not another"
1148
+                " bookkeeping-only turn."
1149
+            )
1150
+        return (
1151
+            f" Resume by creating `{label}` now. Prefer one concrete directory-creation "
1152
+            f"step for `{target}` instead of more rereads."
1153
+        )
1154
+    guidance = (
1155
+        f" Resume by creating `{label}` now. Prefer one `write` call for `{target}` "
1156
+        "instead of more rereads."
1157
+    )
1158
+    if not target.parent.exists():
1159
+        guidance += (
1160
+            " The `write` tool can create that file's parent directories automatically,"
1161
+            " so do the write in one step instead of stopping for a separate mkdir."
1162
+        )
1163
+    guidance += (
1164
+        " Make your next response the concrete mutation tool call itself, not another"
1165
+        " bookkeeping-only turn."
1166
+    )
1167
+    return guidance
1168
+
1169
+
1170
+def _todo_refresh_guidance(
1171
+    dod: DefinitionOfDone,
1172
+    *,
1173
+    project_root: Path | None = None,
1174
+) -> str:
1175
+    non_special_pending = [
1176
+        item
1177
+        for item in effective_pending_todo_items(dod, project_root=project_root)
1178
+        if item not in _TODO_NUDGE_EXCLUDED_ITEMS
1179
+    ]
1180
+    non_special_completed = [
1181
+        item for item in dod.completed_items if item not in _TODO_NUDGE_EXCLUDED_ITEMS
1182
+    ]
1183
+    if len(dod.touched_files) < 2 and (len(non_special_pending) + len(non_special_completed)) < 3:
1184
+        return ""
1185
+    return (
1186
+        " If the tracked steps no longer match the confirmed progress, refresh `TodoWrite` "
1187
+        "in the same response as the next concrete step instead of spending a full turn on "
1188
+        "bookkeeping alone."
1189
+    )
1190
+
7891191
 
7901192
 def _mark_verification_stale(
7911193
     *,
@@ -953,6 +1355,18 @@ def _stale_verification_detail(tool_call: ToolCall) -> str:
9531355
     return f"{tool_call.name} changed the workspace"
9541356
 
9551357
 
1358
+def _current_mutation_label(tool_call: ToolCall) -> str:
1359
+    if tool_call.name in {"write", "edit", "patch"}:
1360
+        file_path = str(tool_call.arguments.get("file_path", "")).strip()
1361
+        if file_path:
1362
+            return f"`{Path(file_path).name or file_path}`"
1363
+    if tool_call.name == "bash":
1364
+        command = str(tool_call.arguments.get("command", "")).strip()
1365
+        if command:
1366
+            return f"`{command}`"
1367
+    return f"the successful `{tool_call.name}` result"
1368
+
1369
+
9561370
 def _tool_call_label(tool_call: ToolCall) -> str:
9571371
     """Human-readable label for one tool call."""
9581372
     name = tool_call.name
src/loader/runtime/turn_completion.pymodified
@@ -230,10 +230,6 @@ class TurnCompletionController:
230230
             actions_taken=actions_taken,
231231
         )
232232
 
233
-        final_message = Message(role=Role.ASSISTANT, content=response_content)
234
-        self.context.session.append(final_message)
235
-        summary.assistant_messages.append(final_message)
236
-
237233
         gate_result = await self.finalizer.run_definition_of_done_gate(
238234
             dod=dod,
239235
             candidate_response=final_response,
@@ -261,6 +257,9 @@ class TurnCompletionController:
261257
                 continuation_count=continuation_count,
262258
             )
263259
         final_response = gate_result.final_response
260
+        final_message = Message(role=Role.ASSISTANT, content=response_content)
261
+        self.context.session.append(final_message)
262
+        summary.assistant_messages.append(final_message)
264263
         self._record_completion_decision(
265264
             summary=summary,
266265
             decision_code=gate_result.reason_code,
src/loader/runtime/turn_iteration.pymodified
@@ -135,9 +135,11 @@ class TurnIterationController:
135135
                 extracted_iterations=extracted_iterations,
136136
                 continuation_count=continuation_count,
137137
                 consecutive_errors=consecutive_errors,
138
+                dod=dod,
138139
                 emit=emit,
139140
                 summary=summary,
140141
             )
142
+        reset_empty_retry_count = 0
141143
 
142144
         analysis = self.repairer.analyze_response(
143145
             content=assistant_turn.content,
@@ -196,7 +198,7 @@ class TurnIterationController:
196198
             return TurnIterationDecision(
197199
                 action=TurnIterationAction.CONTINUE,
198200
                 continuation_count=route_decision.continuation_count,
199
-                empty_retry_count=empty_retry_count,
201
+                empty_retry_count=reset_empty_retry_count,
200202
                 extracted_iterations=extracted_iterations,
201203
                 consecutive_errors=route_decision.consecutive_errors,
202204
                 new_actions_taken=route_decision.new_actions_taken,
@@ -205,7 +207,7 @@ class TurnIterationController:
205207
             return TurnIterationDecision(
206208
                 action=TurnIterationAction.FINALIZE,
207209
                 continuation_count=route_decision.continuation_count,
208
-                empty_retry_count=empty_retry_count,
210
+                empty_retry_count=reset_empty_retry_count,
209211
                 extracted_iterations=extracted_iterations,
210212
                 consecutive_errors=route_decision.consecutive_errors,
211213
                 new_actions_taken=route_decision.new_actions_taken,
@@ -215,7 +217,7 @@ class TurnIterationController:
215217
         return TurnIterationDecision(
216218
             action=TurnIterationAction.COMPLETE,
217219
             continuation_count=route_decision.continuation_count,
218
-            empty_retry_count=empty_retry_count,
220
+            empty_retry_count=reset_empty_retry_count,
219221
             extracted_iterations=extracted_iterations,
220222
             consecutive_errors=route_decision.consecutive_errors,
221223
             new_actions_taken=route_decision.new_actions_taken,
@@ -231,6 +233,7 @@ class TurnIterationController:
231233
         extracted_iterations: int,
232234
         continuation_count: int,
233235
         consecutive_errors: int,
236
+        dod: DefinitionOfDone,
234237
         emit: EventSink,
235238
         summary: TurnSummary,
236239
     ) -> TurnIterationDecision:
@@ -247,6 +250,7 @@ class TurnIterationController:
247250
             original_task=original_task,
248251
             empty_retry_count=next_empty_retry_count,
249252
             max_empty_retries=max_empty_retries,
253
+            dod=dod,
250254
         )
251255
         if empty_decision.should_continue and empty_decision.retry_message:
252256
             if empty_decision.reason_code and empty_decision.reason_summary:
@@ -289,9 +293,11 @@ class TurnIterationController:
289293
             )
290294
         await emit(AgentEvent(type="response", content=final_response))
291295
         return TurnIterationDecision(
292
-            action=TurnIterationAction.COMPLETE,
296
+            action=TurnIterationAction.FINALIZE,
293297
             continuation_count=continuation_count,
294298
             empty_retry_count=next_empty_retry_count,
295299
             extracted_iterations=extracted_iterations,
296300
             consecutive_errors=consecutive_errors,
301
+            finalize_reason_code=empty_decision.reason_code,
302
+            finalize_reason_summary=empty_decision.reason_summary,
297303
         )
src/loader/runtime/turn_loop.pymodified
@@ -40,7 +40,7 @@ class TurnLoopState:
4040
     empty_retry_count: int = 0
4141
     extracted_iterations: int = 0
4242
     consecutive_errors: int = 0
43
-    max_empty_retries: int = 5
43
+    max_empty_retries: int = 2
4444
     max_extracted_iterations: int = 3
4545
 
4646
 
src/loader/runtime/turn_preparation.pymodified
@@ -160,6 +160,7 @@ class TurnPreparationController:
160160
                 registry=self.context.registry,
161161
                 rollback_plan=rollback_plan,
162162
                 workspace_root=self.context.project_root,
163
+                session=self.context.session,
163164
             ),
164165
         )
165166
         return executor, rollback_plan
src/loader/runtime/workflow.pymodified
@@ -10,7 +10,12 @@ from typing import ClassVar
1010
 
1111
 from ..llm.base import ToolCall
1212
 from .clarify_grounding import ClarifyGrounding
13
-from .dod import slugify
13
+from .dod import (
14
+    all_planned_artifacts_exist,
15
+    collect_planned_artifact_targets,
16
+    planned_artifact_target_satisfied,
17
+    slugify,
18
+)
1419
 from .workflow_policy import (
1520
     ArtifactEvidence,
1621
     ArtifactEvidenceKind,
@@ -46,12 +51,14 @@ __all__ = [
4651
     "WorkflowTimelineEntryKind",
4752
     "advance_todos_from_tool_call",
4853
     "build_execute_bridge",
54
+    "effective_pending_todo_items",
4955
     "enrich_clarify_brief_with_grounding",
5056
     "extract_verification_commands_from_markdown",
5157
     "load_brief",
5258
     "load_planning_artifacts",
5359
     "merge_refreshed_todos_with_existing_scope",
5460
     "preserve_task_grounded_acceptance_criteria",
61
+    "reconcile_aggregate_completion_steps",
5562
     "sync_todos_to_definition_of_done",
5663
 ]
5764
 
@@ -106,16 +113,35 @@ _PARSE_STEP_HINTS = (
106113
 )
107114
 _MUTATION_STEP_HINTS = (
108115
     "create",
116
+    "creating",
109117
     "update",
118
+    "updating",
110119
     "edit",
120
+    "editing",
111121
     "write",
122
+    "writing",
112123
     "fix",
124
+    "fixing",
113125
     "modify",
126
+    "modifying",
114127
     "change",
128
+    "changing",
115129
     "patch",
130
+    "patching",
116131
     "replace",
132
+    "replacing",
117133
     "correct",
134
+    "correcting",
118135
     "rewrite",
136
+    "rewriting",
137
+)
138
+_CREATION_STEP_HINTS = (
139
+    "create",
140
+    "creating",
141
+    "generate",
142
+    "generating",
143
+    "scaffold",
144
+    "scaffolding",
119145
 )
120146
 _VERIFY_STEP_HINTS = (
121147
     "verify",
@@ -136,6 +162,20 @@ _AGGREGATE_TODO_HINTS = (
136162
     "properly linked",
137163
     "directory structure",
138164
 )
165
+_ARTIFACT_SET_COMPLETION_HINTS = (
166
+    "link",
167
+    "links",
168
+    "linked",
169
+    "navigation",
170
+    "consistency",
171
+    "consistent",
172
+    "formatted",
173
+    "formatting",
174
+    "review",
175
+)
176
+_TODO_FILE_CANDIDATE_PATTERN = re.compile(
177
+    r"(?:[A-Za-z0-9_.-]+/)*[A-Za-z0-9_.-]+\.[A-Za-z0-9]+"
178
+)
139179
 _ACTIONABLE_STEP_VERBS = {
140180
     "add",
141181
     "apply",
@@ -560,6 +600,25 @@ class PlanningArtifacts:
560600
             implementation_steps=list(self.implementation_steps),
561601
         )
562602
 
603
+    def with_file_changes(self, file_changes: list[str]) -> PlanningArtifacts:
604
+        """Return one copy with a rewritten file-changes section."""
605
+
606
+        normalized = [item.strip() for item in file_changes if item.strip()]
607
+        if not normalized:
608
+            return self
609
+
610
+        return PlanningArtifacts(
611
+            implementation_markdown=_replace_markdown_section_items(
612
+                self.implementation_markdown,
613
+                "File Changes",
614
+                normalized,
615
+            ),
616
+            verification_markdown=self.verification_markdown,
617
+            verification_commands=list(self.verification_commands),
618
+            acceptance_criteria=list(self.acceptance_criteria),
619
+            implementation_steps=list(self.implementation_steps),
620
+        )
621
+
563622
     def with_progress_context(
564623
         self,
565624
         *,
@@ -650,6 +709,8 @@ def load_planning_artifacts(
650709
 def sync_todos_to_definition_of_done(
651710
     dod,
652711
     todos: list[dict[str, str]],
712
+    *,
713
+    project_root: Path | None = None,
653714
 ) -> None:
654715
     """Reflect todo state into DoD pending/completed items."""
655716
 
@@ -671,24 +732,100 @@ def sync_todos_to_definition_of_done(
671732
             "Collect verification evidence",
672733
         }
673734
     ]
735
+    existing_completed = {
736
+        item.strip()
737
+        for item in dod.completed_items
738
+        if item.strip() and item not in _SPECIAL_TODO_ITEMS
739
+    }
674740
 
675741
     pending: list[str] = []
676742
     completed: list[str] = []
677743
     for item in todos:
678744
         status = str(item.get("status", "")).strip().lower()
679
-        label = str(
680
-            item.get("active_form") if status == "in_progress" else item.get("content", "")
681
-        ).strip()
682
-        if not label:
745
+        content = str(item.get("content", "")).strip()
746
+        active_form = str(item.get("active_form", "")).strip()
747
+        label = active_form if status == "in_progress" else content
748
+        if not label and not content:
749
+            continue
750
+        # Treat exact todo items as monotonic. If a successful tool call already
751
+        # marked the same todo complete, a stale TodoWrite snapshot should not
752
+        # regress it back to pending / in progress.
753
+        if status != "completed" and (
754
+            content in existing_completed or active_form in existing_completed
755
+        ):
756
+            completed.append(content or active_form or label)
683757
             continue
684758
         if status == "completed":
685
-            completed.append(str(item.get("content", label)).strip())
759
+            completed.append(content or label)
686760
         else:
687761
             pending.append(label)
688762
 
689763
     dod.pending_items = list(dict.fromkeys(pending + special_pending))
690764
     dod.completed_items = list(dict.fromkeys(completed + special_completed))
691765
 
766
+    if project_root is not None:
767
+        _reopen_aggregate_completion_steps_for_missing_artifacts(
768
+            dod,
769
+            project_root=project_root,
770
+        )
771
+        _reopen_directory_content_steps_for_incomplete_artifacts(
772
+            dod,
773
+            project_root=project_root,
774
+        )
775
+        dod.pending_items = effective_pending_todo_items(
776
+            dod,
777
+            project_root=project_root,
778
+        )
779
+
780
+
781
+def effective_pending_todo_items(
782
+    dod,
783
+    *,
784
+    project_root: Path | None = None,
785
+) -> list[str]:
786
+    """Return pending todo items after filtering stale artifact-expansion drift."""
787
+
788
+    pending_items = [item for item in dod.pending_items if item.strip()]
789
+    if not pending_items or project_root is None or dod.status == "fixing":
790
+        return pending_items
791
+
792
+    planned_targets = collect_planned_artifact_targets(
793
+        dod,
794
+        project_root=project_root,
795
+        max_paths=24,
796
+    )
797
+    if not planned_targets:
798
+        return pending_items
799
+    if not all_planned_artifacts_exist(dod, project_root=project_root, max_paths=24):
800
+        return pending_items
801
+
802
+    planned_files = {
803
+        target.name.lower()
804
+        for target, expect_directory in planned_targets
805
+        if not expect_directory
806
+    }
807
+    if not planned_files:
808
+        return pending_items
809
+
810
+    filtered_items = [
811
+        item
812
+        for item in pending_items
813
+        if not _todo_targets_unplanned_artifact(item, planned_files)
814
+    ]
815
+    filtered_items = [
816
+        item
817
+        for item in filtered_items
818
+        if not _todo_describes_stale_creation_after_artifacts_exist(
819
+            item,
820
+            planned_files,
821
+        )
822
+    ]
823
+    return [
824
+        item
825
+        for item in filtered_items
826
+        if not _todo_describes_stale_discovery_after_artifacts_exist(item)
827
+    ]
828
+
692829
 
693830
 def preserve_task_grounded_acceptance_criteria(
694831
     task_statement: str,
@@ -714,6 +851,7 @@ def merge_refreshed_todos_with_existing_scope(
714851
     existing_pending_items: list[str],
715852
     existing_completed_items: list[str],
716853
     refreshed_steps: list[str],
854
+    planned_files: set[str] | None = None,
717855
 ) -> list[dict[str, str]]:
718856
     """Merge one refreshed plan with task-grounded todo scope already in flight."""
719857
 
@@ -740,6 +878,12 @@ def merge_refreshed_todos_with_existing_scope(
740878
             or _looks_actionable_refresh_step(item)
741879
         )
742880
     ]
881
+    if planned_files:
882
+        refreshed_candidates = [
883
+            item
884
+            for item in refreshed_candidates
885
+            if not _todo_targets_unplanned_artifact(item, planned_files)
886
+        ]
743887
 
744888
     todos: list[dict[str, str]] = []
745889
     seen: set[str] = set()
@@ -839,6 +983,12 @@ def _todo_progress_score(item: str, tool_call: ToolCall) -> int:
839983
         if _contains_any(text, _PARSE_STEP_HINTS) and ".html" in combined:
840984
             score += 1
841985
     elif name in {"glob", "grep"}:
986
+        if not (
987
+            _contains_any(text, _SEARCH_STEP_HINTS)
988
+            or _contains_any(text, _READ_STEP_HINTS)
989
+            or _contains_any(text, _PARSE_STEP_HINTS)
990
+        ):
991
+            return 0
842992
         if _contains_any(text, _SEARCH_STEP_HINTS):
843993
             score += 2
844994
         if name == "glob" and _contains_any(text, _READ_STEP_HINTS) and ".html" in combined:
@@ -874,12 +1024,231 @@ def _contains_any(text: str, candidates: tuple[str, ...]) -> bool:
8741024
 
8751025
 
8761026
 def _todo_describes_aggregate_mutation(text: str) -> bool:
877
-    return _contains_any(text, _AGGREGATE_TODO_HINTS) and _contains_any(
1027
+    return (
1028
+        _contains_any(text, _AGGREGATE_TODO_HINTS)
1029
+        or _todo_mentions_plural_output_set(text)
1030
+    ) and _contains_any(
8781031
         text,
8791032
         _MUTATION_STEP_HINTS,
8801033
     )
8811034
 
8821035
 
1036
def _todo_requires_complete_artifact_set(text: str) -> bool:
    """Return True when *text* claims completion over a whole set of artifacts."""
    mentions_aggregate = _contains_any(text, _AGGREGATE_TODO_HINTS) or _todo_mentions_plural_output_set(text)
    return mentions_aggregate and _contains_any(text, _ARTIFACT_SET_COMPLETION_HINTS)
1044
+
1045
+
1046
def _todo_mentions_plural_output_set(text: str) -> bool:
    """Return True when *text* refers to a plural set of outputs without naming a file."""
    # A concrete file name makes the step specific rather than aggregate.
    if _TODO_FILE_CANDIDATE_PATTERN.search(text):
        return False
    plural_phrases = (
        "chapter files",
        "all chapters",
        "chapters",
        "files following",
        "files with",
        "output files",
        "artifacts",
        "documents",
        "sections",
        "pages",
    )
    for phrase in plural_phrases:
        if phrase in text:
            return True
    return False
1064
+
1065
+
1066
def _todo_targets_unplanned_artifact(item: str, planned_files: set[str]) -> bool:
    """Return True when a mutation todo names only files outside the planned set."""
    if item in _SPECIAL_TODO_ITEMS:
        return False

    text = item.strip().lower()
    if not text:
        return False
    if not _contains_any(text, _MUTATION_STEP_HINTS):
        return False

    mentioned = {
        Path(match).name.lower()
        for match in _TODO_FILE_CANDIDATE_PATTERN.findall(text)
    }
    if not mentioned:
        return False
    # Unplanned only when none of the named files belong to the plan.
    return mentioned.isdisjoint(planned_files)
1082
+
1083
+
1084
def _todo_describes_stale_discovery_after_artifacts_exist(item: str) -> bool:
    """Return True for pure discovery steps that are moot once artifacts exist."""
    text = item.strip().lower()
    if not text or item in _SPECIAL_TODO_ITEMS:
        return False
    # Verification, mutation, and completion-set steps are never discovery-only.
    for protected_hints in (
        _VERIFY_STEP_HINTS,
        _MUTATION_STEP_HINTS,
        _ARTIFACT_SET_COMPLETION_HINTS,
    ):
        if _contains_any(text, protected_hints):
            return False
    return any(
        _contains_any(text, discovery_hints)
        for discovery_hints in (_READ_STEP_HINTS, _SEARCH_STEP_HINTS, _PARSE_STEP_HINTS)
    )
1099
+
1100
+
1101
def _todo_describes_stale_creation_after_artifacts_exist(
    item: str,
    planned_files: set[str],
) -> bool:
    """Return True when a creation todo names planned files that already exist."""
    text = item.strip().lower()
    if not text or item in _SPECIAL_TODO_ITEMS:
        return False
    if _contains_any(text, _VERIFY_STEP_HINTS):
        return False
    if not _contains_any(text, _CREATION_STEP_HINTS):
        return False
    named_files = {
        Path(match).name.lower()
        for match in _TODO_FILE_CANDIDATE_PATTERN.findall(text)
    }
    # Stale only when at least one named file is part of the (already-built) plan.
    return bool(named_files) and not named_files.isdisjoint(planned_files)
1119
+
1120
+
1121
def _todo_describes_directory_content_creation(
    item: str,
    directories: list[Path],
) -> bool:
    """Return True when *item* describes creating content inside one of *directories*."""
    text = item.strip().lower()
    if not text or item in _SPECIAL_TODO_ITEMS:
        return False
    if not _contains_any(text, _CREATION_STEP_HINTS):
        return False

    content_tokens = (
        "file",
        "files",
        "chapter",
        "chapters",
        "page",
        "pages",
        "artifact",
        "artifacts",
        "content",
        "test",
        "tests",
    )
    if not any(token in text for token in content_tokens):
        return False

    for directory in directories:
        name = directory.name.lower()
        # Match the directory name plus a naive singular variant
        # ("chapters" -> "chapter", "entries" -> "entry").
        variants = {name}
        if name.endswith("ies") and len(name) > 3:
            variants.add(f"{name[:-3]}y")
        elif name.endswith("s") and len(name) > 3:
            variants.add(name[:-1])
        if any(variant in text for variant in variants):
            return True
    return False
1158
+
1159
+
1160
def _reopen_aggregate_completion_steps_for_missing_artifacts(
    dod,
    *,
    project_root: Path,
) -> None:
    """Move aggregate completion steps back to pending while planned files are missing."""
    planned_targets = collect_planned_artifact_targets(
        dod,
        project_root=project_root,
        max_paths=12,
    )
    if not planned_targets:
        return
    # Nothing to reopen once every planned artifact is on disk.
    if all_planned_artifacts_exist(dod, project_root=project_root, max_paths=12):
        return

    still_completed: list[str] = []
    to_reopen: list[str] = []
    for item in dod.completed_items:
        lowered = item.strip().lower()
        if item not in _SPECIAL_TODO_ITEMS and _todo_requires_complete_artifact_set(lowered):
            to_reopen.append(item)
        else:
            still_completed.append(item)

    if not to_reopen:
        return

    dod.completed_items = still_completed
    # dict.fromkeys de-duplicates while preserving order.
    dod.pending_items = list(dict.fromkeys(dod.pending_items + to_reopen))
1190
+
1191
+
1192
def _reopen_directory_content_steps_for_incomplete_artifacts(
    dod,
    *,
    project_root: Path,
) -> None:
    """Reopen completed content-creation steps whose planned directories lack content."""
    planned_targets = collect_planned_artifact_targets(
        dod,
        project_root=project_root,
        max_paths=12,
    )
    if not planned_targets:
        return

    unsatisfied_dirs: list[Path] = []
    for target, expect_directory in planned_targets:
        if not expect_directory:
            continue
        if planned_artifact_target_satisfied(
            dod,
            target=target,
            expect_directory=True,
            project_root=project_root,
        ):
            continue
        unsatisfied_dirs.append(target)
    if not unsatisfied_dirs:
        return

    still_completed: list[str] = []
    to_reopen: list[str] = []
    for item in dod.completed_items:
        if item not in _SPECIAL_TODO_ITEMS and _todo_describes_directory_content_creation(
            item,
            unsatisfied_dirs,
        ):
            to_reopen.append(item)
        else:
            still_completed.append(item)

    if not to_reopen:
        return

    dod.completed_items = still_completed
    # dict.fromkeys de-duplicates while preserving order.
    dod.pending_items = list(dict.fromkeys(dod.pending_items + to_reopen))
1235
+
1236
+
1237
def reconcile_aggregate_completion_steps(
    dod,
    *,
    project_root: Path | None,
) -> None:
    """Reopen aggregate completion steps when planned artifacts are still missing."""
    if project_root is None:
        # Without a project root there is no filesystem to check against.
        return
    _reopen_aggregate_completion_steps_for_missing_artifacts(
        dod,
        project_root=project_root,
    )
1250
+
1251
+
8831252
 def _looks_like_search_command(command: str) -> bool:
8841253
     return any(token in command for token in (" ls", "ls ", "find ", "rg ", "grep ", "glob "))
8851254
 
src/loader/runtime/workflow_lanes.py — modified
@@ -22,7 +22,7 @@ from .clarify_strategy import (
2222
     describe_clarify_stage,
2323
 )
2424
 from .context import RuntimeContext
25
-from .dod import DefinitionOfDone, DefinitionOfDoneStore
25
+from .dod import DefinitionOfDone, DefinitionOfDoneStore, collect_planned_artifact_targets
2626
 from .events import AgentEvent, TurnSummary
2727
 from .executor import ToolExecutor
2828
 from .workflow import (
@@ -208,6 +208,12 @@ class WorkflowLaneRunner:
208208
                 refreshed_acceptance_criteria=list(artifacts.acceptance_criteria),
209209
             )
210210
             artifacts = artifacts.with_acceptance_criteria(preserved_acceptance)
211
+            preserved_file_changes = _preserved_file_change_items(
212
+                dod,
213
+                project_root=self.context.project_root,
214
+            )
215
+            if preserved_file_changes:
216
+                artifacts = artifacts.with_file_changes(preserved_file_changes)
211217
             artifacts = artifacts.with_progress_context(
212218
                 touched_files=list(dod.touched_files),
213219
                 completed_items=list(dod.completed_items),
@@ -309,11 +315,16 @@ class WorkflowLaneRunner:
309315
         assert executor is not None
310316
 
311317
         if preserve_existing_scope:
318
+            planned_files = _planned_file_names_for_refresh(
319
+                dod,
320
+                project_root=self.context.project_root,
321
+            )
312322
             todos = merge_refreshed_todos_with_existing_scope(
313323
                 task_statement,
314324
                 existing_pending_items=list(dod.pending_items),
315325
                 existing_completed_items=list(dod.completed_items),
316326
                 refreshed_steps=list(artifacts.implementation_steps[:8]),
327
+                planned_files=planned_files,
317328
             )
318329
         else:
319330
             todos = [
@@ -369,7 +380,11 @@ class WorkflowLaneRunner:
369380
         if outcome.registry_result is not None:
370381
             new_todos = outcome.registry_result.metadata.get("new_todos", [])
371382
             if isinstance(new_todos, list):
372
-                sync_todos_to_definition_of_done(dod, new_todos)
383
+                sync_todos_to_definition_of_done(
384
+                    dod,
385
+                    new_todos,
386
+                    project_root=self.context.project_root,
387
+                )
373388
                 self.dod_store.save(dod)
374389
 
375390
     async def _run_clarify_round(
@@ -720,3 +735,37 @@ class WorkflowLaneRunner:
720735
             decision_boundaries=list(brief.decision_boundaries),
721736
             likely_touchpoints=list(brief.likely_touchpoints),
722737
         )
738
+
739
+
740
def _preserved_file_change_items(
    dod: DefinitionOfDone,
    *,
    project_root: Path,
) -> list[str]:
    """Render the planned artifact targets as backticked file-change bullets."""
    rendered: list[str] = []
    planned = collect_planned_artifact_targets(
        dod,
        project_root=project_root,
        max_paths=24,
    )
    for target, expect_directory in planned:
        label = str(target)
        # Directories carry a trailing slash so readers can tell them apart.
        if expect_directory and not label.endswith("/"):
            label = f"{label}/"
        rendered.append(f"`{label}`")
    return rendered
756
+
757
+
758
def _planned_file_names_for_refresh(
    dod: DefinitionOfDone,
    *,
    project_root: Path,
) -> set[str]:
    """Collect lowercase basenames of every planned file (directories excluded)."""
    names: set[str] = set()
    for target, expect_directory in collect_planned_artifact_targets(
        dod,
        project_root=project_root,
        max_paths=24,
    ):
        if not expect_directory:
            names.add(target.name.lower())
    return names
src/loader/runtime/workflow_recovery.py — modified
@@ -10,7 +10,7 @@ from .artifact_invalidation import (
1010
     WorkflowRecoveryStrategy,
1111
 )
1212
 from .context import RuntimeContext
13
-from .dod import DefinitionOfDone
13
+from .dod import DefinitionOfDone, collect_planned_artifact_targets
1414
 from .events import AgentEvent, TurnSummary
1515
 from .executor import ToolExecutor
1616
 from .workflow import (
@@ -128,6 +128,10 @@ class WorkflowRecoveryController:
128128
     def plan_freshness(self, dod: DefinitionOfDone) -> ArtifactFreshness:
129129
         """Assess whether the persisted workflow artifacts are stale."""
130130
 
131
+        planned_artifacts_complete = not _first_missing_planned_artifact(
132
+            dod,
133
+            project_root=self.context.project_root,
134
+        )
131135
         return self.artifact_invalidation.assess(
132136
             task_statement=dod.task_statement,
133137
             clarify_text=self._artifact_text(dod.clarify_brief),
@@ -136,6 +140,8 @@ class WorkflowRecoveryController:
136140
             acceptance_criteria=list(dod.acceptance_criteria),
137141
             touched_files=list(dod.touched_files),
138142
             last_verification_result=dod.last_verification_result,
143
+            retry_count=dod.retry_count,
144
+            planned_artifacts_complete=planned_artifacts_complete,
139145
         )
140146
 
141147
     async def _run_plan_refresh_reentry(
@@ -198,6 +204,25 @@ class WorkflowRecoveryController:
198204
             ),
199205
             None,
200206
         )
207
+        missing_artifact = _first_missing_planned_artifact(
208
+            dod,
209
+            project_root=self.context.project_root,
210
+        )
211
+        if _should_prioritize_missing_artifact(
212
+            next_pending=next_pending,
213
+            missing_artifact=missing_artifact,
214
+        ):
215
+            target, expect_directory = missing_artifact
216
+            label = target.name or str(target)
217
+            if expect_directory and not label.endswith("/"):
218
+                label += "/"
219
+            self.context.queue_steering_message(
220
+                "Plan refresh preserved the progress already made. "
221
+                "Reuse the existing files and confirmed facts, then resume by creating "
222
+                f"`{label}`. Prefer one concrete mutation step for `{target}` before "
223
+                "any more review or consistency-check work."
224
+            )
225
+            return True
201226
         if next_pending:
202227
             self.context.queue_steering_message(
203228
                 "Plan refresh preserved the progress already made. "
@@ -350,3 +375,62 @@ class WorkflowRecoveryController:
350375
     @staticmethod
351376
     def _recovery_evidence_summary(freshness: ArtifactFreshness) -> list[str]:
352377
         return list(freshness.evidence_summary)
378
+
379
+
380
def _first_missing_planned_artifact(
    dod: DefinitionOfDone,
    *,
    project_root: Path,
) -> tuple[Path, bool] | None:
    """Return the first planned artifact absent from disk, or None when all exist."""
    for target, expect_directory in collect_planned_artifact_targets(
        dod,
        project_root=project_root,
        max_paths=12,
    ):
        present = target.is_dir() if expect_directory else target.is_file()
        if not present:
            return target, expect_directory
    return None
394
+
395
+
396
+def _should_prioritize_missing_artifact(
397
+    *,
398
+    next_pending: str | None,
399
+    missing_artifact: tuple[Path, bool] | None,
400
+) -> bool:
401
+    if missing_artifact is None:
402
+        return False
403
+    if not next_pending:
404
+        return True
405
+    lowered = next_pending.lower()
406
+    if any(
407
+        hint in lowered
408
+        for hint in (
409
+            "verify",
410
+            "validation",
411
+            "validate",
412
+            "review",
413
+            "consistent",
414
+            "consistently",
415
+            "linked",
416
+            "format",
417
+            "formatted",
418
+        )
419
+    ):
420
+        return True
421
+    return not any(
422
+        hint in lowered
423
+        for hint in (
424
+            "create",
425
+            "update",
426
+            "edit",
427
+            "write",
428
+            "fix",
429
+            "modify",
430
+            "change",
431
+            "patch",
432
+            "replace",
433
+            "correct",
434
+            "rewrite",
435
+        )
436
+    )
src/loader/tools/workflow_tools.py — modified
@@ -117,6 +117,7 @@ class TodoWriteTool(Tool):
117117
 
118118
         store_path = self._store_path()
119119
         old_todos = await asyncio.to_thread(self._read_existing_items, store_path)
120
+        items = self._merge_partial_update(old_todos, items)
120121
 
121122
         all_done = all(item.status == "completed" for item in items)
122123
         persisted_items = [] if all_done else [item.to_dict() for item in items]
@@ -144,6 +145,29 @@ class TodoWriteTool(Tool):
144145
             metadata=payload,
145146
         )
146147
 
148
+    def _merge_partial_update(
149
+        self,
150
+        old_todos: list[dict[str, Any]],
151
+        items: list[TodoItem],
152
+    ) -> list[TodoItem]:
153
+        """Preserve omitted todos when the model sends a narrow status update."""
154
+
155
+        old_items = [TodoItem.from_dict(item) for item in old_todos if isinstance(item, dict)]
156
+        if not old_items or len(items) >= len(old_items):
157
+            return items
158
+
159
+        old_by_content = {item.content: item for item in old_items if item.content}
160
+        if not old_by_content:
161
+            return items
162
+        if not all(item.content in old_by_content for item in items):
163
+            return items
164
+
165
+        updates = {item.content: item for item in items}
166
+        merged: list[TodoItem] = []
167
+        for old_item in old_items:
168
+            merged.append(updates.get(old_item.content, old_item))
169
+        return merged
170
+
147171
     def _store_path(self) -> Path:
148172
         return active_todo_store_path(self.workspace_root or Path.cwd())
149173
 
tests/test_artifact_invalidation.py — modified
@@ -92,3 +92,49 @@ def test_artifact_invalidation_treats_path_separator_variants_as_same_touchpoint
9292
     assert freshness.stale_plan is False
9393
     assert freshness.stale_brief is False
9494
     assert "touched_files_outside_plan" not in freshness.reason_codes
95
+
96
+
97
+def test_artifact_invalidation_allows_supplemental_repair_files_after_failed_verification() -> None:
98
+    assessor = ArtifactInvalidationAssessor()
99
+
100
+    freshness = assessor.assess(
101
+        task_statement="Build a multi-file nginx guide.",
102
+        clarify_text=None,
103
+        implementation_text=(
104
+            "# Implementation Plan\n"
105
+            "- Create index.html.\n"
106
+            "- Create 01-getting-started.html.\n"
107
+            "- Create 02-installation.html.\n"
108
+        ),
109
+        verification_text=(
110
+            "# Verification Plan\n"
111
+            "## Acceptance Criteria\n"
112
+            "- index.html exists.\n"
113
+            "- 01-getting-started.html exists.\n"
114
+            "- 02-installation.html exists.\n"
115
+        ),
116
+        acceptance_criteria=[
117
+            "index.html exists.",
118
+            "01-getting-started.html exists.",
119
+            "02-installation.html exists.",
120
+        ],
121
+        touched_files=[
122
+            "/tmp/guides/nginx/index.html",
123
+            "/tmp/guides/nginx/chapters/01-getting-started.html",
124
+            "/tmp/guides/nginx/chapters/02-installation.html",
125
+            "/tmp/guides/nginx/styles.css",
126
+        ],
127
+        last_verification_result="planned",
128
+        retry_count=1,
129
+        planned_artifacts_complete=True,
130
+    )
131
+
132
+    assert freshness.stale_plan is False
133
+    assert freshness.stale_brief is False
134
+    assert freshness.recovery_strategy == WorkflowRecoveryStrategy.NONE.value
135
+    assert "touched_files_outside_plan" not in freshness.reason_codes
136
+    assert any(
137
+        item.kind == ArtifactEvidenceKind.CONFIRMED_TOUCHPOINT.value
138
+        and "styles.css" in item.summary
139
+        for item in freshness.evidence
140
+    )
tests/test_compaction.py — modified
@@ -149,12 +149,12 @@ def test_build_session_summary_preserves_confirmed_facts_and_next_step() -> None
149149
 
150150
     assert "Confirmed facts:" in summary
151151
     assert "02-basic-syntax.html -> 02-setup.html" in summary
152
-    assert "02-setup.html = Chapter 2: Setting Up Fortran" in summary
152
+    assert "02-setup.html = Chapter 2: Setting Up Fortran" not in summary
153153
     assert "Preferred next step:" in summary
154154
     assert "`~/Loader/guides/fortran/index.html`" in summary
155155
 
156156
 
157
-def test_summarize_confirmed_facts_extracts_chapter_titles_from_read_results() -> None:
157
+def test_summarize_confirmed_facts_ignores_reference_chapter_title_reads() -> None:
158158
     messages = [
159159
         Message(
160160
             role=Role.ASSISTANT,
@@ -186,10 +186,7 @@ def test_summarize_confirmed_facts_extracts_chapter_titles_from_read_results() -
186186
 
187187
     confirmed_facts = summarize_confirmed_facts(messages, max_items=2)
188188
 
189
-    assert confirmed_facts is not None
190
-    assert "Chapter titles confirmed:" in confirmed_facts
191
-    assert "01-introduction.html = Chapter 1: Introduction to Fortran" in confirmed_facts
192
-    assert "02-setup.html = Chapter 2: Setting Up Fortran" in confirmed_facts
189
+    assert confirmed_facts is None
193190
 
194191
 
195192
 def test_infer_preferred_next_step_uses_confirmed_chapter_pairs() -> None:
@@ -222,10 +219,7 @@ def test_infer_preferred_next_step_uses_confirmed_chapter_pairs() -> None:
222219
         current_task="Update /tmp/fortran/index.html so the chapter list matches the real files.",
223220
     )
224221
 
225
-    assert next_step == (
226
-        "Update `/tmp/fortran/index.html` using the confirmed chapter file/title pairs "
227
-        "instead of rereading files."
228
-    )
222
+    assert next_step is None
229223
 
230224
 
231225
 def test_infer_preferred_next_step_uses_latest_verification_gap() -> None:
@@ -278,13 +272,8 @@ def test_infer_preferred_next_step_uses_latest_verification_gap() -> None:
278272
         current_task="Update /tmp/fortran/index.html so the chapter list matches the real files.",
279273
     )
280274
 
281
-    assert confirmed_facts is not None
282
-    assert "Verification gaps: missing TOC links chapters/05-control-structures.html" in confirmed_facts
283
-    assert next_step == (
284
-        "Update `/tmp/fortran/index.html` to fix the specific verification failures "
285
-        "(missing TOC links chapters/05-control-structures.html, "
286
-        "chapters/06-input-output.html) instead of restarting discovery."
287
-    )
275
+    assert confirmed_facts is None
276
+    assert next_step is None
288277
 
289278
 
290279
 def test_compact_session_messages_uses_single_continuation_instruction_block() -> None:
tests/test_dod.py — modified
@@ -6,8 +6,10 @@ from loader.llm.base import ToolCall
66
 from loader.runtime.dod import (
77
     DefinitionOfDoneStore,
88
     VerificationEvidence,
9
+    all_planned_artifacts_exist,
910
     begin_new_verification_attempt,
1011
     build_verification_summary,
12
+    collect_planned_artifact_targets,
1113
     create_definition_of_done,
1214
     derive_verification_commands,
1315
     determine_task_size,
@@ -166,6 +168,172 @@ def test_derive_verification_commands_avoids_repo_defaults_for_external_artifact
166168
     assert commands == [f"test -f {external_index}"]
167169
 
168170
 
171
+def test_derive_verification_commands_adds_generic_local_html_link_check(
172
+    tmp_path: Path,
173
+) -> None:
174
+    docs = tmp_path / "docs"
175
+    docs.mkdir()
176
+    index = docs / "index.html"
177
+    index.write_text('<a href="chapters/01-intro.html">Intro</a>\n')
178
+
179
+    dod = create_definition_of_done("Create a small multi-page HTML guide.")
180
+    dod.touched_files = [str(index)]
181
+
182
+    commands = derive_verification_commands(
183
+        dod,
184
+        project_root=tmp_path,
185
+        task_statement=dod.task_statement,
186
+        supplement_existing=True,
187
+    )
188
+
189
+    assert any("Missing local HTML links:" in command for command in commands)
190
+
191
+
192
+def test_derive_verification_commands_adds_planned_artifact_existence_checks(
193
+    tmp_path: Path,
194
+) -> None:
195
+    implementation_plan = tmp_path / "implementation.md"
196
+    implementation_plan.write_text(
197
+        "\n".join(
198
+            [
199
+                "# Implementation Plan",
200
+                "",
201
+                "## File Changes",
202
+                "- `docs/index.html`",
203
+                "- `docs/chapters/01-intro.html`",
204
+                "- `docs/chapters/02-installation.html`",
205
+                "- `docs/chapters/`",
206
+            ]
207
+        )
208
+    )
209
+
210
+    dod = create_definition_of_done("Create a multi-page HTML guide.")
211
+    dod.implementation_plan = str(implementation_plan)
212
+
213
+    commands = derive_verification_commands(
214
+        dod,
215
+        project_root=tmp_path,
216
+        task_statement=dod.task_statement,
217
+        supplement_existing=True,
218
+    )
219
+
220
+    assert f"test -f {tmp_path / 'docs/index.html'}" in commands
221
+    assert f"test -f {tmp_path / 'docs/chapters/01-intro.html'}" in commands
222
+    assert f"test -f {tmp_path / 'docs/chapters/02-installation.html'}" in commands
223
+    assert f"test -d {tmp_path / 'docs/chapters'}" in commands
224
+
225
+
226
+def test_collect_planned_artifact_targets_ignores_prose_path_fragments_in_refreshed_plan(
227
+    tmp_path: Path,
228
+) -> None:
229
+    implementation_plan = tmp_path / "implementation.md"
230
+    touched_index = tmp_path / "external" / "guides" / "nginx" / "index.html"
231
+    touched_index.parent.mkdir(parents=True)
232
+    touched_index.write_text("<html></html>\n")
233
+    implementation_plan.write_text(
234
+        "\n".join(
235
+            [
236
+                "# Implementation Plan",
237
+                "",
238
+                "## File Changes",
239
+                "- Created main index.html file with proper structure and navigation",
240
+                "- Created the nginx guide directory structure (chapters/)",
241
+                "- Created the first chapter file (01-introduction.html) with appropriate content",
242
+                "",
243
+                "## Confirmed Progress",
244
+                f"- Already touched during execution: `{touched_index}`.",
245
+            ]
246
+        )
247
+    )
248
+
249
+    dod = create_definition_of_done("Create an external nginx guide.")
250
+    dod.implementation_plan = str(implementation_plan)
251
+
252
+    targets = collect_planned_artifact_targets(dod, project_root=tmp_path)
253
+
254
+    assert (tmp_path / "chapters", True) not in targets
255
+    assert (tmp_path / "01-introduction.html", False) not in targets
256
+    assert targets == [(touched_index, False)]
257
+
258
+
259
+def test_all_planned_artifacts_exist_requires_file_contents_for_planned_output_directory(
260
+    tmp_path: Path,
261
+) -> None:
262
+    implementation_plan = tmp_path / "implementation.md"
263
+    implementation_plan.write_text(
264
+        "\n".join(
265
+            [
266
+                "# Implementation Plan",
267
+                "",
268
+                "## File Changes",
269
+                f"- `{tmp_path / 'guide' / 'index.html'}`",
270
+                f"- `{tmp_path / 'guide' / 'chapters'}/` (directory for chapter files)",
271
+                "",
272
+                "## Execution Order",
273
+                "- Create chapter files with appropriate content",
274
+            ]
275
+        )
276
+    )
277
+
278
+    guide_root = tmp_path / "guide"
279
+    chapters = guide_root / "chapters"
280
+    guide_root.mkdir()
281
+    chapters.mkdir()
282
+    (guide_root / "index.html").write_text("<html></html>\n")
283
+
284
+    dod = create_definition_of_done("Create a multi-file guide with chapters.")
285
+    dod.implementation_plan = str(implementation_plan)
286
+    dod.completed_items = ["Create chapter files with appropriate content"]
287
+
288
+    assert all_planned_artifacts_exist(dod, project_root=tmp_path) is False
289
+
290
+    (chapters / "01-getting-started.html").write_text("<h1>Intro</h1>\n")
291
+
292
+    assert all_planned_artifacts_exist(dod, project_root=tmp_path) is True
293
+
294
+
295
+def test_all_planned_artifacts_exist_stays_false_while_touched_html_links_missing(
296
+    tmp_path: Path,
297
+) -> None:
298
+    implementation_plan = tmp_path / "implementation.md"
299
+    implementation_plan.write_text(
300
+        "\n".join(
301
+            [
302
+                "# Implementation Plan",
303
+                "",
304
+                "## File Changes",
305
+                f"- `{tmp_path / 'guide' / 'index.html'}`",
306
+                f"- `{tmp_path / 'guide' / 'chapters'}/` (directory for chapter files)",
307
+                "",
308
+                "## Execution Order",
309
+                "- Create chapter files with appropriate content",
310
+            ]
311
+        )
312
+    )
313
+
314
+    guide_root = tmp_path / "guide"
315
+    chapters = guide_root / "chapters"
316
+    guide_root.mkdir()
317
+    chapters.mkdir()
318
+    index = guide_root / "index.html"
319
+    index.write_text(
320
+        '<a href="chapters/01-introduction.html">Intro</a>\n'
321
+        '<a href="chapters/02-setup.html">Setup</a>\n'
322
+    )
323
+    (chapters / "01-introduction.html").write_text("<h1>Intro</h1>\n")
324
+
325
+    dod = create_definition_of_done("Create a multi-file guide with chapters.")
326
+    dod.implementation_plan = str(implementation_plan)
327
+    dod.touched_files = [str(index), str(chapters / "01-introduction.html")]
328
+    dod.completed_items = ["Create chapter files with appropriate content"]
329
+
330
+    assert all_planned_artifacts_exist(dod, project_root=tmp_path) is False
331
+
332
+    (chapters / "02-setup.html").write_text("<h1>Setup</h1>\n")
333
+
334
+    assert all_planned_artifacts_exist(dod, project_root=tmp_path) is True
335
+
336
+
169337
 def test_build_verification_summary_keeps_concrete_missing_link_details() -> None:
170338
     summary = build_verification_summary(
171339
         [
tests/test_finalization.py — modified
@@ -10,10 +10,17 @@ import pytest
1010
 from loader.llm.base import Message, Role, ToolCall
1111
 from loader.runtime.completion_trace import CompletionTraceEntry
1212
 from loader.runtime.context import RuntimeContext
13
-from loader.runtime.dod import DefinitionOfDoneStore, create_definition_of_done
13
+from loader.runtime.dod import (
14
+    DefinitionOfDoneStore,
15
+    VerificationEvidence,
16
+    create_definition_of_done,
17
+)
1418
 from loader.runtime.events import TurnSummary
1519
 from loader.runtime.executor import ToolExecutionOutcome, ToolExecutionState
16
-from loader.runtime.finalization import TurnFinalizer
20
+from loader.runtime.finalization import (
21
+    TurnFinalizer,
22
+    _build_verification_repair_guidance,
23
+)
1724
 from loader.runtime.permissions import (
1825
     PermissionMode,
1926
     build_permission_policy,
@@ -129,6 +136,25 @@ class RecordingExecutor:
129136
         )
130137
 
131138
 
139
class SelectiveRecordingExecutor:
    """Test executor that records every command it sees and fails exactly the
    commands containing a configured substring; everything else succeeds."""

    def __init__(self, failing_match: str) -> None:
        # Substring that marks a command as the failing one.
        self.failing_match = failing_match
        # All command strings observed, in call order.
        self.commands: list[str] = []

    async def execute_tool_call(self, tool_call: ToolCall, **_: object) -> ToolExecutionOutcome:
        command = str(tool_call.arguments.get("command", ""))
        self.commands.append(command)
        if self.failing_match in command:
            # Simulate a failed shell command: non-zero exit, stderr text.
            return tool_outcome(
                tool_call=tool_call,
                output="failed",
                is_error=True,
                exit_code=1,
                stdout="",
                stderr="failed",
            )
        # Simulate a successful command.
        return tool_outcome(
            tool_call=tool_call,
            output="ok",
            is_error=False,
            exit_code=0,
            stdout="ok",
            stderr="",
        )
156
+
157
+
132158
 def build_context(temp_dir: Path, session: FakeSession) -> RuntimeContext:
133159
     registry = create_default_registry(temp_dir)
134160
     registry.configure_workspace_root(temp_dir)
@@ -260,6 +286,65 @@ def test_turn_finalizer_finalize_summary_uses_runtime_context(
260286
     ]
261287
 
262288
 
289
def test_verification_repair_guidance_uses_existing_artifacts_as_source_of_truth(
    temp_dir: Path,
) -> None:
    """Guidance built from failed link-check evidence should point the model at
    the real on-disk chapter files as the source of truth, not the broken links."""
    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True)
    index_path = guide_root / "index.html"
    chapter_one = chapters / "01-getting-started.html"
    chapter_two = chapters / "02-installation.html"
    chapter_three = chapters / "03-first-website.html"
    chapter_four = chapters / "04-configuration-basics.html"

    # All planned artifacts actually exist on disk.
    for path in (index_path, chapter_one, chapter_two, chapter_three, chapter_four):
        path.write_text("<html></html>\n")

    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}/`",
                f"- `{chapters}/`",
                f"- `{index_path}`",
                f"- `{chapter_one}`",
                f"- `{chapter_two}`",
                f"- `{chapter_three}`",
                f"- `{chapter_four}`",
                "",
            ]
        )
    )

    dod = create_definition_of_done("Repair the nginx guide index.")
    dod.implementation_plan = str(implementation_plan)
    # Failed evidence: the index links to chapter names that do NOT match the
    # files that exist (01-introduction vs 01-getting-started, etc.).
    dod.evidence = [
        VerificationEvidence(
            command="verify-links",
            passed=False,
            output=(
                "Missing local HTML links:\n"
                f"{index_path}:chapters/01-introduction.html -> {chapters / '01-introduction.html'}\n"
                f"{index_path}:chapters/04-server-blocks.html -> {chapters / '04-server-blocks.html'}\n"
            ),
        )
    ]

    guidance = _build_verification_repair_guidance(
        dod,
        project_root=temp_dir,
    )

    # The guidance must name the existing files so repairs rename links
    # toward them instead of creating the missing targets.
    assert "Use the existing artifact files as the source of truth" in guidance
    assert str(chapter_one) in guidance
    assert str(chapter_two) in guidance
    assert str(chapter_four) in guidance
346
+
347
+
263348
 @pytest.mark.asyncio
264349
 async def test_turn_finalizer_records_skipped_verification_observation(
265350
     temp_dir: Path,
@@ -296,6 +381,8 @@ async def test_turn_finalizer_records_skipped_verification_observation(
296381
         "verification was skipped because no mutating work required checks"
297382
     ]
298383
     assert summary.verification_status == "skipped"
384
+    assert "Complete the requested work" not in dod.pending_items
385
+    assert "Complete the requested work" in dod.completed_items
299386
     assert session.workflow_timeline[-1].kind == "verify_skip"
300387
     assert [item.status for item in session.workflow_timeline[-1].verification_observations] == [
301388
         VerificationObservationStatus.SKIPPED.value
@@ -481,6 +568,76 @@ async def test_turn_finalizer_does_not_append_repo_defaults_to_external_verifica
481568
     ]
482569
 
483570
 
571
@pytest.mark.asyncio
async def test_turn_finalizer_blocks_completion_when_planned_artifacts_are_missing(
    temp_dir: Path,
) -> None:
    """When a planned file is absent, the DoD gate must re-enter the turn with a
    [PLANNED ARTIFACTS STILL MISSING] message and run no verification commands."""
    docs = temp_dir / "docs"
    chapters = docs / "chapters"
    chapters.mkdir(parents=True)
    index = docs / "index.html"
    first = chapters / "01-intro.html"
    second = chapters / "02-installation.html"
    index.write_text(
        "\n".join(
            [
                '<a href="chapters/01-intro.html">Intro</a>',
                '<a href="chapters/02-installation.html">Installation</a>',
            ]
        )
    )
    # Only the first chapter is written; `second` stays missing on purpose.
    first.write_text("<h1>Intro</h1>\n")
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{index}`",
                f"- `{first}`",
                f"- `{second}`",
            ]
        )
    )

    session = FakeSession()
    context = build_context(temp_dir, session)
    finalizer = TurnFinalizer(
        context,
        RuntimeTracer(),
        DefinitionOfDoneStore(temp_dir),
        set_workflow_mode=_noop_set_workflow_mode,
    )
    dod = create_definition_of_done("Create a small multi-page HTML guide.")
    dod.mutating_actions.append("write")
    dod.touched_files.extend([str(index), str(first)])
    dod.implementation_plan = str(implementation_plan)
    dod.verification_commands = [f"ls -la {docs}"]
    summary = TurnSummary(final_response="")
    executor = RecordingExecutor()

    async def capture(event) -> None:
        return None

    result = await finalizer.run_definition_of_done_gate(
        dod=dod,
        candidate_response="Finished the guide.",
        emit=capture,
        summary=summary,
        executor=executor,  # type: ignore[arg-type]
    )

    assert result.should_continue is True
    assert result.reason_code == "planned_artifacts_missing_continue"
    # Verification must be skipped entirely while artifacts are missing.
    assert executor.commands == []
    assert dod.status == "draft"
    assert "Complete the requested work" in dod.pending_items
    assert "Complete the requested work" not in dod.completed_items
    assert session.messages[-1].content.startswith("[PLANNED ARTIFACTS STILL MISSING]")
    # The steering message names the concrete missing file.
    assert "`02-installation.html`" in session.messages[-1].content
639
+
640
+
484641
 @pytest.mark.asyncio
485642
 async def test_turn_finalizer_records_missing_verification_observation(
486643
     temp_dir: Path,
@@ -532,6 +689,146 @@ async def test_turn_finalizer_records_missing_verification_observation(
532689
     assert session.messages[-1].content.startswith("[DEFINITION OF DONE CHECK FAILED]")
533690
 
534691
 
692
+@pytest.mark.asyncio
693
+async def test_turn_finalizer_ignores_unplanned_expansion_pending_items_once_plan_exists(
694
+    temp_dir: Path,
695
+) -> None:
696
+    session = FakeSession()
697
+    context = build_context(temp_dir, session)
698
+    finalizer = TurnFinalizer(
699
+        context,
700
+        RuntimeTracer(),
701
+        DefinitionOfDoneStore(temp_dir),
702
+        set_workflow_mode=_noop_set_workflow_mode,
703
+    )
704
+
705
+    docs = temp_dir / "guides" / "nginx"
706
+    chapters = docs / "chapters"
707
+    docs.mkdir(parents=True)
708
+    chapters.mkdir()
709
+    index = docs / "index.html"
710
+    first = chapters / "01-getting-started.html"
711
+    second = chapters / "02-installation.html"
712
+    index.write_text("<html></html>\n")
713
+    first.write_text("<h1>One</h1>\n")
714
+    second.write_text("<h1>Two</h1>\n")
715
+
716
+    implementation_plan = temp_dir / "implementation.md"
717
+    implementation_plan.write_text(
718
+        "\n".join(
719
+            [
720
+                "# Implementation Plan",
721
+                "",
722
+                "## File Changes",
723
+                f"- `{docs}/`",
724
+                f"- `{chapters}/`",
725
+                f"- `{index}`",
726
+                f"- `{first}`",
727
+                f"- `{second}`",
728
+                "",
729
+            ]
730
+        )
731
+    )
732
+
733
+    dod = create_definition_of_done("Create a small multi-page HTML guide.")
734
+    dod.implementation_plan = str(implementation_plan)
735
+    dod.pending_items = [
736
+        "Create 07-performance-tuning.html",
737
+        "Complete the requested work",
738
+    ]
739
+    summary = TurnSummary(final_response="")
740
+
741
+    async def capture(event) -> None:
742
+        return None
743
+
744
+    result = await finalizer.run_definition_of_done_gate(
745
+        dod=dod,
746
+        candidate_response="Finished the guide.",
747
+        emit=capture,
748
+        summary=summary,
749
+        executor=FakeExecutor([]),  # type: ignore[arg-type]
750
+    )
751
+
752
+    assert result.should_continue is False
753
+    assert result.reason_code == "non_mutating_response_accepted"
754
+
755
+
756
+@pytest.mark.asyncio
757
+async def test_turn_finalizer_verification_failure_reentry_points_at_concrete_repair(
758
+    temp_dir: Path,
759
+    monkeypatch: pytest.MonkeyPatch,
760
+) -> None:
761
+    session = FakeSession()
762
+    context = build_context(temp_dir, session)
763
+    queued_messages: list[str] = []
764
+    context.queue_steering_message_callback = queued_messages.append
765
+    finalizer = TurnFinalizer(
766
+        context,
767
+        RuntimeTracer(),
768
+        DefinitionOfDoneStore(temp_dir),
769
+        set_workflow_mode=_noop_set_workflow_mode,
770
+    )
771
+    broken_file = temp_dir / "guides" / "nginx" / "chapters" / "05-advanced-configurations.html"
772
+    broken_file.parent.mkdir(parents=True, exist_ok=True)
773
+    broken_file.write_text('<link rel="stylesheet" href="../styles.css">\n')
774
+    missing_target = temp_dir / "guides" / "nginx" / "styles.css"
775
+    dod = create_definition_of_done("Create the nginx guide.")
776
+    dod.mutating_actions.append("write")
777
+    dod.touched_files.append(str(broken_file))
778
+    dod.verification_commands = ["python3 verify_links.py"]
779
+    summary = TurnSummary(final_response="")
780
+    verify_call = ToolCall(
781
+        id="verify-1-1",
782
+        name="bash",
783
+        arguments={"command": dod.verification_commands[0], "cwd": str(temp_dir)},
784
+    )
785
+    failure_output = (
786
+        "Missing local HTML links:\n"
787
+        f"{broken_file}:../styles.css -> {missing_target}\n"
788
+    )
789
+
790
+    async def capture(event) -> None:
791
+        return None
792
+
793
+    monkeypatch.setattr(
794
+        "loader.runtime.finalization.derive_verification_commands",
795
+        lambda *args, **kwargs: [],
796
+    )
797
+
798
+    result = await finalizer.run_definition_of_done_gate(
799
+        dod=dod,
800
+        candidate_response="The guide is complete.",
801
+        emit=capture,
802
+        summary=summary,
803
+        executor=FakeExecutor(
804
+            [
805
+                tool_outcome(
806
+                    tool_call=verify_call,
807
+                    output=failure_output,
808
+                    is_error=True,
809
+                    exit_code=1,
810
+                    stdout=failure_output,
811
+                )
812
+            ]
813
+        ),  # type: ignore[arg-type]
814
+    )
815
+
816
+    assert result.should_continue is True
817
+    assert result.reason_code == "verification_failed_reentry"
818
+    assert queued_messages
819
+    assert str(broken_file) in queued_messages[-1]
820
+    assert "../styles.css" in queued_messages[-1]
821
+    assert str(missing_target) in queued_messages[-1]
822
+    assert "Do not restart discovery or reread unrelated references." in queued_messages[-1]
823
+    assert session.messages[-1].content.startswith("[DEFINITION OF DONE CHECK FAILED]")
824
+    assert f"Immediate next step: edit `{broken_file}`." in session.messages[-1].content
825
+    assert f"create `{missing_target}`" in session.messages[-1].content
826
+    assert (
827
+        "Do not reread unrelated reference materials or restart discovery"
828
+        in session.messages[-1].content
829
+    )
830
+
831
+
535832
 @pytest.mark.asyncio
536833
 async def test_turn_finalizer_does_not_reverify_without_new_changes(
537834
     temp_dir: Path,
tests/test_permissions.pymodified
1036 lines changed — click to load
@@ -6,15 +6,20 @@ from pathlib import Path
66
 
77
 import pytest
88
 
9
-from loader.llm.base import ToolCall
9
+from loader.llm.base import Message, Role, ToolCall
10
+from loader.runtime.dod import DefinitionOfDoneStore, create_definition_of_done
1011
 from loader.runtime.executor import ToolExecutionState, ToolExecutor
1112
 from loader.runtime.hooks import (
13
+    ActiveRepairMutationScopeHook,
14
+    ActiveRepairScopeHook,
1215
     BaseToolHook,
1316
     FilePathAliasHook,
14
-    HookDecision,
1517
     HookContext,
18
+    HookDecision,
1619
     HookManager,
1720
     HookResult,
21
+    LateReferenceDriftHook,
22
+    RelativePathContextHook,
1823
     SearchPathAliasHook,
1924
 )
2025
 from loader.runtime.permissions import (
@@ -24,6 +29,7 @@ from loader.runtime.permissions import (
2429
     PermissionRuleSet,
2530
     build_permission_policy,
2631
 )
32
+from loader.runtime.safeguard_services import ActionTracker
2733
 from loader.runtime.tracing import RuntimeTracer
2834
 from loader.tools.base import create_default_registry
2935
 
@@ -413,3 +419,1007 @@ async def test_search_path_alias_hook_splits_full_glob_pattern(
413419
     assert result.updated_arguments is not None
414420
     assert result.updated_arguments["path"] == str(chapters)
415421
     assert result.updated_arguments["pattern"] == "*.html"
422
+
423
+
424
@pytest.mark.asyncio
async def test_relative_path_context_hook_remaps_workspace_mirror_of_external_root(
    temp_dir: Path,
) -> None:
    """After a read under an external root, a write to the workspace-local
    mirror of that root should be remapped back to the external location."""
    workspace_root = temp_dir / "workspace"
    workspace_root.mkdir()
    external_root = temp_dir / "external-home"
    external_fortran = external_root / "Loader" / "guides" / "fortran"
    external_fortran.mkdir(parents=True)
    (external_fortran / "index.html").write_text("<html></html>\n")
    # NOTE(review): redundant — `parents=True` above already created this dir.
    (external_root / "Loader" / "guides").mkdir(exist_ok=True)

    registry = create_default_registry(workspace_root)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=workspace_root,
        tool_requirements=registry.get_tool_requirements(),
    )
    action_tracker = ActionTracker()
    # A prior read establishes the external root as recent path context.
    action_tracker.record_tool_call(
        "read",
        {"file_path": str(external_fortran / "index.html")},
    )
    hook = RelativePathContextHook(action_tracker, workspace_root)

    # The write targets <workspace>/Loader/guides/... which mirrors the
    # external layout but does not exist in the workspace.
    mirrored_workspace_path = workspace_root / "Loader" / "guides" / "nginx" / "index.html"
    expected_external_path = external_root / "Loader" / "guides" / "nginx" / "index.html"

    result = await hook.pre_tool_use(
        HookContext(
            tool_call=ToolCall(
                id="write-1",
                name="write",
                arguments={
                    "file_path": str(mirrored_workspace_path),
                    "content": "<html></html>\n",
                },
            ),
            tool=registry.get("write"),
            registry=registry,
            permission_policy=policy,
            source="native",
        )
    )

    # The hook rewrites the file_path argument to the external location.
    assert result.updated_arguments is not None
    assert Path(result.updated_arguments["file_path"]).resolve() == expected_external_path.resolve()
471
+
472
+
473
class FakeSession:
    """Minimal session stand-in: exposes only the active DoD path and the
    message history that the repair-scope hooks inspect."""

    def __init__(self, *, active_dod_path: str, messages: list[Message]) -> None:
        self.messages = messages
        self.active_dod_path = active_dod_path
477
+
478
+
479
@pytest.mark.asyncio
async def test_active_repair_scope_hook_blocks_reference_reads_while_fixing(
    temp_dir: Path,
) -> None:
    """While the DoD is in `fixing`, reads of files outside the declared repair
    scope are denied with a message naming the actual repair target."""
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    dod = create_definition_of_done("Repair the active artifact set")
    dod.status = "fixing"
    dod_path = dod_store.save(dod)
    repair_target = temp_dir / "guide" / "index.html"
    # The last assistant message declares the concrete repair focus the hook
    # parses to establish the allowed file scope.
    session = FakeSession(
        active_dod_path=str(dod_path),
        messages=[
            Message(
                role=Role.ASSISTANT,
                content=(
                    "Repair focus:\n"
                    f"- Fix the broken local reference `chapters/01-introduction.html` in `{repair_target}`.\n"
                    f"- Immediate next step: edit `{repair_target}`.\n"
                    f"- If the broken reference should remain, create `{temp_dir / 'guide' / 'chapters' / '01-introduction.html'}`; otherwise remove or replace `chapters/01-introduction.html`.\n"
                ),
            )
        ],
    )
    hook = ActiveRepairScopeHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=session,
    )

    # Attempt to read a reference file unrelated to the repair focus.
    result = await hook.pre_tool_use(
        HookContext(
            tool_call=ToolCall(
                id="read-1",
                name="read",
                arguments={"file_path": str(temp_dir / "reference" / "index.html")},
            ),
            tool=registry.get("read"),
            registry=registry,
            permission_policy=policy,
            source="native",
        )
    )

    assert result.decision == HookDecision.DENY
    assert result.terminal_state == "blocked"
    assert result.message is not None
    assert "active repair scope" in result.message
    assert str(repair_target) in result.message
533
+
534
+
535
@pytest.mark.asyncio
async def test_active_repair_scope_hook_allows_reads_inside_active_artifact_set(
    temp_dir: Path,
) -> None:
    """Reads of files explicitly named in the repair focus stay allowed even
    while the DoD is in `fixing`."""
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    dod = create_definition_of_done("Repair the active artifact set")
    dod.status = "fixing"
    dod_path = dod_store.save(dod)
    repair_target = temp_dir / "guide" / "index.html"
    chapter_path = temp_dir / "guide" / "chapters" / "01-getting-started.html"
    # The repair focus names `chapter_path` both as a broken-reference host
    # and as a create target, which places it in the allowed scope.
    session = FakeSession(
        active_dod_path=str(dod_path),
        messages=[
            Message(
                role=Role.ASSISTANT,
                content=(
                    "Repair focus:\n"
                    f"- Fix the broken local reference `chapters/01-getting-started.html` in `{repair_target}`.\n"
                    f"- Fix the broken local reference `../styles.css` in `{chapter_path}`.\n"
                    f"- Immediate next step: edit `{repair_target}`.\n"
                    f"- If the broken reference should remain, create `{chapter_path}`; otherwise remove or replace `chapters/01-getting-started.html`.\n"
                ),
            )
        ],
    )
    hook = ActiveRepairScopeHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=session,
    )

    result = await hook.pre_tool_use(
        HookContext(
            tool_call=ToolCall(
                id="read-1",
                name="read",
                arguments={"file_path": str(chapter_path)},
            ),
            tool=registry.get("read"),
            registry=registry,
            permission_policy=policy,
            source="native",
        )
    )

    # In-scope read: the hook lets the call proceed.
    assert result.decision == HookDecision.CONTINUE
587
+
588
+
589
@pytest.mark.asyncio
async def test_active_repair_scope_hook_allows_verification_source_outside_repair_target(
    temp_dir: Path,
) -> None:
    """Tool calls originating from verification (`source="verification"`) are
    exempt from the repair-scope restriction."""
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    dod = create_definition_of_done("Repair the active artifact set")
    dod.status = "in_progress"
    dod_path = dod_store.save(dod)
    repair_target = temp_dir / "guide" / "chapters" / "06-troubleshooting.html"
    session = FakeSession(
        active_dod_path=str(dod_path),
        messages=[
            Message(
                role=Role.ASSISTANT,
                content=(
                    "Repair focus:\n"
                    f"- Fix the broken local reference `01-introduction.html` in `{repair_target}`.\n"
                    f"- Immediate next step: edit `{repair_target}`.\n"
                    "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
                ),
            )
        ],
    )
    hook = ActiveRepairScopeHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=session,
    )

    # The read targets a file outside the repair scope, but it comes from the
    # verification pipeline, so the hook must not block it.
    result = await hook.pre_tool_use(
        HookContext(
            tool_call=ToolCall(
                id="verify-1",
                name="read",
                arguments={"file_path": str(temp_dir / "guide" / "index.html")},
            ),
            tool=registry.get("read"),
            registry=registry,
            permission_policy=policy,
            source="verification",
        )
    )

    assert result.decision == HookDecision.CONTINUE
639
+
640
+
641
@pytest.mark.asyncio
async def test_active_repair_scope_hook_blocks_local_rereads_outside_concrete_repair_files(
    temp_dir: Path,
) -> None:
    """Even local sibling chapters are blocked when they are not among the
    concrete repair files listed in the repair focus."""
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    dod = create_definition_of_done("Repair the active artifact set")
    dod.status = "in_progress"
    dod_path = dod_store.save(dod)
    repair_target = temp_dir / "guide" / "chapters" / "05-advanced-configurations.html"
    stylesheet = temp_dir / "guide" / "styles.css"
    # A sibling chapter that is NOT named anywhere in the repair focus.
    other_chapter = temp_dir / "guide" / "chapters" / "01-getting-started.html"
    session = FakeSession(
        active_dod_path=str(dod_path),
        messages=[
            Message(
                role=Role.ASSISTANT,
                content=(
                    "Repair focus:\n"
                    f"- Fix the broken local reference `../styles.css` in `{repair_target}`.\n"
                    f"- Fix the broken local reference `../styles.css` in `{temp_dir / 'guide' / 'chapters' / '06-troubleshooting.html'}`.\n"
                    f"- Immediate next step: edit `{repair_target}`.\n"
                    f"- If the broken reference should remain, create `{stylesheet}`; otherwise remove or replace `../styles.css`.\n"
                    "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
                ),
            )
        ],
    )
    hook = ActiveRepairScopeHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=session,
    )

    result = await hook.pre_tool_use(
        HookContext(
            tool_call=ToolCall(
                id="read-1",
                name="read",
                arguments={"file_path": str(other_chapter)},
            ),
            tool=registry.get("read"),
            registry=registry,
            permission_policy=policy,
            source="native",
        )
    )

    assert result.decision == HookDecision.DENY
    assert result.terminal_state == "blocked"
    assert result.message is not None
    assert "active repair scope" in result.message
    # The denial message redirects to the concrete repair files.
    assert str(repair_target) in result.message
    assert str(stylesheet) in result.message
700
+
701
+
702
@pytest.mark.asyncio
async def test_active_repair_scope_hook_allows_scoped_glob_within_active_artifact_roots(
    temp_dir: Path,
) -> None:
    """A glob whose pattern stays inside the active artifact roots (the guide's
    chapters directory) is allowed even though its `path` is the project root."""
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    dod = create_definition_of_done("Repair the active artifact set")
    dod.status = "in_progress"
    dod_path = dod_store.save(dod)
    repair_target = temp_dir / "guide" / "index.html"
    guide_root = temp_dir / "guide"
    # The focus lists several chapter files as the source of truth, placing
    # guide/chapters inside the active artifact roots.
    session = FakeSession(
        active_dod_path=str(dod_path),
        messages=[
            Message(
                role=Role.ASSISTANT,
                content=(
                    "Repair focus:\n"
                    f"- Fix the broken local reference `chapters/troubleshooting.html` in `{repair_target}`.\n"
                    f"- Immediate next step: edit `{repair_target}`.\n"
                    f"- If the broken reference should remain, create `{guide_root / 'chapters' / 'troubleshooting.html'}`; otherwise remove or replace `chapters/troubleshooting.html`.\n"
                    "- Use the existing artifact files as the source of truth while repairing this file: "
                    f"`{guide_root / 'chapters' / 'introduction.html'}`, `{guide_root / 'chapters' / 'installation.html'}`, `{guide_root / 'chapters' / 'configuration.html'}`.\n"
                    "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
                ),
            )
        ],
    )
    hook = ActiveRepairScopeHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=session,
    )

    result = await hook.pre_tool_use(
        HookContext(
            tool_call=ToolCall(
                id="glob-1",
                name="glob",
                arguments={
                    "path": str(temp_dir),
                    "pattern": "**/guide/chapters/*.html",
                },
            ),
            tool=registry.get("glob"),
            registry=registry,
            permission_policy=policy,
            source="native",
        )
    )

    assert result.decision == HookDecision.CONTINUE
759
+
760
+
761
@pytest.mark.asyncio
async def test_active_repair_scope_hook_allows_declared_missing_sibling_reads(
    temp_dir: Path,
) -> None:
    """A sibling chapter that the repair target links to — but which does not
    exist yet — may still be read while the repair is in progress."""
    registry = create_default_registry(temp_dir)
    policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=registry.get_tool_requirements(),
    )
    dod_store = DefinitionOfDoneStore(temp_dir)
    dod = create_definition_of_done("Repair the active artifact set")
    dod.status = "in_progress"
    dod_path = dod_store.save(dod)
    guide_root = temp_dir / "guide"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True)
    repair_target = guide_root / "index.html"
    existing_chapter = chapters / "overview.html"
    # `next_chapter` is linked from the index but never written to disk.
    next_chapter = chapters / "installation.html"
    repair_target.write_text(
        "\n".join(
            [
                "<html>",
                '<a href="chapters/overview.html">Overview</a>',
                '<a href="chapters/installation.html">Installation</a>',
                "</html>",
            ]
        )
        + "\n"
    )
    existing_chapter.write_text("<h1>Overview</h1>\n")

    session = FakeSession(
        active_dod_path=str(dod_path),
        messages=[
            Message(
                role=Role.ASSISTANT,
                content=(
                    "Repair focus:\n"
                    f"- Fix the broken local reference `chapters/overview.html` in `{repair_target}`.\n"
                    f"- Immediate next step: edit `{repair_target}`.\n"
                    f"- If the broken reference should remain, create `{existing_chapter}`; otherwise remove or replace `chapters/overview.html`.\n"
                    "- Use the existing artifact files as the source of truth while repairing this file: "
                    f"`{existing_chapter}`.\n"
                    "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
                ),
            )
        ],
    )
    hook = ActiveRepairScopeHook(
        dod_store=dod_store,
        project_root=temp_dir,
        session=session,
    )

    # Read the missing-but-linked sibling of the repair target.
    result = await hook.pre_tool_use(
        HookContext(
            tool_call=ToolCall(
                id="read-allowed-sibling",
                name="read",
                arguments={"file_path": str(next_chapter)},
            ),
            tool=registry.get("read"),
            registry=registry,
            permission_policy=policy,
            source="native",
        )
    )

    assert result.decision == HookDecision.CONTINUE
832
+
833
+
834
+@pytest.mark.asyncio
835
+async def test_active_repair_scope_hook_blocks_reference_reads_during_in_progress_repair(
836
+    temp_dir: Path,
837
+) -> None:
838
+    registry = create_default_registry(temp_dir)
839
+    policy = build_permission_policy(
840
+        active_mode=PermissionMode.WORKSPACE_WRITE,
841
+        workspace_root=temp_dir,
842
+        tool_requirements=registry.get_tool_requirements(),
843
+    )
844
+    dod_store = DefinitionOfDoneStore(temp_dir)
845
+    dod = create_definition_of_done("Repair the active artifact set")
846
+    dod.status = "in_progress"
847
+    dod_path = dod_store.save(dod)
848
+    repair_target = temp_dir / "guide" / "chapters" / "05-advanced-configurations.html"
849
+    session = FakeSession(
850
+        active_dod_path=str(dod_path),
851
+        messages=[
852
+            Message(
853
+                role=Role.ASSISTANT,
854
+                content=(
855
+                    "Repair focus:\n"
856
+                    f"- Fix the broken local reference `../styles.css` in `{repair_target}`.\n"
857
+                    f"- Immediate next step: edit `{repair_target}`.\n"
858
+                    f"- If the broken reference should remain, create `{temp_dir / 'guide' / 'styles.css'}`; otherwise remove or replace `../styles.css`.\n"
859
+                    "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
860
+                ),
861
+            )
862
+        ],
863
+    )
864
+    hook = ActiveRepairScopeHook(
865
+        dod_store=dod_store,
866
+        project_root=temp_dir,
867
+        session=session,
868
+    )
869
+
870
+    result = await hook.pre_tool_use(
871
+        HookContext(
872
+            tool_call=ToolCall(
873
+                id="read-1",
874
+                name="read",
875
+                arguments={"file_path": str(temp_dir / "reference" / "index.html")},
876
+            ),
877
+            tool=registry.get("read"),
878
+            registry=registry,
879
+            permission_policy=policy,
880
+            source="native",
881
+        )
882
+    )
883
+
884
+    assert result.decision == HookDecision.DENY
885
+    assert result.terminal_state == "blocked"
886
+    assert result.message is not None
887
+    assert "active repair scope" in result.message
888
+
889
+
890
+@pytest.mark.asyncio
891
+async def test_active_repair_mutation_scope_hook_blocks_writes_outside_named_repair_files(
892
+    temp_dir: Path,
893
+) -> None:
894
+    registry = create_default_registry(temp_dir)
895
+    policy = build_permission_policy(
896
+        active_mode=PermissionMode.WORKSPACE_WRITE,
897
+        workspace_root=temp_dir,
898
+        tool_requirements=registry.get_tool_requirements(),
899
+    )
900
+    dod_store = DefinitionOfDoneStore(temp_dir)
901
+    dod = create_definition_of_done("Repair the active artifact set")
902
+    dod.status = "in_progress"
903
+    dod_path = dod_store.save(dod)
904
+    repair_target = temp_dir / "guide" / "chapters" / "05-advanced-configurations.html"
905
+    chapter_path = temp_dir / "guide" / "chapters" / "01-getting-started.html"
906
+    session = FakeSession(
907
+        active_dod_path=str(dod_path),
908
+        messages=[
909
+            Message(
910
+                role=Role.ASSISTANT,
911
+                content=(
912
+                    "Repair focus:\n"
913
+                    f"- Fix the broken local reference `../styles.css` in `{repair_target}`.\n"
914
+                    f"- Immediate next step: edit `{repair_target}`.\n"
915
+                    f"- If the broken reference should remain, create `{temp_dir / 'guide' / 'styles.css'}`; otherwise remove or replace `../styles.css`.\n"
916
+                    "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
917
+                ),
918
+            )
919
+        ],
920
+    )
921
+    hook = ActiveRepairMutationScopeHook(
922
+        dod_store=dod_store,
923
+        project_root=temp_dir,
924
+        session=session,
925
+    )
926
+
927
+    result = await hook.pre_tool_use(
928
+        HookContext(
929
+            tool_call=ToolCall(
930
+                id="edit-1",
931
+                name="edit",
932
+                arguments={"file_path": str(chapter_path), "old_string": "old", "new_string": "new"},
933
+            ),
934
+            tool=registry.get("edit"),
935
+            registry=registry,
936
+            permission_policy=policy,
937
+            source="native",
938
+        )
939
+    )
940
+
941
+    assert result.decision == HookDecision.DENY
942
+    assert result.terminal_state == "blocked"
943
+    assert result.message is not None
944
+    assert "active repair mutation scope" in result.message
945
+    assert str(repair_target) in result.message
946
+
947
+
948
+@pytest.mark.asyncio
949
+async def test_active_repair_mutation_scope_hook_allows_expected_repair_file_writes(
950
+    temp_dir: Path,
951
+) -> None:
952
+    registry = create_default_registry(temp_dir)
953
+    policy = build_permission_policy(
954
+        active_mode=PermissionMode.WORKSPACE_WRITE,
955
+        workspace_root=temp_dir,
956
+        tool_requirements=registry.get_tool_requirements(),
957
+    )
958
+    dod_store = DefinitionOfDoneStore(temp_dir)
959
+    dod = create_definition_of_done("Repair the active artifact set")
960
+    dod.status = "in_progress"
961
+    dod_path = dod_store.save(dod)
962
+    repair_target = temp_dir / "guide" / "chapters" / "05-advanced-configurations.html"
963
+    stylesheet = temp_dir / "guide" / "styles.css"
964
+    session = FakeSession(
965
+        active_dod_path=str(dod_path),
966
+        messages=[
967
+            Message(
968
+                role=Role.ASSISTANT,
969
+                content=(
970
+                    "Repair focus:\n"
971
+                    f"- Fix the broken local reference `../styles.css` in `{repair_target}`.\n"
972
+                    f"- Immediate next step: edit `{repair_target}`.\n"
973
+                    f"- If the broken reference should remain, create `{stylesheet}`; otherwise remove or replace `../styles.css`.\n"
974
+                    "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
975
+                ),
976
+            )
977
+        ],
978
+    )
979
+    hook = ActiveRepairMutationScopeHook(
980
+        dod_store=dod_store,
981
+        project_root=temp_dir,
982
+        session=session,
983
+    )
984
+
985
+    result = await hook.pre_tool_use(
986
+        HookContext(
987
+            tool_call=ToolCall(
988
+                id="write-1",
989
+                name="write",
990
+                arguments={"file_path": str(stylesheet), "content": "body { color: #222; }\n"},
991
+            ),
992
+            tool=registry.get("write"),
993
+            registry=registry,
994
+            permission_policy=policy,
995
+            source="native",
996
+        )
997
+    )
998
+
999
+    assert result.decision == HookDecision.CONTINUE
1000
+
1001
+
1002
+@pytest.mark.asyncio
1003
+async def test_active_repair_mutation_scope_hook_allows_declared_missing_sibling_outputs(
1004
+    temp_dir: Path,
1005
+) -> None:
1006
+    registry = create_default_registry(temp_dir)
1007
+    policy = build_permission_policy(
1008
+        active_mode=PermissionMode.WORKSPACE_WRITE,
1009
+        workspace_root=temp_dir,
1010
+        tool_requirements=registry.get_tool_requirements(),
1011
+    )
1012
+    dod_store = DefinitionOfDoneStore(temp_dir)
1013
+    dod = create_definition_of_done("Repair the active artifact set")
1014
+    dod.status = "in_progress"
1015
+    dod_path = dod_store.save(dod)
1016
+    guide_root = temp_dir / "guide"
1017
+    chapters = guide_root / "chapters"
1018
+    chapters.mkdir(parents=True)
1019
+    repair_target = guide_root / "index.html"
1020
+    existing_chapter = chapters / "01-introduction.html"
1021
+    next_chapter = chapters / "02-installation.html"
1022
+    repair_target.write_text(
1023
+        "\n".join(
1024
+            [
1025
+                "<html>",
1026
+                '<a href="chapters/01-introduction.html">Introduction</a>',
1027
+                '<a href="chapters/02-installation.html">Installation</a>',
1028
+                "</html>",
1029
+            ]
1030
+        )
1031
+        + "\n"
1032
+    )
1033
+    existing_chapter.write_text("<h1>Introduction</h1>\n")
1034
+
1035
+    session = FakeSession(
1036
+        active_dod_path=str(dod_path),
1037
+        messages=[
1038
+            Message(
1039
+                role=Role.ASSISTANT,
1040
+                content=(
1041
+                    "Repair focus:\n"
1042
+                    f"- Fix the broken local reference `chapters/01-introduction.html` in `{repair_target}`.\n"
1043
+                    f"- Immediate next step: edit `{repair_target}`.\n"
1044
+                    f"- If the broken reference should remain, create `{existing_chapter}`; otherwise remove or replace `chapters/01-introduction.html`.\n"
1045
+                    "- Use the existing artifact files as the source of truth while repairing this file: "
1046
+                    f"`{existing_chapter}`.\n"
1047
+                    "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
1048
+                ),
1049
+            )
1050
+        ],
1051
+    )
1052
+    hook = ActiveRepairMutationScopeHook(
1053
+        dod_store=dod_store,
1054
+        project_root=temp_dir,
1055
+        session=session,
1056
+    )
1057
+
1058
+    result = await hook.pre_tool_use(
1059
+        HookContext(
1060
+            tool_call=ToolCall(
1061
+                id="write-2",
1062
+                name="write",
1063
+                arguments={"file_path": str(next_chapter), "content": "<h1>Installation</h1>\n"},
1064
+            ),
1065
+            tool=registry.get("write"),
1066
+            registry=registry,
1067
+            permission_policy=policy,
1068
+            source="native",
1069
+        )
1070
+    )
1071
+
1072
+    assert result.decision == HookDecision.CONTINUE
1073
+
1074
+
1075
+@pytest.mark.asyncio
1076
+async def test_active_repair_mutation_scope_hook_blocks_broad_mutating_bash(
1077
+    temp_dir: Path,
1078
+) -> None:
1079
+    registry = create_default_registry(temp_dir)
1080
+    policy = build_permission_policy(
1081
+        active_mode=PermissionMode.WORKSPACE_WRITE,
1082
+        workspace_root=temp_dir,
1083
+        tool_requirements=registry.get_tool_requirements(),
1084
+    )
1085
+    dod_store = DefinitionOfDoneStore(temp_dir)
1086
+    dod = create_definition_of_done("Repair the active artifact set")
1087
+    dod.status = "in_progress"
1088
+    dod_path = dod_store.save(dod)
1089
+    repair_target = temp_dir / "guide" / "chapters" / "05-advanced-configurations.html"
1090
+    session = FakeSession(
1091
+        active_dod_path=str(dod_path),
1092
+        messages=[
1093
+            Message(
1094
+                role=Role.ASSISTANT,
1095
+                content=(
1096
+                    "Repair focus:\n"
1097
+                    f"- Fix the broken local reference `../styles.css` in `{repair_target}`.\n"
1098
+                    f"- Immediate next step: edit `{repair_target}`.\n"
1099
+                    f"- If the broken reference should remain, create `{temp_dir / 'guide' / 'styles.css'}`; otherwise remove or replace `../styles.css`.\n"
1100
+                    "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n"
1101
+                ),
1102
+            )
1103
+        ],
1104
+    )
1105
+    hook = ActiveRepairMutationScopeHook(
1106
+        dod_store=dod_store,
1107
+        project_root=temp_dir,
1108
+        session=session,
1109
+    )
1110
+
1111
+    result = await hook.pre_tool_use(
1112
+        HookContext(
1113
+            tool_call=ToolCall(
1114
+                id="bash-1",
1115
+                name="bash",
1116
+                arguments={"command": f"mkdir -p {temp_dir / 'guide' / 'assets'}"},
1117
+            ),
1118
+            tool=registry.get("bash"),
1119
+            registry=registry,
1120
+            permission_policy=policy,
1121
+            source="native",
1122
+        )
1123
+    )
1124
+
1125
+    assert result.decision == HookDecision.DENY
1126
+    assert result.terminal_state == "blocked"
1127
+    assert result.message is not None
1128
+    assert "active repair mutation scope" in result.message
1129
+    assert str(repair_target) in result.message
1130
+
1131
+
1132
+@pytest.mark.asyncio
1133
+async def test_late_reference_drift_hook_blocks_out_of_scope_reference_reads(
1134
+    temp_dir: Path,
1135
+) -> None:
1136
+    registry = create_default_registry(temp_dir)
1137
+    policy = build_permission_policy(
1138
+        active_mode=PermissionMode.WORKSPACE_WRITE,
1139
+        workspace_root=temp_dir,
1140
+        tool_requirements=registry.get_tool_requirements(),
1141
+    )
1142
+    dod_store = DefinitionOfDoneStore(temp_dir)
1143
+    dod = create_definition_of_done("Create a multi-file guide from a reference")
1144
+    dod.status = "in_progress"
1145
+    plan_path = temp_dir / "implementation.md"
1146
+    plan_path.write_text(
1147
+        "# File Changes\n"
1148
+        "- `guide/index.html`\n"
1149
+        "- `guide/chapters/01-getting-started.html`\n"
1150
+        "- `guide/chapters/02-installation.html`\n"
1151
+        "- `guide/chapters/03-first-website.html`\n"
1152
+    )
1153
+    dod.implementation_plan = str(plan_path)
1154
+    dod_path = dod_store.save(dod)
1155
+    guide_dir = temp_dir / "guide" / "chapters"
1156
+    guide_dir.mkdir(parents=True, exist_ok=True)
1157
+    (temp_dir / "guide" / "index.html").write_text("index")
1158
+    (guide_dir / "01-getting-started.html").write_text("one")
1159
+    (guide_dir / "02-installation.html").write_text("two")
1160
+    session = FakeSession(active_dod_path=str(dod_path), messages=[])
1161
+    hook = LateReferenceDriftHook(
1162
+        dod_store=dod_store,
1163
+        project_root=temp_dir,
1164
+        session=session,
1165
+    )
1166
+
1167
+    result = await hook.pre_tool_use(
1168
+        HookContext(
1169
+            tool_call=ToolCall(
1170
+                id="read-1",
1171
+                name="read",
1172
+                arguments={"file_path": str(temp_dir / "reference" / "index.html")},
1173
+            ),
1174
+            tool=registry.get("read"),
1175
+            registry=registry,
1176
+            permission_policy=policy,
1177
+            source="native",
1178
+        )
1179
+    )
1180
+
1181
+    assert result.decision == HookDecision.DENY
1182
+    assert result.terminal_state == "blocked"
1183
+    assert result.message is not None
1184
+    assert "late reference drift" in result.message
1185
+    assert "03-first-website.html" in result.message
1186
+
1187
+
1188
+@pytest.mark.asyncio
1189
+async def test_late_reference_drift_hook_allows_reads_inside_planned_artifact_set(
1190
+    temp_dir: Path,
1191
+) -> None:
1192
+    registry = create_default_registry(temp_dir)
1193
+    policy = build_permission_policy(
1194
+        active_mode=PermissionMode.WORKSPACE_WRITE,
1195
+        workspace_root=temp_dir,
1196
+        tool_requirements=registry.get_tool_requirements(),
1197
+    )
1198
+    dod_store = DefinitionOfDoneStore(temp_dir)
1199
+    dod = create_definition_of_done("Create a multi-file guide from a reference")
1200
+    dod.status = "in_progress"
1201
+    plan_path = temp_dir / "implementation.md"
1202
+    plan_path.write_text(
1203
+        "# File Changes\n"
1204
+        "- `guide/index.html`\n"
1205
+        "- `guide/chapters/01-getting-started.html`\n"
1206
+        "- `guide/chapters/02-installation.html`\n"
1207
+        "- `guide/chapters/03-first-website.html`\n"
1208
+    )
1209
+    dod.implementation_plan = str(plan_path)
1210
+    dod_path = dod_store.save(dod)
1211
+    guide_dir = temp_dir / "guide" / "chapters"
1212
+    guide_dir.mkdir(parents=True, exist_ok=True)
1213
+    target = guide_dir / "02-installation.html"
1214
+    (temp_dir / "guide" / "index.html").write_text("index")
1215
+    (guide_dir / "01-getting-started.html").write_text("one")
1216
+    target.write_text("two")
1217
+    session = FakeSession(active_dod_path=str(dod_path), messages=[])
1218
+    hook = LateReferenceDriftHook(
1219
+        dod_store=dod_store,
1220
+        project_root=temp_dir,
1221
+        session=session,
1222
+    )
1223
+
1224
+    result = await hook.pre_tool_use(
1225
+        HookContext(
1226
+            tool_call=ToolCall(
1227
+                id="read-1",
1228
+                name="read",
1229
+                arguments={"file_path": str(target)},
1230
+            ),
1231
+            tool=registry.get("read"),
1232
+            registry=registry,
1233
+            permission_policy=policy,
1234
+            source="native",
1235
+        )
1236
+    )
1237
+
1238
+    assert result.decision == HookDecision.CONTINUE
1239
+
1240
+
1241
+@pytest.mark.asyncio
1242
+async def test_late_reference_drift_hook_blocks_reference_reads_after_artifacts_exist(
1243
+    temp_dir: Path,
1244
+) -> None:
1245
+    registry = create_default_registry(temp_dir)
1246
+    policy = build_permission_policy(
1247
+        active_mode=PermissionMode.WORKSPACE_WRITE,
1248
+        workspace_root=temp_dir,
1249
+        tool_requirements=registry.get_tool_requirements(),
1250
+    )
1251
+    dod_store = DefinitionOfDoneStore(temp_dir)
1252
+    dod = create_definition_of_done("Create a multi-file guide from a reference")
1253
+    dod.status = "in_progress"
1254
+    plan_path = temp_dir / "implementation.md"
1255
+    plan_path.write_text(
1256
+        "\n".join(
1257
+            [
1258
+                "# Implementation Plan",
1259
+                "",
1260
+                "## File Changes",
1261
+                f"- `{temp_dir / 'guide'}`",
1262
+                f"- `{temp_dir / 'guide' / 'chapters'}`",
1263
+                f"- `{temp_dir / 'guide' / 'index.html'}`",
1264
+                f"- `{temp_dir / 'guide' / 'chapters' / '01-getting-started.html'}`",
1265
+                f"- `{temp_dir / 'guide' / 'chapters' / '02-installation.html'}`",
1266
+                "",
1267
+            ]
1268
+        )
1269
+    )
1270
+    dod.implementation_plan = str(plan_path)
1271
+    guide_dir = temp_dir / "guide" / "chapters"
1272
+    guide_dir.mkdir(parents=True, exist_ok=True)
1273
+    (temp_dir / "guide" / "index.html").write_text("index")
1274
+    (guide_dir / "01-getting-started.html").write_text("one")
1275
+    (guide_dir / "02-installation.html").write_text("two")
1276
+    dod_path = dod_store.save(dod)
1277
+    session = FakeSession(active_dod_path=str(dod_path), messages=[])
1278
+    hook = LateReferenceDriftHook(
1279
+        dod_store=dod_store,
1280
+        project_root=temp_dir,
1281
+        session=session,
1282
+    )
1283
+
1284
+    result = await hook.pre_tool_use(
1285
+        HookContext(
1286
+            tool_call=ToolCall(
1287
+                id="read-1",
1288
+                name="read",
1289
+                arguments={"file_path": str(temp_dir / "reference" / "index.html")},
1290
+            ),
1291
+            tool=registry.get("read"),
1292
+            registry=registry,
1293
+            permission_policy=policy,
1294
+            source="native",
1295
+        )
1296
+    )
1297
+
1298
+    assert result.decision == HookDecision.DENY
1299
+    assert result.terminal_state == "blocked"
1300
+    assert result.message is not None
1301
+    assert "completed artifact set scope" in result.message
1302
+    assert str(temp_dir / "guide") in result.message
1303
+
1304
+
1305
+@pytest.mark.asyncio
1306
+async def test_late_reference_drift_hook_does_not_treat_empty_output_dir_as_complete_artifact_set(
1307
+    temp_dir: Path,
1308
+) -> None:
1309
+    registry = create_default_registry(temp_dir)
1310
+    policy = build_permission_policy(
1311
+        active_mode=PermissionMode.WORKSPACE_WRITE,
1312
+        workspace_root=temp_dir,
1313
+        tool_requirements=registry.get_tool_requirements(),
1314
+    )
1315
+    dod_store = DefinitionOfDoneStore(temp_dir)
1316
+    dod = create_definition_of_done("Create a multi-file guide from a reference")
1317
+    dod.status = "in_progress"
1318
+    dod.completed_items = ["Create chapter files with appropriate content"]
1319
+    plan_path = temp_dir / "implementation.md"
1320
+    plan_path.write_text(
1321
+        "\n".join(
1322
+            [
1323
+                "# Implementation Plan",
1324
+                "",
1325
+                "## File Changes",
1326
+                f"- `{temp_dir / 'guide' / 'index.html'}`",
1327
+                f"- `{temp_dir / 'guide' / 'chapters'}/` (directory for chapter files)",
1328
+                "",
1329
+                "## Execution Order",
1330
+                "- Create chapter files with appropriate content",
1331
+            ]
1332
+        )
1333
+    )
1334
+    dod.implementation_plan = str(plan_path)
1335
+    guide_dir = temp_dir / "guide" / "chapters"
1336
+    guide_dir.mkdir(parents=True, exist_ok=True)
1337
+    (temp_dir / "guide" / "index.html").write_text("index")
1338
+    dod_path = dod_store.save(dod)
1339
+    session = FakeSession(active_dod_path=str(dod_path), messages=[])
1340
+    hook = LateReferenceDriftHook(
1341
+        dod_store=dod_store,
1342
+        project_root=temp_dir,
1343
+        session=session,
1344
+    )
1345
+
1346
+    result = await hook.pre_tool_use(
1347
+        HookContext(
1348
+            tool_call=ToolCall(
1349
+                id="read-1",
1350
+                name="read",
1351
+                arguments={"file_path": str(temp_dir / "reference" / "index.html")},
1352
+            ),
1353
+            tool=registry.get("read"),
1354
+            registry=registry,
1355
+            permission_policy=policy,
1356
+            source="native",
1357
+        )
1358
+    )
1359
+
1360
+    assert result.decision == HookDecision.CONTINUE
1361
+
1362
+
1363
+@pytest.mark.asyncio
1364
+async def test_late_reference_drift_hook_does_not_block_when_html_outputs_still_link_to_missing_files(
1365
+    temp_dir: Path,
1366
+) -> None:
1367
+    registry = create_default_registry(temp_dir)
1368
+    policy = build_permission_policy(
1369
+        active_mode=PermissionMode.WORKSPACE_WRITE,
1370
+        workspace_root=temp_dir,
1371
+        tool_requirements=registry.get_tool_requirements(),
1372
+    )
1373
+    dod_store = DefinitionOfDoneStore(temp_dir)
1374
+    dod = create_definition_of_done("Create a multi-file guide from a reference")
1375
+    dod.status = "in_progress"
1376
+    dod.completed_items = ["Create chapter files with appropriate content"]
1377
+    plan_path = temp_dir / "implementation.md"
1378
+    plan_path.write_text(
1379
+        "\n".join(
1380
+            [
1381
+                "# Implementation Plan",
1382
+                "",
1383
+                "## File Changes",
1384
+                f"- `{temp_dir / 'guide' / 'index.html'}`",
1385
+                f"- `{temp_dir / 'guide' / 'chapters'}/` (directory for chapter files)",
1386
+                "",
1387
+                "## Execution Order",
1388
+                "- Create chapter files with appropriate content",
1389
+            ]
1390
+        )
1391
+    )
1392
+    dod.implementation_plan = str(plan_path)
1393
+    guide_dir = temp_dir / "guide"
1394
+    chapters = guide_dir / "chapters"
1395
+    chapters.mkdir(parents=True, exist_ok=True)
1396
+    index = guide_dir / "index.html"
1397
+    index.write_text(
1398
+        '<a href="chapters/01-getting-started.html">One</a>\n'
1399
+        '<a href="chapters/02-installation.html">Two</a>\n'
1400
+    )
1401
+    (chapters / "01-getting-started.html").write_text("one")
1402
+    dod.touched_files = [str(index), str(chapters / "01-getting-started.html")]
1403
+    dod_path = dod_store.save(dod)
1404
+    session = FakeSession(active_dod_path=str(dod_path), messages=[])
1405
+    hook = LateReferenceDriftHook(
1406
+        dod_store=dod_store,
1407
+        project_root=temp_dir,
1408
+        session=session,
1409
+    )
1410
+
1411
+    result = await hook.pre_tool_use(
1412
+        HookContext(
1413
+            tool_call=ToolCall(
1414
+                id="read-1",
1415
+                name="read",
1416
+                arguments={"file_path": str(temp_dir / "reference" / "index.html")},
1417
+            ),
1418
+            tool=registry.get("read"),
1419
+            registry=registry,
1420
+            permission_policy=policy,
1421
+            source="native",
1422
+        )
1423
+    )
1424
+
1425
+    assert result.decision == HookDecision.CONTINUE
tests/test_repair.pymodified
@@ -8,6 +8,7 @@ from types import SimpleNamespace
88
 
99
 from loader.llm.base import ToolCall
1010
 from loader.runtime.context import RuntimeContext
11
+from loader.runtime.dod import create_definition_of_done
1112
 from loader.runtime.permissions import (
1213
     PermissionMode,
1314
     build_permission_policy,
@@ -201,3 +202,569 @@ def test_response_repairer_fails_honestly_when_raw_tool_budget_is_exhausted(
201202
     )
202203
     assert analysis.failure == "raw-text tool recovery budget exhausted"
203204
     assert "Let me know if you'd like me to continue" not in analysis.final_response
205
+
206
+
207
def test_empty_response_retry_message_surfaces_missing_planned_artifacts_and_working_note(
    temp_dir: Path,
) -> None:
    """Empty-response retry guidance should name the next missing planned
    artifact, echo the latest working note, and steer toward a single
    concrete `write` call rather than restarted discovery."""
    context = build_context(
        temp_dir=temp_dir,
        use_react=False,
    )
    repairer = ResponseRepairer(context)
    # Plan declares three output files; only index.html is actually created.
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{temp_dir / 'guides' / 'nginx' / 'index.html'}`",
                f"- `{temp_dir / 'guides' / 'nginx' / 'chapters' / '01-getting-started.html'}`",
                f"- `{temp_dir / 'guides' / 'nginx' / 'chapters' / '02-installation.html'}`",
                "",
            ]
        )
    )
    first_artifact = temp_dir / "guides" / "nginx" / "index.html"
    first_artifact.parent.mkdir(parents=True)
    first_artifact.write_text("<html></html>\n")

    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    dod.touched_files.append(str(first_artifact))
    dod.completed_items.append("Create the main index.html file")
    dod.pending_items.append("Create each chapter file in sequence")

    # Fake a prior notepad_write_working observation so the repairer has a
    # "latest working note" to surface in the retry message.
    context.session.append(
        SimpleNamespace(
            role="tool",
            content=(
                "Observation [notepad_write_working]: Result: "
                "- [2026-04-21T19:17:34Z] Creating fifth chapter file: Advanced configurations"
            ),
        )
    )

    decision = repairer.handle_empty_response(
        task="Create a multi-file nginx guide.",
        original_task=None,
        empty_retry_count=1,
        max_empty_retries=2,
        dod=dod,
    )

    # First retry of two: should continue and carry rich resume guidance.
    assert decision.should_continue is True
    assert decision.retry_message is not None
    assert "Latest working note: Creating fifth chapter file: Advanced configurations" in decision.retry_message
    assert "Next missing planned artifact: `01-getting-started.html`" in decision.retry_message
    assert "Remaining planned artifacts: `01-getting-started.html`, `02-installation.html`" in decision.retry_message
    assert "Resume with this exact next step: create `01-getting-started.html`." in decision.retry_message
    assert f"Prefer one `write` call for `{temp_dir / 'guides' / 'nginx' / 'chapters' / '01-getting-started.html'}` before any more reference reads." in decision.retry_message
    assert (
        "Shape the next response as one concrete `write(file_path=..., content=...)` "
        "tool call for that exact path."
        in decision.retry_message
    )
    assert (
        "Your next response should be the concrete mutation tool call itself, "
        "not TodoWrite alone, verification, or a completion summary."
        in decision.retry_message
    )
    assert "Do not restart discovery unless one specific missing fact blocks this step." in decision.retry_message
275
+
276
+
277
def test_empty_response_retry_mentions_write_can_create_missing_parent_directories(
    temp_dir: Path,
) -> None:
    """When the next planned artifact's directory does not exist yet, the
    retry guidance should note that `write` auto-creates parent directories."""
    context = build_context(
        temp_dir=temp_dir,
        use_react=False,
    )
    repairer = ResponseRepairer(context)

    # Plan a single output file whose parent directories are never created.
    target_index = temp_dir / "guides" / "nginx" / "index.html"
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text(
        "# Implementation Plan\n"
        "\n"
        "## File Changes\n"
        f"- `{target_index}`\n"
    )

    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(plan_path)
    dod.pending_items += [
        "Create nginx guide directory structure",
        "Write main index.html for nginx guide",
    ]

    decision = repairer.handle_empty_response(
        task="Create a multi-file nginx guide.",
        original_task=None,
        empty_retry_count=1,
        max_empty_retries=2,
        dod=dod,
    )

    assert decision.should_continue is True
    message = decision.retry_message
    assert message is not None
    assert "Resume with this exact next step: create `index.html`." in message
    assert (
        "The `write` tool can create that file's parent directories automatically"
        in message
    )
    assert (
        "Shape the next response as one concrete `write(file_path=..., content=...)` "
        "tool call for that exact path."
        in message
    )
331
+
332
+
333
def test_empty_response_retry_respects_discovery_first_pending_step(
    temp_dir: Path,
) -> None:
    """If the first pending DoD item is explicitly a discovery step, the retry
    guidance should steer toward one evidence-gathering call instead of
    jumping straight to creating the first planned file."""
    context = build_context(
        temp_dir=temp_dir,
        use_react=False,
    )
    repairer = ResponseRepairer(context)

    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{temp_dir / 'guides' / 'nginx' / 'index.html'}`",
                f"- `{temp_dir / 'guides' / 'nginx' / 'chapters'}`",
                "",
            ]
        )
    )

    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    # First pending item is worded as a discovery ("examine ...") step.
    dod.pending_items.extend(
        [
            "First, examine the existing fortran guide structure and content to understand the format",
            "Create the nginx directory structure",
            "Develop the main index.html file for the nginx guide",
        ]
    )

    # Working note corroborates that the agent was mid-discovery.
    context.session.append(
        SimpleNamespace(
            role="tool",
            content=(
                "Observation [notepad_write_working]: Result: "
                "- [2026-04-22T22:42:18Z] Analyzing the fortran guide structure before creating nginx guide"
            ),
        )
    )

    decision = repairer.handle_empty_response(
        task="Create a multi-file nginx guide.",
        original_task=None,
        empty_retry_count=1,
        max_empty_retries=2,
        dod=dod,
    )

    assert decision.should_continue is True
    assert decision.retry_message is not None
    # Discovery step wins: advance the pending item, not file creation.
    assert (
        "Resume with this exact next step: advance `First, examine the existing fortran guide structure and content to understand the format`."
        in decision.retry_message
    )
    assert "one concrete evidence-gathering tool call" in decision.retry_message
    assert "Resume with this exact next step: create `index.html`." not in decision.retry_message
392
+
393
+
394
def test_empty_response_retry_budget_extends_for_late_stage_multi_artifact_progress(
    temp_dir: Path,
) -> None:
    """With most planned artifacts already materialized, the retry budget
    should stretch beyond max_empty_retries (here: 3rd retry against a
    nominal cap of 2, surfaced as "retry 3/4")."""
    context = build_context(
        temp_dir=temp_dir,
        use_react=False,
    )
    repairer = ResponseRepairer(context)

    # Materialize every planned artifact except the final chapter.
    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True)
    index_path = guide_root / "index.html"
    chapter_one = chapters / "01-getting-started.html"
    chapter_two = chapters / "02-installation.html"
    chapter_three = chapters / "03-first-website.html"
    chapter_four = chapters / "04-configuration-basics.html"
    index_path.write_text("<html></html>\n")
    chapter_one.write_text("<h1>One</h1>\n")
    chapter_two.write_text("<h1>Two</h1>\n")
    chapter_three.write_text("<h1>Three</h1>\n")

    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}/`",
                f"- `{chapters}/`",
                f"- `{index_path}`",
                f"- `{chapter_one}`",
                f"- `{chapter_two}`",
                f"- `{chapter_three}`",
                f"- `{chapter_four}`",
                "",
            ]
        )
    )

    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    dod.touched_files.extend(
        [str(index_path), str(chapter_one), str(chapter_two), str(chapter_three)]
    )
    dod.completed_items.extend(
        [
            "Create the directory structure for the new nginx guide",
            "Create the main index.html file with proper structure",
        ]
    )
    dod.pending_items.append("Create each chapter file in sequence")

    # empty_retry_count (3) already exceeds the nominal cap (2).
    decision = repairer.handle_empty_response(
        task="Create a multi-file nginx guide.",
        original_task=None,
        empty_retry_count=3,
        max_empty_retries=2,
        dod=dod,
    )

    assert decision.should_continue is True
    assert decision.retry_message is not None
    assert "retry 3/4" in decision.retry_message
    assert "Follow the same one-file-at-a-time mutation pattern" in decision.retry_message
460
+
461
+
462
def test_empty_response_retry_points_at_next_output_file_when_planned_directory_is_empty(
    temp_dir: Path,
) -> None:
    """A planned directory that exists but is empty should be treated as the
    next missing artifact, with guidance to write a file inside it."""
    context = build_context(
        temp_dir=temp_dir,
        use_react=False,
    )
    repairer = ResponseRepairer(context)

    # chapters/ exists but contains no files; index.html is done.
    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True)
    index_path = guide_root / "index.html"
    index_path.write_text("<html></html>\n")

    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}/`",
                f"- `{chapters}/`",
                f"- `{index_path}`",
                "",
            ]
        )
    )

    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    dod.touched_files.append(str(index_path))
    dod.pending_items.append("Write the introduction chapter")

    decision = repairer.handle_empty_response(
        task="Create a multi-file nginx guide.",
        original_task=None,
        empty_retry_count=1,
        max_empty_retries=2,
        dod=dod,
    )

    assert decision.should_continue is True
    assert decision.retry_message is not None
    assert "Next missing planned artifact: `chapters/`" in decision.retry_message
    # No specific child file is declared anywhere, so the guidance stays at
    # "next output file under the directory" granularity.
    assert (
        "Resume with this exact next step: continue `Write the introduction chapter` "
        "by creating the next output file under `chapters/`."
        in decision.retry_message
    )
    assert (
        f"Prefer one concrete `write` call for a file inside `{chapters}` before more research."
        in decision.retry_message
    )
516
+    )
517
+
518
+
519
def test_empty_response_retry_points_at_declared_child_file_within_incomplete_output_directory(
    temp_dir: Path,
) -> None:
    """If the already-written index.html links to chapter files, the retry
    guidance should name the first missing linked file (introduction.html)
    instead of the generic "next output file" wording."""
    context = build_context(
        temp_dir=temp_dir,
        use_react=False,
    )
    repairer = ResponseRepairer(context)

    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True)
    index_path = guide_root / "index.html"
    # index.html declares two chapter outputs via hrefs; neither exists yet.
    index_path.write_text(
        "\n".join(
            [
                "<html>",
                '<a href="chapters/introduction.html">Introduction</a>',
                '<a href="chapters/installation.html">Installation</a>',
                "</html>",
            ]
        )
        + "\n"
    )

    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}/`",
                f"- `{chapters}/`",
                f"- `{index_path}`",
                "",
            ]
        )
    )

    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    dod.touched_files.append(str(index_path))
    dod.pending_items.append("Write the introduction chapter")

    decision = repairer.handle_empty_response(
        task="Create a multi-file nginx guide.",
        original_task=None,
        empty_retry_count=1,
        max_empty_retries=2,
        dod=dod,
    )

    assert decision.should_continue is True
    assert decision.retry_message is not None
    assert "Next missing planned artifact: `chapters/`" in decision.retry_message
    assert "Next declared output under `chapters/`: `introduction.html`" in decision.retry_message
    assert (
        "Resume with this exact next step: continue `Write the introduction chapter` "
        "by creating `introduction.html`."
        in decision.retry_message
    )
    assert "It is the next missing declared output under `chapters/`." in decision.retry_message
    # The full path appears between these two fragments.
    assert "Prefer one `write` call for `" in decision.retry_message
    assert "introduction.html` before more research." in decision.retry_message
584
+
585
+
586
def test_empty_response_retry_fails_after_extended_late_stage_budget_is_exhausted(
    temp_dir: Path,
) -> None:
    """Even the extended late-stage budget is finite: with the same fixture as
    the budget-extension test but empty_retry_count=5, the repairer must stop
    retrying and fail honestly (final_response mentions "retrying 4 times")."""
    context = build_context(
        temp_dir=temp_dir,
        use_react=False,
    )
    repairer = ResponseRepairer(context)

    # Late-stage fixture: all planned artifacts except chapter four exist.
    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True)
    index_path = guide_root / "index.html"
    chapter_one = chapters / "01-getting-started.html"
    chapter_two = chapters / "02-installation.html"
    chapter_three = chapters / "03-first-website.html"
    chapter_four = chapters / "04-configuration-basics.html"
    index_path.write_text("<html></html>\n")
    chapter_one.write_text("<h1>One</h1>\n")
    chapter_two.write_text("<h1>Two</h1>\n")
    chapter_three.write_text("<h1>Three</h1>\n")

    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}/`",
                f"- `{chapters}/`",
                f"- `{index_path}`",
                f"- `{chapter_one}`",
                f"- `{chapter_two}`",
                f"- `{chapter_three}`",
                f"- `{chapter_four}`",
                "",
            ]
        )
    )

    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    dod.touched_files.extend(
        [str(index_path), str(chapter_one), str(chapter_two), str(chapter_three)]
    )
    dod.completed_items.extend(
        [
            "Create the directory structure for the new nginx guide",
            "Create the main index.html file with proper structure",
        ]
    )
    dod.pending_items.append("Create each chapter file in sequence")

    # 5 empty retries exceeds even the extended (4-attempt) budget.
    decision = repairer.handle_empty_response(
        task="Create a multi-file nginx guide.",
        original_task=None,
        empty_retry_count=5,
        max_empty_retries=2,
        dod=dod,
    )

    assert decision.should_continue is False
    assert decision.final_response is not None
    assert "retrying 4 times" in decision.final_response
651
+
652
+
653
def test_empty_response_retry_mentions_todowrite_when_progress_has_outpaced_tracking(
    temp_dir: Path,
) -> None:
    """When touched files show more progress than the DoD checklists record,
    the retry message should ask for a `TodoWrite` refresh alongside the
    next concrete mutation."""
    context = build_context(
        temp_dir=temp_dir,
        use_react=False,
    )
    repairer = ResponseRepairer(context)

    nginx_root = temp_dir / "guides" / "nginx"
    chapter_dir = nginx_root / "chapters"
    chapter_dir.mkdir(parents=True)

    # Three planned outputs; two are already recorded as touched below.
    planned = [
        nginx_root / "index.html",
        chapter_dir / "01-getting-started.html",
        chapter_dir / "02-installation.html",
    ]
    plan_lines = ["# Implementation Plan", "", "## File Changes"]
    plan_lines += [f"- `{path}`" for path in planned]
    plan_lines.append("")
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text("\n".join(plan_lines))

    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(plan_path)
    dod.touched_files += [str(planned[0]), str(planned[1])]
    dod.completed_items += [
        "Create the directory structure for the new nginx guide",
        "Create the main index.html file with proper structure",
    ]
    dod.pending_items.append("Create each chapter file in sequence")

    decision = repairer.handle_empty_response(
        task="Create a multi-file nginx guide.",
        original_task=None,
        empty_retry_count=1,
        max_empty_retries=2,
        dod=dod,
    )

    message = decision.retry_message
    assert message is not None
    assert "refresh `TodoWrite` alongside the next concrete mutation" in message
709
+
710
+
711
def test_empty_response_retry_omits_stale_aggregate_completed_work_when_artifacts_missing(
    temp_dir: Path,
) -> None:
    """Completed items that summarize unfinished aggregate work ("Link all
    chapters") must be dropped from the retry message while genuinely done
    items are kept, so the model is not told the task is further along than
    the on-disk artifacts show."""
    context = build_context(
        temp_dir=temp_dir,
        use_react=False,
    )
    repairer = ResponseRepairer(context)

    # chapter three is planned but never written — so "Link all chapters
    # together properly" cannot actually be complete yet.
    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True)
    index_path = guide_root / "index.html"
    chapter_one = chapters / "01-getting-started.html"
    chapter_two = chapters / "02-installation.html"
    chapter_three = chapters / "03-first-website.html"
    index_path.write_text("<html></html>\n")
    chapter_one.write_text("<h1>One</h1>\n")
    chapter_two.write_text("<h1>Two</h1>\n")

    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}/`",
                f"- `{chapters}/`",
                f"- `{index_path}`",
                f"- `{chapter_one}`",
                f"- `{chapter_two}`",
                f"- `{chapter_three}`",
                "",
            ]
        )
    )

    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    dod.touched_files.extend([str(index_path), str(chapter_one), str(chapter_two)])
    dod.completed_items.extend(
        [
            "Create the main index.html file with proper structure",
            "Link all chapters together properly",
        ]
    )
    dod.pending_items.append("Create each chapter file in sequence")

    decision = repairer.handle_empty_response(
        task="Create a multi-file nginx guide.",
        original_task=None,
        empty_retry_count=1,
        max_empty_retries=2,
        dod=dod,
    )

    assert decision.retry_message is not None
    # Stale aggregate claim suppressed; concrete completed item retained.
    assert "Link all chapters together properly" not in decision.retry_message
    assert "Create the main index.html file with proper structure" in decision.retry_message
tests/test_runtime_harness.pymodified
@@ -2020,26 +2020,8 @@ async def test_blocked_html_index_edit_queues_inventory_reuse_steering(
20202020
         if event.type == "steering" and event.content
20212021
     ]
20222022
 
2023
-    assert any("TOC references chapter files that do not exist" in message for message in messages)
2024
-    assert any(
2025
-        "Use the current TOC target contents plus the verified sibling inventory" in message
2026
-        for message in steering_messages
2027
-    )
2028
-    assert any(str(index_file) in message for message in steering_messages)
2029
-    assert any(
2030
-        "chapters/05-input-output.html = Chapter 5: Input and Output" in message
2031
-        for message in steering_messages
2032
-    )
2033
-    assert any("<ul class=\"chapter-list\">" in message for message in steering_messages)
2034
-    assert any("Suggested replacement block:" in message for message in steering_messages)
2035
-    assert any("Do not rewrite the whole document." in message for message in steering_messages)
2036
-    assert any("set `old_string` to the current TOC block above exactly" in message for message in steering_messages)
2037
-    assert any("Suggested edit call:" in message for message in steering_messages)
2038
-    assert any('old_string="""' in message for message in steering_messages)
2039
-    assert any(
2040
-        '<li><a href="chapters/05-input-output.html">Chapter 5: Input and Output</a></li>' in message
2041
-        for message in steering_messages
2042
-    )
2023
+    assert any("Edited HTML links point to files that do not exist" in message for message in messages)
2024
+    assert steering_messages == []
20432025
 
20442026
 
20452027
 @pytest.mark.asyncio
@@ -2080,15 +2062,7 @@ async def test_full_path_glob_pattern_still_injects_verified_html_inventory(
20802062
 
20812063
     assert tool_event_names(run) == ["glob"]
20822064
     messages = tool_result_messages(run)
2083
-    assert any(
2084
-        "Verified chapter inventory: chapters/01-introduction.html = Chapter 1: Introduction to Fortran"
2085
-        in message
2086
-        for message in messages
2087
-    )
2088
-    assert any(
2089
-        "chapters/02-setup.html = Chapter 2: Setting Up Fortran" in message
2090
-        for message in messages
2091
-    )
2065
+    assert all("Verified chapter inventory:" not in message for message in messages)
20922066
 
20932067
 
20942068
 @pytest.mark.asyncio
@@ -2136,16 +2110,8 @@ async def test_verified_html_inventory_blocks_redundant_chapter_reread(
21362110
     )
21372111
 
21382112
     messages = tool_result_messages(run)
2139
-    assert any(
2140
-        "Verified chapter inventory: chapters/01-introduction.html = Chapter 1: Introduction to Fortran"
2141
-        in message
2142
-        for message in messages
2143
-    )
2144
-    assert any(
2145
-        "verified sibling chapter inventory"
2146
-        in message
2147
-        for message in messages
2148
-    )
2113
+    assert all("Verified chapter inventory:" not in message for message in messages)
2114
+    assert all("verified sibling chapter inventory" not in message for message in messages)
21492115
 
21502116
 
21512117
 @pytest.mark.asyncio
@@ -2235,24 +2201,12 @@ async def test_successful_html_toc_edit_blocks_post_success_reread_and_steers_to
22352201
         if event.type == "steering" and event.content
22362202
     ]
22372203
 
2238
-    assert any(
2239
-        "Semantic verification preview: validated 2 toc links in index.html"
2240
-        in message
2241
-        for message in messages
2242
-    )
2243
-    assert any(
2244
-        "already passed semantic link validation" in message
2204
+    assert all(
2205
+        "Semantic verification preview:" not in message
22452206
         for message in messages
22462207
     )
2247
-    assert any(
2248
-        "already satisfies the verified link/title constraints" in message
2249
-        for message in steering_messages
2250
-    )
2251
-    assert any(
2252
-        "Do not reread" in message and "chapters" in message
2253
-        for message in steering_messages
2254
-    )
2255
-    assert "validated 2 toc links in index.html" in run.response
2208
+    assert steering_messages == []
2209
+    assert "updated index.html" in run.response.lower()
22562210
 
22572211
 
22582212
 @pytest.mark.asyncio
@@ -2325,17 +2279,11 @@ async def test_exact_prompt_finishes_when_index_toc_is_already_correct(
23252279
         if event.type == "steering" and event.content
23262280
     ]
23272281
 
2328
-    assert any(
2329
-        "Semantic verification preview: validated 2 toc links in index.html"
2330
-        in message
2282
+    assert all(
2283
+        "Semantic verification preview:" not in message
23312284
         for message in messages
23322285
     )
2333
-    assert any(
2334
-        "No TOC edit is required unless you can point to one specific incorrect href or title"
2335
-        in message
2336
-        for message in steering_messages
2337
-    )
2338
-    assert any(str(index_file) in message for message in steering_messages)
2286
+    assert steering_messages == []
23392287
     assert (
23402288
         sum(
23412289
             1
tests/test_runtime_repair_flows.pymodified
@@ -99,6 +99,111 @@ async def test_empty_response_retry_injects_honest_user_reminder_and_recovers(
9999
     )
100100
 
101101
 
102
@pytest.mark.asyncio
async def test_empty_response_retry_carries_forward_confirmed_progress(
    temp_dir: Path,
) -> None:
    """End-to-end: after a successful write then an empty completion, the
    retry prompt should reference the confirmed progress (hello.py) rather
    than telling the model to restart."""
    target = temp_dir / "hello.py"
    # Script: successful write -> empty response -> recovery.
    backend = ScriptedBackend(
        completions=[
            CompletionResponse(
                content="I'll create the file now.",
                tool_calls=[
                    ToolCall(
                        id="write-1",
                        name="write",
                        arguments={
                            "file_path": str(target),
                            "content": "print('hello')\n",
                        },
                    )
                ],
            ),
            CompletionResponse(content=""),
            CompletionResponse(content="Recovered after the empty response."),
        ]
    )

    run = await run_scenario(
        "Create hello.py with a greeting.",
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
    )

    assert "Recovered after the empty response." in run.response
    # Inspect the prompt of the third LLM call (the retry after the empty
    # response) for the injected user-role repair message.
    retry_messages = [
        message.content
        for message in backend.invocations[2].messages
        if message.role == Role.USER and "[EMPTY ASSISTANT RESPONSE]" in message.content
    ]
    assert retry_messages
    assert "retry 1/2" in retry_messages[0]
    assert "Continue from the confirmed progress below instead of restarting." in retry_messages[0]
    assert "hello.py" in retry_messages[0]
144
+
145
+
146
+@pytest.mark.asyncio
147
+async def test_empty_response_retry_budget_resets_after_successful_turn(
148
+    temp_dir: Path,
149
+) -> None:
150
+    first = temp_dir / "one.txt"
151
+    second = temp_dir / "two.txt"
152
+    backend = ScriptedBackend(
153
+        completions=[
154
+            CompletionResponse(content=""),
155
+            CompletionResponse(
156
+                content="I'll create the first file now.",
157
+                tool_calls=[
158
+                    ToolCall(
159
+                        id="write-1",
160
+                        name="write",
161
+                        arguments={
162
+                            "file_path": str(first),
163
+                            "content": "one\n",
164
+                        },
165
+                    )
166
+                ],
167
+            ),
168
+            CompletionResponse(content=""),
169
+            CompletionResponse(
170
+                content="I'll create the second file now.",
171
+                tool_calls=[
172
+                    ToolCall(
173
+                        id="write-2",
174
+                        name="write",
175
+                        arguments={
176
+                            "file_path": str(second),
177
+                            "content": "two\n",
178
+                        },
179
+                    )
180
+                ],
181
+            ),
182
+            CompletionResponse(content="Both files are created."),
183
+        ]
184
+    )
185
+
186
+    run = await run_scenario(
187
+        "Create one.txt and two.txt.",
188
+        backend,
189
+        config=non_streaming_config(),
190
+        project_root=temp_dir,
191
+    )
192
+
193
+    assert run.response.startswith("Both files are created.")
194
+    retry_messages: list[str] = []
195
+    for invocation in backend.invocations:
196
+        for message in invocation.messages:
197
+            if message.role != Role.USER or "[EMPTY ASSISTANT RESPONSE]" not in message.content:
198
+                continue
199
+            if retry_messages and retry_messages[-1] == message.content:
200
+                continue
201
+            retry_messages.append(message.content)
202
+    assert len(retry_messages) >= 2
203
+    assert all("retry 2/2" not in message for message in retry_messages)
204
+    assert sum("retry 1/2" in message for message in retry_messages) >= 2
205
+
206
+
102207
 @pytest.mark.asyncio
103208
 async def test_repeated_empty_responses_fail_honestly_after_one_retry(
104209
     temp_dir: Path,
@@ -107,6 +212,7 @@ async def test_repeated_empty_responses_fail_honestly_after_one_retry(
107212
         completions=[
108213
             CompletionResponse(content=""),
109214
             CompletionResponse(content=""),
215
+            CompletionResponse(content=""),
110216
         ]
111217
     )
112218
 
@@ -119,17 +225,22 @@ async def test_repeated_empty_responses_fail_honestly_after_one_retry(
119225
 
120226
     assert tool_event_names(run) == []
121227
     assert run.response == (
122
-        "I didn't get a usable response from the model after retrying once. "
228
+        "I didn't get a usable response from the model after retrying 2 times. "
123229
         "Please try again or switch to a different backend/model."
124230
     )
125
-    assert len(backend.invocations) == 2
126
-    assert [entry.kind for entry in run.agent.last_turn_summary.workflow_timeline[-2:]] == [
231
+    assert len(backend.invocations) == 3
232
+    assert [entry.kind for entry in run.agent.last_turn_summary.workflow_timeline[-3:]] == [
233
+        "repair_retry",
127234
         "repair_retry",
128235
         "repair_fail",
129236
     ]
130237
     assert run.agent.last_turn_summary.workflow_timeline[-1].reason_code == (
131238
         "empty_response_retry_exhausted"
132239
     )
240
+    assert run.agent.session.last_turn_transition_kind == "terminal"
241
+    assert run.agent.session.last_turn_transition_reason_code == (
242
+        "empty_response_retry_exhausted"
243
+    )
133244
 
134245
 
135246
 @pytest.mark.asyncio
tests/test_safeguard_services.pymodified
@@ -16,8 +16,6 @@ from loader.runtime.safeguards import RuntimeSafeguards
1616
 from loader.runtime.semantic_rules.html_toc import (
1717
     build_html_toc_edit_call_template,
1818
     build_html_toc_replacement_block,
19
-    build_validated_html_toc_observation_reason,
20
-    build_verified_html_inventory_observation_reason,
2119
     format_html_inventory_entry,
2220
     task_targets_html_toc,
2321
     validate_html_toc,
@@ -214,88 +212,6 @@ def test_action_tracker_blocks_repeated_read_without_changes(tmp_path) -> None:
214212
     assert str(file_path) in reason
215213
 
216214
 
217
-def test_action_tracker_blocks_post_validation_html_rereads_until_new_mutation(tmp_path) -> None:
218
-    tracker = ActionTracker()
219
-    chapters = tmp_path / "chapters"
220
-    chapters.mkdir()
221
-    chapter_path = chapters / "01-introduction.html"
222
-    chapter_path.write_text("<h1>Chapter 1: Introduction to Fortran</h1>\n")
223
-    index_path = tmp_path / "index.html"
224
-    index_path.write_text(
225
-        '<ul class="chapter-list">\n'
226
-        '    <li><a href="chapters/01-introduction.html">Chapter 1: Introduction to Fortran</a></li>\n'
227
-        "</ul>\n"
228
-    )
229
-
230
-    tracker.note_validated_html_toc(str(index_path))
231
-
232
-    assert tracker.check_tool_call("read", {"file_path": str(index_path)}) == (
233
-        True,
234
-        build_validated_html_toc_observation_reason(index_path),
235
-    )
236
-    assert tracker.check_tool_call("read", {"file_path": str(chapter_path)}) == (
237
-        True,
238
-        build_validated_html_toc_observation_reason(chapter_path),
239
-    )
240
-    assert tracker.check_tool_call(
241
-        "glob",
242
-        {"path": str(chapters), "pattern": "*.html"},
243
-    ) == (
244
-        True,
245
-        build_validated_html_toc_observation_reason(chapters),
246
-    )
247
-    assert tracker.check_tool_call(
248
-        "bash",
249
-        {"command": f"cat {index_path}"},
250
-    ) == (
251
-        True,
252
-        build_validated_html_toc_observation_reason(index_path),
253
-    )
254
-
255
-    tracker.record_tool_call(
256
-        "edit",
257
-        {
258
-            "file_path": str(index_path),
259
-            "old_string": "Chapter 1",
260
-            "new_string": "Chapter One",
261
-        },
262
-    )
263
-
264
-    assert tracker.check_tool_call("read", {"file_path": str(index_path)}) == (False, "")
265
-
266
-
267
-def test_action_tracker_blocks_chapter_rereads_after_verified_inventory(tmp_path) -> None:
268
-    tracker = ActionTracker()
269
-    chapters = tmp_path / "chapters"
270
-    chapters.mkdir()
271
-    chapter_path = chapters / "01-introduction.html"
272
-    chapter_path.write_text("<h1>Chapter 1: Introduction to Fortran</h1>\n")
273
-    index_path = tmp_path / "index.html"
274
-    index_path.write_text("<ul></ul>\n")
275
-
276
-    tracker.note_verified_html_inventory(str(index_path))
277
-
278
-    assert tracker.check_tool_call("read", {"file_path": str(index_path)}) == (False, "")
279
-    assert tracker.check_tool_call("read", {"file_path": str(chapter_path)}) == (
280
-        True,
281
-        build_verified_html_inventory_observation_reason(chapter_path),
282
-    )
283
-    assert tracker.check_tool_call(
284
-        "glob",
285
-        {"path": str(chapters), "pattern": "*.html"},
286
-    ) == (
287
-        True,
288
-        build_verified_html_inventory_observation_reason(chapters),
289
-    )
290
-    assert tracker.check_tool_call(
291
-        "bash",
292
-        {"command": f"head -20 {chapter_path}"},
293
-    ) == (
294
-        True,
295
-        build_verified_html_inventory_observation_reason(chapter_path),
296
-    )
297
-
298
-
299215
 def test_action_tracker_allows_one_interleaved_reread_without_changes(tmp_path) -> None:
300216
     tracker = ActionTracker()
301217
     index_path = tmp_path / "index.html"
@@ -372,7 +288,7 @@ def test_action_tracker_blocks_second_target_index_reread_after_chapter_discover
372288
     is_duplicate, reason = tracker.check_tool_call("read", {"file_path": str(index_path)})
373289
 
374290
     assert is_duplicate is True
375
-    assert "reuse that file/title evidence" in reason
291
+    assert "reuse the earlier read result instead of rereading" in reason
376292
 
377293
 
378294
 def test_action_tracker_blocks_repeated_chapter_directory_search_once_titles_are_known(
@@ -383,14 +299,12 @@ def test_action_tracker_blocks_repeated_chapter_directory_search_once_titles_are
383299
     search_args = {"pattern": "*.html", "path": str(chapters)}
384300
 
385301
     tracker.record_tool_call("glob", search_args)
386
-    tracker.record_tool_call("read", {"file_path": str(chapters / "01-introduction.html")})
387
-    tracker.record_tool_call("read", {"file_path": str(chapters / "02-setup.html")})
388
-    tracker.record_tool_call("read", {"file_path": str(chapters / "03-basics.html")})
302
+    tracker.record_tool_call("glob", search_args)
389303
 
390304
     is_duplicate, reason = tracker.check_tool_call("glob", search_args)
391305
 
392306
     assert is_duplicate is True
393
-    assert "reuse that filename/title evidence" in reason
307
+    assert "reuse the earlier search result instead of rerunning it" in reason
394308
 
395309
 
396310
 def test_action_tracker_allows_repeated_read_after_mutation(tmp_path) -> None:
@@ -490,8 +404,8 @@ def test_pre_action_validator_blocks_index_edit_with_missing_chapter_href(tmp_pa
490404
     )
491405
 
492406
     assert result.valid is False
493
-    assert result.reason == "Edited TOC references chapter files that do not exist"
494
-    assert "chapters/05-input-output.html = Chapter 5: Input and Output" in result.suggestion
407
+    assert result.reason == "Edited HTML links point to files that do not exist"
408
+    assert "chapters/05-control-structures.html" in result.suggestion
495409
 
496410
 
497411
 def test_pre_action_validator_blocks_index_edit_with_title_mismatch(tmp_path) -> None:
@@ -512,12 +426,111 @@ def test_pre_action_validator_blocks_index_edit_with_title_mismatch(tmp_path) ->
512426
         },
513427
     )
514428
 
429
+    assert result.valid is True
430
+
431
+
432
+def test_pre_action_validator_allows_chapter_write_with_future_target_declared_by_index(
433
+    tmp_path: Path,
434
+) -> None:
435
+    validator = PreActionValidator()
436
+    guide = tmp_path / "guide"
437
+    chapters = guide / "chapters"
438
+    chapters.mkdir(parents=True)
439
+    (guide / "index.html").write_text(
440
+        "\n".join(
441
+            [
442
+                '<a href="chapters/introduction.html">Introduction</a>',
443
+                '<a href="chapters/installation.html">Installation</a>',
444
+                "",
445
+            ]
446
+        )
447
+    )
448
+
449
+    result = validator.validate(
450
+        "write",
451
+        {
452
+            "file_path": str(chapters / "introduction.html"),
453
+            "content": '<a href="installation.html">Next</a>\n',
454
+        },
455
+    )
456
+
457
+    assert result.valid is True
458
+
459
+
460
+def test_pre_action_validator_blocks_chapter_write_with_undeclared_missing_sibling(
461
+    tmp_path: Path,
462
+) -> None:
463
+    validator = PreActionValidator()
464
+    guide = tmp_path / "guide"
465
+    chapters = guide / "chapters"
466
+    chapters.mkdir(parents=True)
467
+    (guide / "index.html").write_text(
468
+        "\n".join(
469
+            [
470
+                '<a href="chapters/introduction.html">Introduction</a>',
471
+                '<a href="chapters/installation.html">Installation</a>',
472
+                '<a href="chapters/configuration.html">Configuration</a>',
473
+                '<a href="chapters/usage.html">Usage</a>',
474
+                '<a href="chapters/troubleshooting.html">Troubleshooting</a>',
475
+                "",
476
+            ]
477
+        )
478
+    )
479
+    (chapters / "introduction.html").write_text('<a href="installation.html">Next</a>\n')
480
+    (chapters / "installation.html").write_text('<a href="configuration.html">Next</a>\n')
481
+    (chapters / "configuration.html").write_text('<a href="usage.html">Next</a>\n')
482
+
483
+    result = validator.validate(
484
+        "write",
485
+        {
486
+            "file_path": str(chapters / "usage.html"),
487
+            "content": '<a href="advanced.html">Next</a>\n',
488
+        },
489
+    )
490
+
515491
     assert result.valid is False
516
-    assert result.reason == "Edited TOC labels do not match the linked chapter titles"
517492
     assert (
518
-        "chapters/12-troubleshooting-tips.html = Chapter 12: Troubleshooting and Tips"
519
-        in result.suggestion
493
+        result.reason
494
+        == "HTML page introduces new local targets outside the current declared artifact set"
520495
     )
496
+    assert "advanced.html" in result.suggestion
497
+
498
+
499
+def test_pre_action_validator_blocks_missing_numbered_read_with_existing_sibling(
500
+    tmp_path: Path,
501
+) -> None:
502
+    validator = PreActionValidator()
503
+    chapters = tmp_path / "chapters"
504
+    chapters.mkdir()
505
+    (chapters / "01-getting-started.html").write_text("<h1>Getting Started</h1>\n")
506
+
507
+    result = validator.validate(
508
+        "read",
509
+        {"file_path": str(chapters / "01-introduction.html")},
510
+    )
511
+
512
+    assert result.valid is False
513
+    assert result.reason == "Read target conflicts with an existing numbered sibling"
514
+    assert "01-getting-started.html" in result.suggestion
515
+
516
+
517
+def test_pre_action_validator_blocks_new_numbered_sibling_drift(tmp_path) -> None:
518
+    validator = PreActionValidator()
519
+    chapters = tmp_path / "chapters"
520
+    chapters.mkdir()
521
+    (chapters / "01-getting-started.html").write_text("<h1>Getting Started</h1>\n")
522
+
523
+    result = validator.validate(
524
+        "write",
525
+        {
526
+            "file_path": str(chapters / "01-intro.html"),
527
+            "content": "<h1>Intro</h1>\n",
528
+        },
529
+    )
530
+
531
+    assert result.valid is False
532
+    assert result.reason == "New file conflicts with an existing numbered sibling"
533
+    assert "01-getting-started.html" in result.suggestion
521534
 
522535
 
523536
 def test_format_html_inventory_entry_handles_tmp_alias_paths() -> None:
tests/test_tool_batch_policies.pymodified
@@ -373,8 +373,6 @@ async def test_tool_batch_recovery_controller_includes_known_state_for_missing_f
373373
     assert "Prefer edit/write/patch on the target file" in follow_up.content
374374
     assert "04-variables.html" in follow_up.content
375375
     assert "02-basic-syntax.html -> 02-setup.html" in follow_up.content
376
-    assert "02-setup.html = Chapter 2: Setting Up Fortran" in follow_up.content
377
-    assert "/Users/mfwolffe/Loader/guides/fortran/index.html" in follow_up.content
378376
     assert any(event.type == "recovery" for event in events)
379377
 
380378
 
@@ -430,7 +428,6 @@ async def test_tool_batch_recovery_controller_suggests_known_sibling_files(
430428
     assert follow_up is not None
431429
     assert "## LIKELY FILE CANDIDATES" in follow_up.content
432430
     assert "`04-variables.html`" in follow_up.content
433
-    assert "Chapter 4: Variables and Data Types" in follow_up.content
434431
     assert "instead of retrying the missing path" in follow_up.content
435432
 
436433
 
@@ -506,17 +503,79 @@ async def test_tool_batch_recovery_controller_includes_current_html_target_excer
506503
 
507504
     assert follow_up is not None
508505
     assert "## CURRENT TARGET EXCERPT" in follow_up.content
509
-    assert "Verified chapter inventory:" in follow_up.content
510
-    assert "<ul class=\"chapter-list\">" in follow_up.content
511
-    assert "chapters/02-setup.html = Chapter 2: Setting Up Your Environment" in follow_up.content
512
-    assert "Suggested replacement block:" in follow_up.content
513
-    assert '<li><a href="chapters/02-setup.html">Chapter 2: Setting Up Your Environment</a></li>' in follow_up.content
514
-    assert "Exact edit guidance:" in follow_up.content
515
-    assert "old_string: use the Current TOC block above exactly" in follow_up.content
516
-    assert "new_string: use the Suggested replacement block above exactly" in follow_up.content
517
-    assert "Do not rewrite the whole file." in follow_up.content
518
-    assert "Suggested edit call:" in follow_up.content
519
-    assert 'old_string="""' in follow_up.content
506
+    assert "- Target file:" in follow_up.content
507
+    assert "index.html" in follow_up.content
508
+    assert (
509
+        "Closest on-disk block to the requested patch:" in follow_up.content
510
+        or "Current file contents near the requested patch location:" in follow_up.content
511
+    )
512
+    assert '1 | <h2>Table of Contents</h2>' in follow_up.content
513
+    assert (
514
+        '3 |     <li><a href="chapters/01-introduction.html">Chapter 1: Introduction to Fortran</a></li>'
515
+        in follow_up.content
516
+    )
517
+    assert "Use the exact on-disk text above" in follow_up.content
518
+    assert "Verified chapter inventory:" not in follow_up.content
519
+
520
+
521
+@pytest.mark.asyncio
522
+async def test_tool_batch_recovery_controller_includes_current_target_excerpt_for_edit_mismatch(
523
+    temp_dir: Path,
524
+) -> None:
525
+    async def assess_confidence(tool_name: str, tool_args: dict, context: str) -> ConfidenceAssessment:
526
+        raise AssertionError("Confidence should not run here")
527
+
528
+    async def verify_action(tool_name: str, tool_args: dict, result: str, expected: str = "") -> ActionVerification:
529
+        raise AssertionError("Verification should not run here")
530
+
531
+    guide = temp_dir / "guide.md"
532
+    guide.write_text(
533
+        "# Loader Guide\n"
534
+        "\n"
535
+        "## Overview\n"
536
+        "Loader helps agentic coding workflows.\n"
537
+        "\n"
538
+        "## Status\n"
539
+        "The runtime is stable.\n"
540
+    )
541
+
542
+    context = build_context(
543
+        temp_dir=temp_dir,
544
+        messages=[],
545
+        assess_confidence=assess_confidence,
546
+        verify_action=verify_action,
547
+    )
548
+    context.session.current_task = "Update guide.md to mention the runtime is resilient."
549
+    controller = ToolBatchRecoveryController(context)
550
+    tool_call = ToolCall(
551
+        id="edit-guide",
552
+        name="edit",
553
+        arguments={
554
+            "file_path": str(guide),
555
+            "old_string": "## Runtime\nThe runtime is stable.\n",
556
+            "new_string": "## Runtime\nThe runtime is resilient.\n",
557
+        },
558
+    )
559
+    outcome = tool_outcome(
560
+        tool_call=tool_call,
561
+        output="old_string not found in file. Make sure it matches exactly.",
562
+        is_error=True,
563
+    )
564
+
565
+    follow_up = await controller.build_follow_up(
566
+        tool_call=tool_call,
567
+        outcome=outcome,
568
+        emit=lambda event: _noop_emit(event),
569
+    )
570
+
571
+    assert follow_up is not None
572
+    assert "## CURRENT TARGET EXCERPT" in follow_up.content
573
+    assert "- Target file:" in follow_up.content
574
+    assert "guide.md" in follow_up.content
575
+    assert "Closest on-disk block to the requested edit:" in follow_up.content
576
+    assert "6 | ## Status" in follow_up.content
577
+    assert "7 | The runtime is stable." in follow_up.content
578
+    assert "replace the containing block in one edit" in follow_up.content
520579
 
521580
 
522581
 @pytest.mark.asyncio
@@ -610,6 +669,94 @@ async def test_tool_batch_recovery_controller_scopes_known_state_to_active_targe
610669
     ) not in follow_up.content
611670
 
612671
 
672
+@pytest.mark.asyncio
673
+async def test_tool_batch_recovery_controller_prioritizes_active_verification_repair_target(
674
+    temp_dir: Path,
675
+) -> None:
676
+    async def assess_confidence(
677
+        tool_name: str,
678
+        tool_args: dict,
679
+        context: str,
680
+    ) -> ConfidenceAssessment:
681
+        raise AssertionError("Confidence should not run here")
682
+
683
+    async def verify_action(
684
+        tool_name: str,
685
+        tool_args: dict,
686
+        result: str,
687
+        expected: str = "",
688
+    ) -> ActionVerification:
689
+        raise AssertionError("Verification should not run here")
690
+
691
+    nginx_root = temp_dir / "Loader" / "guides" / "nginx"
692
+    chapters = nginx_root / "chapters"
693
+    chapters.mkdir(parents=True)
694
+    index = nginx_root / "index.html"
695
+    index.write_text(
696
+        "<ul>\n"
697
+        '  <li><a href="chapters/01-introduction.html">Introduction</a></li>\n'
698
+        "</ul>\n"
699
+    )
700
+    (chapters / "01-getting-started.html").write_text("<h1>Getting Started</h1>\n")
701
+
702
+    repair_message = (
703
+        "[DEFINITION OF DONE CHECK FAILED]\n"
704
+        "Repair focus:\n"
705
+        f"- Fix the broken local reference `chapters/01-introduction.html` in `{index}`.\n"
706
+        f"- Immediate next step: edit `{index}`.\n"
707
+        f"- If the broken reference should remain, create `{chapters / '01-introduction.html'}`; "
708
+        "otherwise remove or replace `chapters/01-introduction.html`.\n"
709
+        "- Do not reread unrelated reference materials or restart discovery while this "
710
+        "concrete repair target is unresolved.\n"
711
+    )
712
+
713
+    context = build_context(
714
+        temp_dir=temp_dir,
715
+        messages=[
716
+            Message(role=Role.USER, content=repair_message),
717
+            Message(
718
+                role=Role.TOOL,
719
+                content=(
720
+                    "Observation [glob]: Result: "
721
+                    f"{chapters / '01-getting-started.html'}"
722
+                ),
723
+            ),
724
+        ],
725
+        assess_confidence=assess_confidence,
726
+        verify_action=verify_action,
727
+    )
728
+    context.session.current_task = (  # type: ignore[attr-defined]
729
+        "Have a look at ~/Loader/guides/fortran and chapters/ within. Get a feel "
730
+        "for the structure and cadence of the guide. We are going to make an all "
731
+        "new equally thorough guide on how to use the nginx tool."
732
+    )
733
+    controller = ToolBatchRecoveryController(context)
734
+    tool_call = ToolCall(
735
+        id="read-bad-path",
736
+        name="read",
737
+        arguments={"path": "~/nginx-guide/chapter1.html"},
738
+    )
739
+    outcome = tool_outcome(
740
+        tool_call=tool_call,
741
+        output="File not found: ~/nginx-guide/chapter1.html",
742
+        is_error=True,
743
+    )
744
+
745
+    follow_up = await controller.build_follow_up(
746
+        tool_call=tool_call,
747
+        outcome=outcome,
748
+        emit=lambda event: _noop_emit(event),
749
+    )
750
+
751
+    assert follow_up is not None
752
+    assert "## ACTIVE REPAIR TARGET" in follow_up.content
753
+    assert str(index) in follow_up.content
754
+    assert "chapters/01-introduction.html" in follow_up.content
755
+    assert "Do not go back to the original reference guide" in follow_up.content
756
+    assert "Current task: Have a look at ~/Loader/guides/fortran" not in follow_up.content
757
+    assert "~/nginx-guide/chapter1.html" in follow_up.content
758
+
759
+
613760
 @pytest.mark.asyncio
614761
 async def test_tool_batch_recovery_controller_reuses_context_for_related_missing_files(
615762
     temp_dir: Path,
@@ -671,6 +818,71 @@ async def test_tool_batch_recovery_controller_reuses_context_for_related_missing
671818
     assert "02-basic-syntax.html" in follow_up.content
672819
 
673820
 
821
+@pytest.mark.asyncio
822
+async def test_tool_batch_recovery_controller_uses_generic_loop_guidance(
823
+    temp_dir: Path,
824
+) -> None:
825
+    async def assess_confidence(
826
+        tool_name: str,
827
+        tool_args: dict,
828
+        context: str,
829
+    ) -> ConfidenceAssessment:
830
+        raise AssertionError("Confidence should not run here")
831
+
832
+    async def verify_action(
833
+        tool_name: str,
834
+        tool_args: dict,
835
+        result: str,
836
+        expected: str = "",
837
+    ) -> ActionVerification:
838
+        raise AssertionError("Verification should not run here")
839
+
840
+    existing = RecoveryContext(
841
+        original_tool="read",
842
+        original_args={"file_path": "~/Loader/guides/nginx/chapters/01-introduction.html"},
843
+        max_retries=3,
844
+    )
845
+    existing.add_attempt(
846
+        "read",
847
+        {"file_path": "~/Loader/guides/nginx/chapters/01-introduction.html"},
848
+        "File not found: ~/Loader/guides/nginx/chapters/01-introduction.html",
849
+    )
850
+    context = build_context(
851
+        temp_dir=temp_dir,
852
+        messages=[],
853
+        assess_confidence=assess_confidence,
854
+        verify_action=verify_action,
855
+        recovery_context=existing,
856
+    )
857
+    controller = ToolBatchRecoveryController(context)
858
+    tool_call = ToolCall(
859
+        id="read-missing-repeat",
860
+        name="read",
861
+        arguments={"file_path": "~/Loader/guides/nginx/chapters/01-introduction.html"},
862
+    )
863
+    outcome = tool_outcome(
864
+        tool_call=tool_call,
865
+        output="File not found: ~/Loader/guides/nginx/chapters/01-introduction.html",
866
+        is_error=True,
867
+    )
868
+    events: list[AgentEvent] = []
869
+
870
+    async def emit(event: AgentEvent) -> None:
871
+        events.append(event)
872
+
873
+    follow_up = await controller.build_follow_up(
874
+        tool_call=tool_call,
875
+        outcome=outcome,
876
+        emit=emit,
877
+    )
878
+
879
+    assert follow_up is not None
880
+    assert any(event.type == "error" for event in events)
881
+    error_event = next(event for event in events if event.type == "error")
882
+    assert "read a config file first" not in error_event.content
883
+    assert "verify the current result" in error_event.content
884
+
885
+
674886
 @pytest.mark.asyncio
675887
 async def test_tool_batch_recovery_controller_resets_context_for_unrelated_failures(
676888
     temp_dir: Path,
tests/test_tool_batches.pymodified
2354 lines changed — click to load
@@ -27,7 +27,12 @@ from loader.runtime.reasoning_types import (
2727
     ConfidenceLevel,
2828
 )
2929
 from loader.runtime.recovery import RecoveryContext
30
-from loader.runtime.tool_batches import ToolBatchRunner
30
+from loader.runtime.tool_batches import (
31
+    ToolBatchRunner,
32
+)
33
+from loader.runtime.tool_batches import (
34
+    _should_prioritize_missing_artifact as tool_batches_should_prioritize_missing_artifact,
35
+)
3136
 from loader.runtime.workflow import sync_todos_to_definition_of_done
3237
 from loader.tools.base import ToolResult as RegistryToolResult
3338
 from loader.tools.base import create_default_registry
@@ -610,6 +615,26 @@ async def test_tool_batch_runner_queues_duplicate_observation_nudge(
610615
         verify_action=verify_action,
611616
         auto_recover=False,
612617
     )
618
+    (temp_dir / "chapters").mkdir()
619
+    (temp_dir / "index.html").write_text("<ul></ul>\n")
620
+    (temp_dir / "chapters" / "01-introduction.html").write_text("<h1>Intro</h1>\n")
621
+    (temp_dir / "chapters" / "02-setup.html").write_text("<h1>Setup</h1>\n")
622
+    (temp_dir / "chapters" / "03-basics.html").write_text("<h1>Basics</h1>\n")
623
+    implementation_plan = temp_dir / "implementation.md"
624
+    implementation_plan.write_text(
625
+        "\n".join(
626
+            [
627
+                "# Implementation Plan",
628
+                "",
629
+                "## File Changes",
630
+                f"- `{temp_dir / 'index.html'}`",
631
+                f"- `{temp_dir / 'chapters' / '01-introduction.html'}`",
632
+                f"- `{temp_dir / 'chapters' / '02-setup.html'}`",
633
+                f"- `{temp_dir / 'chapters' / '03-basics.html'}`",
634
+                f"- `{temp_dir / 'chapters' / '04-variables.html'}`",
635
+            ]
636
+        )
637
+    )
613638
     context.session.current_task = (
614639
         f"Update {temp_dir / 'index.html'} with the right chapter links."
615640
     )
@@ -644,13 +669,16 @@ async def test_tool_batch_runner_queues_duplicate_observation_nudge(
644669
     )
645670
 
646671
     summary = TurnSummary(final_response="")
672
+    dod = create_definition_of_done("Fix the chapter links")
673
+    dod.implementation_plan = str(implementation_plan)
674
+    dod.pending_items.append("Create the remaining chapter files")
647675
     await runner.execute_batch(
648676
         tool_calls=[tool_call],
649677
         tool_source="assistant",
650678
         pending_tool_calls_seen=set(),
651679
         emit=_noop_emit,
652680
         summary=summary,
653
-        dod=create_definition_of_done("Fix the chapter links"),
681
+        dod=dod,
654682
         executor=executor,  # type: ignore[arg-type]
655683
         on_confirmation=None,
656684
         on_user_question=None,
@@ -660,8 +688,128 @@ async def test_tool_batch_runner_queues_duplicate_observation_nudge(
660688
 
661689
     assert len(queued_messages) == 1
662690
     assert "Reuse the earlier observation instead of repeating it." in queued_messages[0]
663
-    assert "01-introduction.html = Chapter 1: Introduction to Fortran" in queued_messages[0]
664
-    assert "index.html" in queued_messages[0]
691
+    assert "Continue with the next pending item: `Create the remaining chapter files`." in queued_messages[0]
692
+    assert "Resume by creating `04-variables.html` now." in queued_messages[0]
693
+    assert f"Prefer one `write` call for `{temp_dir / 'chapters' / '04-variables.html'}` instead of more rereads." in queued_messages[0]
694
+
695
+
696
+@pytest.mark.asyncio
697
+async def test_tool_batch_runner_todo_write_does_not_regress_completed_file_todo(
698
+    temp_dir: Path,
699
+) -> None:
700
+    async def assess_confidence(
701
+        tool_name: str,
702
+        tool_args: dict,
703
+        context: str,
704
+    ) -> ConfidenceAssessment:
705
+        raise AssertionError("Confidence scoring should not run for this scenario")
706
+
707
+    async def verify_action(
708
+        tool_name: str,
709
+        tool_args: dict,
710
+        result: str,
711
+        expected: str = "",
712
+    ) -> ActionVerification:
713
+        raise AssertionError("Verification should not run for this scenario")
714
+
715
+    context = build_context(
716
+        temp_dir=temp_dir,
717
+        messages=[],
718
+        safeguards=FakeSafeguards(),
719
+        assess_confidence=assess_confidence,
720
+        verify_action=verify_action,
721
+        auto_recover=False,
722
+    )
723
+    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
724
+    dod = create_definition_of_done("Create a multi-file nginx guide.")
725
+    sync_todos_to_definition_of_done(
726
+        dod,
727
+        [
728
+            {
729
+                "content": "Create 03-first-website.html",
730
+                "active_form": "Creating 03-first-website.html",
731
+                "status": "pending",
732
+            },
733
+            {
734
+                "content": "Create 04-configuration-basics.html",
735
+                "active_form": "Creating 04-configuration-basics.html",
736
+                "status": "pending",
737
+            },
738
+        ],
739
+    )
740
+
741
+    chapter_path = temp_dir / "guides" / "nginx" / "chapters" / "03-first-website.html"
742
+    chapter_path.parent.mkdir(parents=True)
743
+    write_call = ToolCall(
744
+        id="write-ch3",
745
+        name="write",
746
+        arguments={"file_path": str(chapter_path), "content": "<html></html>\n"},
747
+    )
748
+    stale_todo_call = ToolCall(
749
+        id="todo-stale",
750
+        name="TodoWrite",
751
+        arguments={
752
+            "todos": [
753
+                {
754
+                    "content": "Create 03-first-website.html",
755
+                    "active_form": "Creating 03-first-website.html",
756
+                    "status": "pending",
757
+                },
758
+                {
759
+                    "content": "Create 04-configuration-basics.html",
760
+                    "active_form": "Creating 04-configuration-basics.html",
761
+                    "status": "pending",
762
+                },
763
+            ]
764
+        },
765
+    )
766
+    executor = FakeExecutor(
767
+        [
768
+            tool_outcome(
769
+                tool_call=write_call,
770
+                output=f"Successfully wrote {chapter_path}",
771
+                is_error=False,
772
+            ),
773
+            tool_outcome(
774
+                tool_call=stale_todo_call,
775
+                output="Todos updated",
776
+                is_error=False,
777
+                metadata={
778
+                    "new_todos": [
779
+                        {
780
+                            "content": "Create 03-first-website.html",
781
+                            "active_form": "Creating 03-first-website.html",
782
+                            "status": "pending",
783
+                        },
784
+                        {
785
+                            "content": "Create 04-configuration-basics.html",
786
+                            "active_form": "Creating 04-configuration-basics.html",
787
+                            "status": "pending",
788
+                        },
789
+                    ]
790
+                },
791
+            ),
792
+        ]
793
+    )
794
+
795
+    summary = TurnSummary(final_response="")
796
+    await runner.execute_batch(
797
+        tool_calls=[write_call, stale_todo_call],
798
+        tool_source="assistant",
799
+        pending_tool_calls_seen=set(),
800
+        emit=_noop_emit,
801
+        summary=summary,
802
+        dod=dod,
803
+        executor=executor,  # type: ignore[arg-type]
804
+        on_confirmation=None,
805
+        on_user_question=None,
806
+        emit_confirmation=None,
807
+        consecutive_errors=0,
808
+    )
809
+
810
+    assert "Create 03-first-website.html" in dod.completed_items
811
+    assert "Create 03-first-website.html" not in dod.pending_items
812
+    assert "Create 04-configuration-basics.html" in dod.pending_items
665813
 
666814
 
667815
 @pytest.mark.asyncio
@@ -742,15 +890,9 @@ async def test_tool_batch_runner_proactively_queues_verified_html_inventory(
742890
         consecutive_errors=0,
743891
     )
744892
 
745
-    assert len(queued_messages) == 1
746
-    assert "verified sibling inventory" in queued_messages[0]
747
-    assert "chapters/01-introduction.html = Chapter 1: Introduction to Fortran" in queued_messages[0]
748
-    assert str(temp_dir / "index.html") in queued_messages[0]
893
+    assert queued_messages == []
749894
     assert len(summary.tool_result_messages) == 1
750
-    assert (
751
-        "Verified chapter inventory: chapters/01-introduction.html = Chapter 1: Introduction to Fortran"
752
-        in summary.tool_result_messages[0].content
753
-    )
895
+    assert "Verified chapter inventory:" not in summary.tool_result_messages[0].content
754896
 
755897
 
756898
 @pytest.mark.asyncio
@@ -845,15 +987,11 @@ async def test_tool_batch_runner_marks_validated_html_toc_completion_after_succe
845987
         consecutive_errors=0,
846988
     )
847989
 
848
-    assert any(
849
-        "Semantic verification preview: validated 2 toc links in index.html"
850
-        in message.content
990
+    assert all(
991
+        "Semantic verification preview:" not in message.content
851992
         for message in summary.tool_result_messages
852993
     )
853
-    assert len(queued_messages) == 1
854
-    assert "already satisfies the verified link/title constraints" in queued_messages[0]
855
-    assert f"`{index_path}`" in queued_messages[0]
856
-    assert f"`{chapters}`" in queued_messages[0]
994
+    assert queued_messages == []
857995
 
858996
 
859997
 @pytest.mark.asyncio
@@ -1166,7 +1304,7 @@ async def test_tool_batch_runner_duplicate_reference_read_prefers_next_pending_t
11661304
 
11671305
 
11681306
 @pytest.mark.asyncio
1169
-async def test_tool_batch_runner_observation_handoff_pushes_mutation_step(
1307
+async def test_tool_batch_runner_duplicate_read_ignores_unplanned_expansion_after_plan_complete(
11701308
     temp_dir: Path,
11711309
 ) -> None:
11721310
     async def assess_confidence(
@@ -1174,7 +1312,7 @@ async def test_tool_batch_runner_observation_handoff_pushes_mutation_step(
11741312
         tool_args: dict,
11751313
         context: str,
11761314
     ) -> ConfidenceAssessment:
1177
-        raise AssertionError("Confidence scoring should be disabled in this scenario")
1315
+        raise AssertionError("Confidence scoring should not run for this scenario")
11781316
 
11791317
     async def verify_action(
11801318
         tool_name: str,
@@ -1184,9 +1322,33 @@ async def test_tool_batch_runner_observation_handoff_pushes_mutation_step(
11841322
     ) -> ActionVerification:
11851323
         raise AssertionError("Verification should not run for this scenario")
11861324
 
1187
-    reference = temp_dir / "fortran" / "index.html"
1188
-    reference.parent.mkdir(parents=True)
1189
-    reference.write_text("<h1>Fortran Beginner's Guide</h1>\n")
1325
+    guide_root = temp_dir / "guides" / "nginx"
1326
+    chapters = guide_root / "chapters"
1327
+    guide_root.mkdir(parents=True)
1328
+    chapters.mkdir()
1329
+    index_path = guide_root / "index.html"
1330
+    chapter_one = chapters / "01-getting-started.html"
1331
+    chapter_two = chapters / "02-installation.html"
1332
+    index_path.write_text("<html></html>\n")
1333
+    chapter_one.write_text("<h1>One</h1>\n")
1334
+    chapter_two.write_text("<h1>Two</h1>\n")
1335
+
1336
+    implementation_plan = temp_dir / "implementation.md"
1337
+    implementation_plan.write_text(
1338
+        "\n".join(
1339
+            [
1340
+                "# Implementation Plan",
1341
+                "",
1342
+                "## File Changes",
1343
+                f"- `{guide_root}/`",
1344
+                f"- `{chapters}/`",
1345
+                f"- `{index_path}`",
1346
+                f"- `{chapter_one}`",
1347
+                f"- `{chapter_two}`",
1348
+                "",
1349
+            ]
1350
+        )
1351
+    )
11901352
 
11911353
     context = build_context(
11921354
         temp_dir=temp_dir,
@@ -1200,32 +1362,36 @@ async def test_tool_batch_runner_observation_handoff_pushes_mutation_step(
12001362
     context.queue_steering_message_callback = queued_messages.append
12011363
     runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
12021364
     dod = create_definition_of_done("Create a multi-file nginx guide.")
1203
-    sync_todos_to_definition_of_done(
1204
-        dod,
1205
-        [
1206
-            {
1207
-                "content": "Examine the existing Fortran guide structure to understand the cadence and format",
1208
-                "active_form": "Working on: Examine the existing Fortran guide structure to understand the cadence and format",
1209
-                "status": "pending",
1210
-            },
1211
-            {
1212
-                "content": "Create the nginx index.html file",
1213
-                "active_form": "Working on: Create the nginx index.html file",
1214
-                "status": "pending",
1215
-            },
1216
-        ],
1217
-    )
1365
+    dod.implementation_plan = str(implementation_plan)
1366
+    dod.pending_items = [
1367
+        "Create 07-performance-tuning.html",
1368
+        "Verify all guide files are linked and complete",
1369
+        "Complete the requested work",
1370
+    ]
1371
+
12181372
     tool_call = ToolCall(
1219
-        id="read-reference",
1373
+        id="read-dup",
12201374
         name="read",
1221
-        arguments={"file_path": str(reference)},
1375
+        arguments={"file_path": str(chapter_one)},
1376
+    )
1377
+    duplicate_message = (
1378
+        "[Skipped - duplicate action: Already read "
1379
+        f"{chapter_one} recently without any intervening changes; "
1380
+        "reuse the earlier read result instead of rereading]"
12221381
     )
12231382
     executor = FakeExecutor(
12241383
         [
1225
-            tool_outcome(
1384
+            ToolExecutionOutcome(
12261385
                 tool_call=tool_call,
1227
-                output="<h1>Fortran Beginner's Guide</h1>\n",
1386
+                state=ToolExecutionState.DUPLICATE,
1387
+                message=Message.tool_result_message(
1388
+                    tool_call_id=tool_call.id,
1389
+                    display_content=duplicate_message,
1390
+                    result_content=duplicate_message,
1391
+                ),
1392
+                event_content=duplicate_message,
12281393
                 is_error=False,
1394
+                result_output=duplicate_message,
12291395
             )
12301396
         ]
12311397
     )
@@ -1245,19 +1411,13 @@ async def test_tool_batch_runner_observation_handoff_pushes_mutation_step(
12451411
         consecutive_errors=0,
12461412
     )
12471413
 
1248
-    assert any(
1249
-        "Continue with the next pending item: `Create the nginx index.html file`"
1250
-        in message
1251
-        for message in queued_messages
1252
-    )
1253
-    assert any(
1254
-        "stop gathering more reference material and perform the change now" in message
1255
-        for message in queued_messages
1256
-    )
1414
+    assert len(queued_messages) == 1
1415
+    assert "Verify all guide files are linked and complete" in queued_messages[0]
1416
+    assert "Create 07-performance-tuning.html" not in queued_messages[0]
12571417
 
12581418
 
12591419
 @pytest.mark.asyncio
1260
-async def test_tool_batch_runner_hands_off_noop_toc_edit_when_file_is_already_valid(
1420
+async def test_tool_batch_runner_duplicate_read_after_plan_complete_pushes_verification_handoff(
12611421
     temp_dir: Path,
12621422
 ) -> None:
12631423
     async def assess_confidence(
@@ -1265,7 +1425,7 @@ async def test_tool_batch_runner_hands_off_noop_toc_edit_when_file_is_already_va
12651425
         tool_args: dict,
12661426
         context: str,
12671427
     ) -> ConfidenceAssessment:
1268
-        raise AssertionError("Confidence scoring should not run in this scenario")
1428
+        raise AssertionError("Confidence scoring should not run for this scenario")
12691429
 
12701430
     async def verify_action(
12711431
         tool_name: str,
@@ -1273,31 +1433,35 @@ async def test_tool_batch_runner_hands_off_noop_toc_edit_when_file_is_already_va
12731433
         result: str,
12741434
         expected: str = "",
12751435
     ) -> ActionVerification:
1276
-        raise AssertionError("Verification should not run in this scenario")
1436
+        raise AssertionError("Verification should not run for this scenario")
12771437
 
1278
-    prompt = (
1279
-        "Have a look at ~/Loader/guides/fortran/index.html, then "
1280
-        "~/Loader/guides/fortran/chapters. The table of contents links in "
1281
-        "index.html are inaccurate and the href’s are wrong. Let’s update the "
1282
-        "links and their link texts to be correct."
1283
-    )
1284
-    chapters = temp_dir / "chapters"
1438
+    guide_root = temp_dir / "guides" / "nginx"
1439
+    chapters = guide_root / "chapters"
1440
+    guide_root.mkdir(parents=True)
12851441
     chapters.mkdir()
1286
-    (chapters / "01-introduction.html").write_text(
1287
-        "<h1>Chapter 1: Introduction to Fortran</h1>\n"
1288
-    )
1289
-    (chapters / "02-setup.html").write_text(
1290
-        "<h1>Chapter 2: Setting Up Your Environment</h1>\n"
1291
-    )
1292
-    current_block = (
1293
-        "<h2>Table of Contents</h2>\n"
1294
-        '        <ul class="chapter-list">\n'
1295
-        '            <li><a href="chapters/01-introduction.html">Chapter 1: Introduction to Fortran</a></li>\n'
1296
-        '            <li><a href="chapters/02-setup.html">Chapter 2: Setting Up Your Environment</a></li>\n'
1297
-        "        </ul>\n"
1442
+    index_path = guide_root / "index.html"
1443
+    chapter_one = chapters / "01-getting-started.html"
1444
+    chapter_two = chapters / "02-installation.html"
1445
+    index_path.write_text("<html></html>\n")
1446
+    chapter_one.write_text("<h1>One</h1>\n")
1447
+    chapter_two.write_text("<h1>Two</h1>\n")
1448
+
1449
+    implementation_plan = temp_dir / "implementation.md"
1450
+    implementation_plan.write_text(
1451
+        "\n".join(
1452
+            [
1453
+                "# Implementation Plan",
1454
+                "",
1455
+                "## File Changes",
1456
+                f"- `{guide_root}/`",
1457
+                f"- `{chapters}/`",
1458
+                f"- `{index_path}`",
1459
+                f"- `{chapter_one}`",
1460
+                f"- `{chapter_two}`",
1461
+                "",
1462
+            ]
1463
+        )
12981464
     )
1299
-    index_path = temp_dir / "index.html"
1300
-    index_path.write_text(current_block)
13011465
 
13021466
     context = build_context(
13031467
         temp_dir=temp_dir,
@@ -1307,40 +1471,52 @@ async def test_tool_batch_runner_hands_off_noop_toc_edit_when_file_is_already_va
13071471
         verify_action=verify_action,
13081472
         auto_recover=False,
13091473
     )
1310
-    context.session.current_task = prompt  # type: ignore[attr-defined]
13111474
     queued_messages: list[str] = []
13121475
     context.queue_steering_message_callback = queued_messages.append
13131476
     runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
1477
+    dod = create_definition_of_done("Create a multi-file nginx guide.")
1478
+    dod.implementation_plan = str(implementation_plan)
1479
+    dod.verification_commands = [f"ls -la {guide_root}"]
1480
+    dod.pending_items = [
1481
+        "Create 07-performance-tuning.html",
1482
+        "Complete the requested work",
1483
+    ]
1484
+
13141485
     tool_call = ToolCall(
1315
-        id="edit-1",
1316
-        name="edit",
1317
-        arguments={
1318
-            "file_path": str(index_path),
1319
-            "old_string": current_block,
1320
-            "new_string": current_block,
1321
-        },
1486
+        id="read-dup",
1487
+        name="read",
1488
+        arguments={"file_path": str(chapter_one)},
1489
+    )
1490
+    duplicate_message = (
1491
+        "[Skipped - duplicate action: Already read "
1492
+        f"{chapter_one} recently without any intervening changes; "
1493
+        "reuse the earlier read result instead of rereading]"
13221494
     )
13231495
     executor = FakeExecutor(
13241496
         [
1325
-            tool_outcome(
1497
+            ToolExecutionOutcome(
13261498
                 tool_call=tool_call,
1327
-                output=(
1328
-                    "[Blocked - old_string and new_string are identical - no change "
1329
-                    "would occur] Suggestion: Provide different old and new strings"
1499
+                state=ToolExecutionState.DUPLICATE,
1500
+                message=Message.tool_result_message(
1501
+                    tool_call_id=tool_call.id,
1502
+                    display_content=duplicate_message,
1503
+                    result_content=duplicate_message,
13301504
                 ),
1331
-                is_error=True,
1332
-                state=ToolExecutionState.BLOCKED,
1505
+                event_content=duplicate_message,
1506
+                is_error=False,
1507
+                result_output=duplicate_message,
13331508
             )
13341509
         ]
13351510
     )
13361511
 
1512
+    summary = TurnSummary(final_response="")
13371513
     await runner.execute_batch(
13381514
         tool_calls=[tool_call],
13391515
         tool_source="assistant",
13401516
         pending_tool_calls_seen=set(),
13411517
         emit=_noop_emit,
1342
-        summary=TurnSummary(final_response=""),
1343
-        dod=create_definition_of_done(prompt),
1518
+        summary=summary,
1519
+        dod=dod,
13441520
         executor=executor,  # type: ignore[arg-type]
13451521
         on_confirmation=None,
13461522
         on_user_question=None,
@@ -1349,18 +1525,13 @@ async def test_tool_batch_runner_hands_off_noop_toc_edit_when_file_is_already_va
13491525
     )
13501526
 
13511527
     assert len(queued_messages) == 1
1352
-    assert "already matches the validated replacement block" in queued_messages[0]
1353
-    assert "validated 2 linked entries" in queued_messages[0]
1354
-    assert f"`{index_path}`" in queued_messages[0]
1355
-    assert "Do not call `edit`, `patch`, or reread the same TOC again" in queued_messages[0]
1356
-
1357
-
1358
-async def _noop_emit(event: AgentEvent) -> None:
1359
-    return None
1528
+    assert "All explicitly planned artifacts already exist." in queued_messages[0]
1529
+    assert "Move to verification or final confirmation using the files already on disk." in queued_messages[0]
1530
+    assert "Create 07-performance-tuning.html" not in queued_messages[0]
13601531
 
13611532
 
13621533
 @pytest.mark.asyncio
1363
-async def test_tool_batch_runner_marks_verification_planned_after_new_mutation(
1534
+async def test_tool_batch_runner_duplicate_read_after_plan_complete_ignores_stale_creation_todos(
13641535
     temp_dir: Path,
13651536
 ) -> None:
13661537
     async def assess_confidence(
@@ -1368,7 +1539,7 @@ async def test_tool_batch_runner_marks_verification_planned_after_new_mutation(
13681539
         tool_args: dict,
13691540
         context: str,
13701541
     ) -> ConfidenceAssessment:
1371
-        raise AssertionError("Confidence scoring should be disabled in this scenario")
1542
+        raise AssertionError("Confidence scoring should not run for this scenario")
13721543
 
13731544
     async def verify_action(
13741545
         tool_name: str,
@@ -1378,34 +1549,87 @@ async def test_tool_batch_runner_marks_verification_planned_after_new_mutation(
13781549
     ) -> ActionVerification:
13791550
         raise AssertionError("Verification should not run for this scenario")
13801551
 
1552
+    guide_root = temp_dir / "guides" / "nginx"
1553
+    chapters = guide_root / "chapters"
1554
+    guide_root.mkdir(parents=True)
1555
+    chapters.mkdir()
1556
+    index_path = guide_root / "index.html"
1557
+    chapter_one = chapters / "01-getting-started.html"
1558
+    chapter_two = chapters / "02-installation.html"
1559
+    index_path.write_text("<html></html>\n")
1560
+    chapter_one.write_text("<h1>One</h1>\n")
1561
+    chapter_two.write_text("<h1>Two</h1>\n")
1562
+
1563
+    implementation_plan = temp_dir / "implementation.md"
1564
+    implementation_plan.write_text(
1565
+        "\n".join(
1566
+            [
1567
+                "# Implementation Plan",
1568
+                "",
1569
+                "## File Changes",
1570
+                f"- `{guide_root}/`",
1571
+                f"- `{chapters}/`",
1572
+                f"- `{index_path}`",
1573
+                f"- `{chapter_one}`",
1574
+                f"- `{chapter_two}`",
1575
+                "",
1576
+            ]
1577
+        )
1578
+    )
1579
+
13811580
     context = build_context(
13821581
         temp_dir=temp_dir,
13831582
         messages=[],
13841583
         safeguards=FakeSafeguards(),
13851584
         assess_confidence=assess_confidence,
13861585
         verify_action=verify_action,
1586
+        auto_recover=False,
13871587
     )
1588
+    queued_messages: list[str] = []
1589
+    context.queue_steering_message_callback = queued_messages.append
13881590
     runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
1591
+    dod = create_definition_of_done("Create a multi-file nginx guide.")
1592
+    dod.implementation_plan = str(implementation_plan)
1593
+    dod.verification_commands = [f"ls -la {guide_root}"]
1594
+    dod.pending_items = [
1595
+        "Create 01-getting-started.html",
1596
+        "Creating 02-installation.html",
1597
+        "Complete the requested work",
1598
+    ]
1599
+
13891600
     tool_call = ToolCall(
1390
-        id="write-1",
1391
-        name="write",
1392
-        arguments={"file_path": str(temp_dir / "README.md"), "content": "updated\n"},
1601
+        id="read-dup-built-stale",
1602
+        name="read",
1603
+        arguments={"file_path": str(chapter_one)},
1604
+    )
1605
+    duplicate_message = (
1606
+        "[Skipped - duplicate action: Already read "
1607
+        f"{chapter_one} recently without any intervening changes; "
1608
+        "reuse the earlier read result instead of rereading]"
13931609
     )
13941610
     executor = FakeExecutor(
1395
-        [tool_outcome(tool_call=tool_call, output="wrote file", is_error=False)]
1611
+        [
1612
+            ToolExecutionOutcome(
1613
+                tool_call=tool_call,
1614
+                state=ToolExecutionState.DUPLICATE,
1615
+                message=Message.tool_result_message(
1616
+                    tool_call_id=tool_call.id,
1617
+                    display_content=duplicate_message,
1618
+                    result_content=duplicate_message,
1619
+                ),
1620
+                event_content=duplicate_message,
1621
+                is_error=False,
1622
+                result_output=duplicate_message,
1623
+            )
1624
+        ]
13961625
     )
1397
-    summary = TurnSummary(final_response="")
1398
-    dod = create_definition_of_done("Update README and verify it still works.")
1399
-    events: list[AgentEvent] = []
1400
-
1401
-    async def emit(event: AgentEvent) -> None:
1402
-        events.append(event)
14031626
 
1627
+    summary = TurnSummary(final_response="")
14041628
     await runner.execute_batch(
14051629
         tool_calls=[tool_call],
14061630
         tool_source="assistant",
14071631
         pending_tool_calls_seen=set(),
1408
-        emit=emit,
1632
+        emit=_noop_emit,
14091633
         summary=summary,
14101634
         dod=dod,
14111635
         executor=executor,  # type: ignore[arg-type]
@@ -1415,10 +1639,1500 @@ async def test_tool_batch_runner_marks_verification_planned_after_new_mutation(
14151639
         consecutive_errors=0,
14161640
     )
14171641
 
1418
-    assert dod.last_verification_result == "planned"
1419
-    assert dod.verification_commands
1420
-    assert "Collect verification evidence" in dod.pending_items
1421
-    assert dod.active_verification_attempt_id == "verification-attempt-1"
1642
+    assert len(queued_messages) == 1
1643
+    assert "All explicitly planned artifacts already exist." in queued_messages[0]
1644
+    assert "Move to verification or final confirmation using the files already on disk." in queued_messages[0]
1645
+    assert "Create 01-getting-started.html" not in queued_messages[0]
1646
+    assert "Creating 02-installation.html" not in queued_messages[0]
1647
+
1648
+
1649
+@pytest.mark.asyncio
1650
+async def test_tool_batch_runner_observation_handoff_pushes_mutation_step(
1651
+    temp_dir: Path,
1652
+) -> None:
1653
+    async def assess_confidence(
1654
+        tool_name: str,
1655
+        tool_args: dict,
1656
+        context: str,
1657
+    ) -> ConfidenceAssessment:
1658
+        raise AssertionError("Confidence scoring should be disabled in this scenario")
1659
+
1660
+    async def verify_action(
1661
+        tool_name: str,
1662
+        tool_args: dict,
1663
+        result: str,
1664
+        expected: str = "",
1665
+    ) -> ActionVerification:
1666
+        raise AssertionError("Verification should not run for this scenario")
1667
+
1668
+    reference = temp_dir / "fortran" / "index.html"
1669
+    reference.parent.mkdir(parents=True)
1670
+    reference.write_text("<h1>Fortran Beginner's Guide</h1>\n")
1671
+
1672
+    context = build_context(
1673
+        temp_dir=temp_dir,
1674
+        messages=[],
1675
+        safeguards=FakeSafeguards(),
1676
+        assess_confidence=assess_confidence,
1677
+        verify_action=verify_action,
1678
+        auto_recover=False,
1679
+    )
1680
+    queued_messages: list[str] = []
1681
+    context.queue_steering_message_callback = queued_messages.append
1682
+    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
1683
+    dod = create_definition_of_done("Create a multi-file nginx guide.")
1684
+    sync_todos_to_definition_of_done(
1685
+        dod,
1686
+        [
1687
+            {
1688
+                "content": "Examine the existing Fortran guide structure to understand the cadence and format",
1689
+                "active_form": "Working on: Examine the existing Fortran guide structure to understand the cadence and format",
1690
+                "status": "pending",
1691
+            },
1692
+            {
1693
+                "content": "Create the nginx index.html file",
1694
+                "active_form": "Working on: Create the nginx index.html file",
1695
+                "status": "pending",
1696
+            },
1697
+        ],
1698
+    )
1699
+    tool_call = ToolCall(
1700
+        id="read-reference",
1701
+        name="read",
1702
+        arguments={"file_path": str(reference)},
1703
+    )
1704
+    executor = FakeExecutor(
1705
+        [
1706
+            tool_outcome(
1707
+                tool_call=tool_call,
1708
+                output="<h1>Fortran Beginner's Guide</h1>\n",
1709
+                is_error=False,
1710
+            )
1711
+        ]
1712
+    )
1713
+
1714
+    summary = TurnSummary(final_response="")
1715
+    await runner.execute_batch(
1716
+        tool_calls=[tool_call],
1717
+        tool_source="assistant",
1718
+        pending_tool_calls_seen=set(),
1719
+        emit=_noop_emit,
1720
+        summary=summary,
1721
+        dod=dod,
1722
+        executor=executor,  # type: ignore[arg-type]
1723
+        on_confirmation=None,
1724
+        on_user_question=None,
1725
+        emit_confirmation=None,
1726
+        consecutive_errors=0,
1727
+    )
1728
+
1729
+    assert any(
1730
+        "Continue with the next pending item: `Create the nginx index.html file`"
1731
+        in message
1732
+        for message in queued_messages
1733
+    )
1734
+    assert any(
1735
+        "stop gathering more reference material and perform the change now" in message
1736
+        for message in queued_messages
1737
+    )
1738
+
1739
+
1740
+@pytest.mark.asyncio
1741
+async def test_duplicate_observation_nudge_prioritizes_missing_artifact_over_review(
1742
+    temp_dir: Path,
1743
+) -> None:
1744
+    async def assess_confidence(
1745
+        tool_name: str,
1746
+        tool_args: dict,
1747
+        context: str,
1748
+    ) -> ConfidenceAssessment:
1749
+        raise AssertionError("Confidence scoring should be disabled in this scenario")
1750
+
1751
+    async def verify_action(
1752
+        tool_name: str,
1753
+        tool_args: dict,
1754
+        result: str,
1755
+        expected: str = "",
1756
+    ) -> ActionVerification:
1757
+        raise AssertionError("Verification should not run for this scenario")
1758
+
1759
+    guide_root = temp_dir / "guides" / "nginx"
1760
+    chapters = guide_root / "chapters"
1761
+    chapters.mkdir(parents=True)
1762
+    index_path = guide_root / "index.html"
1763
+    chapter_one = chapters / "01-getting-started.html"
1764
+    chapter_one.write_text("<h1>One</h1>\n")
1765
+    index_path.write_text("<a href=\"chapters/01-getting-started.html\">One</a>\n")
1766
+
1767
+    implementation_plan = temp_dir / "implementation.md"
1768
+    implementation_plan.write_text(
1769
+        "\n".join(
1770
+            [
1771
+                "# Implementation Plan",
1772
+                "",
1773
+                "## File Changes",
1774
+                f"- `{index_path}`",
1775
+                f"- `{chapter_one}`",
1776
+                f"- `{chapters / '06-ssl-configuration.html'}`",
1777
+                "",
1778
+            ]
1779
+        )
1780
+    )
1781
+
1782
+    context = build_context(
1783
+        temp_dir=temp_dir,
1784
+        messages=[],
1785
+        safeguards=FakeSafeguards(),
1786
+        assess_confidence=assess_confidence,
1787
+        verify_action=verify_action,
1788
+        auto_recover=False,
1789
+    )
1790
+    queued_messages: list[str] = []
1791
+    context.queue_steering_message_callback = queued_messages.append
1792
+    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
1793
+    dod = create_definition_of_done("Create a multi-file nginx guide.")
1794
+    dod.implementation_plan = str(implementation_plan)
1795
+    sync_todos_to_definition_of_done(
1796
+        dod,
1797
+        [
1798
+            {
1799
+                "content": "Ensure all files are properly linked and formatted consistently",
1800
+                "active_form": "Working on: Ensure all files are properly linked and formatted consistently",
1801
+                "status": "pending",
1802
+            },
1803
+            {
1804
+                "content": "Create the final chapter (06-ssl-configuration.html)",
1805
+                "active_form": "Working on: Create the final chapter (06-ssl-configuration.html)",
1806
+                "status": "pending",
1807
+            },
1808
+        ],
1809
+    )
1810
+    assert tool_batches_should_prioritize_missing_artifact(
1811
+        next_pending=dod.pending_items[0],
1812
+        missing_artifact=(chapters / "06-ssl-configuration.html", False),
1813
+    )
1814
+
1815
+    tool_call = ToolCall(
1816
+        id="dup-read",
1817
+        name="read",
1818
+        arguments={"file_path": str(index_path)},
1819
+    )
1820
+    runner._queue_duplicate_observation_nudge(tool_call, dod=dod)  # type: ignore[attr-defined]
1821
+
1822
+    assert queued_messages
1823
+    message = queued_messages[-1]
1824
+    assert "06-ssl-configuration.html" in message
1825
+    assert "Do not switch into review or consistency-check mode" in message
1826
+    assert (
1827
+        "Continue with the next pending item: `Ensure all files are properly linked and formatted consistently`"
1828
+        not in message
1829
+    )
1830
+
1831
+
1832
+@pytest.mark.asyncio
1833
+async def test_tool_batch_runner_hands_off_to_verification_once_planned_artifacts_exist(
1834
+    temp_dir: Path,
1835
+) -> None:
1836
+    async def assess_confidence(
1837
+        tool_name: str,
1838
+        tool_args: dict,
1839
+        context: str,
1840
+    ) -> ConfidenceAssessment:
1841
+        raise AssertionError("Confidence scoring should be disabled in this scenario")
1842
+
1843
+    async def verify_action(
1844
+        tool_name: str,
1845
+        tool_args: dict,
1846
+        result: str,
1847
+        expected: str = "",
1848
+    ) -> ActionVerification:
1849
+        raise AssertionError("Verification should not run for this scenario")
1850
+
1851
+    guide_root = temp_dir / "guides" / "nginx"
1852
+    chapters = guide_root / "chapters"
1853
+    chapters.mkdir(parents=True)
1854
+    index_path = guide_root / "index.html"
1855
+    chapter_one = chapters / "01-getting-started.html"
1856
+    chapter_two = chapters / "02-installation.html"
1857
+    index_path.write_text("<a href=\"chapters/01-getting-started.html\">One</a>\n")
1858
+    chapter_one.write_text("<h1>One</h1>\n")
1859
+    chapter_two.write_text("<h1>Two</h1>\n")
1860
+
1861
+    implementation_plan = temp_dir / "implementation.md"
1862
+    implementation_plan.write_text(
1863
+        "\n".join(
1864
+            [
1865
+                "# Implementation Plan",
1866
+                "",
1867
+                "## File Changes",
1868
+                f"- `{chapters}/`",
1869
+                f"- `{index_path}`",
1870
+                f"- `{chapter_one}`",
1871
+                f"- `{chapter_two}`",
1872
+                "",
1873
+            ]
1874
+        )
1875
+    )
1876
+
1877
+    context = build_context(
1878
+        temp_dir=temp_dir,
1879
+        messages=[],
1880
+        safeguards=FakeSafeguards(),
1881
+        assess_confidence=assess_confidence,
1882
+        verify_action=verify_action,
1883
+        auto_recover=False,
1884
+    )
1885
+    queued_messages: list[str] = []
1886
+    context.queue_steering_message_callback = queued_messages.append
1887
+    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
1888
+    dod = create_definition_of_done("Create a multi-file nginx guide.")
1889
+    dod.implementation_plan = str(implementation_plan)
1890
+    sync_todos_to_definition_of_done(
1891
+        dod,
1892
+        [
1893
+            {
1894
+                "content": "Create the guide files",
1895
+                "active_form": "Working on: Create the guide files",
1896
+                "status": "completed",
1897
+            },
1898
+            {
1899
+                "content": "Ensure all files are properly linked and formatted consistently",
1900
+                "active_form": "Working on: Ensure all files are properly linked and formatted consistently",
1901
+                "status": "pending",
1902
+            },
1903
+        ],
1904
+    )
1905
+    tool_call = ToolCall(
1906
+        id="write-final",
1907
+        name="write",
1908
+        arguments={
1909
+            "file_path": str(chapter_two),
1910
+            "content": "<h1>Two</h1>\n",
1911
+        },
1912
+    )
1913
+    executor = FakeExecutor(
1914
+        [
1915
+            tool_outcome(
1916
+                tool_call=tool_call,
1917
+                output=f"Successfully wrote {chapter_two}",
1918
+                is_error=False,
1919
+            )
1920
+        ]
1921
+    )
1922
+
1923
+    summary = TurnSummary(final_response="")
1924
+    await runner.execute_batch(
1925
+        tool_calls=[tool_call],
1926
+        tool_source="assistant",
1927
+        pending_tool_calls_seen=set(),
1928
+        emit=_noop_emit,
1929
+        summary=summary,
1930
+        dod=dod,
1931
+        executor=executor,  # type: ignore[arg-type]
1932
+        on_confirmation=None,
1933
+        on_user_question=None,
1934
+        emit_confirmation=None,
1935
+        consecutive_errors=0,
1936
+    )
1937
+
1938
+    assert any(
1939
+        "All explicitly planned artifacts now exist." in message
1940
+        for message in queued_messages
1941
+    )
1942
+    assert any(
1943
+        "Ensure all files are properly linked and formatted consistently" in message
1944
+        for message in queued_messages
1945
+    )
1946
+    assert any(
1947
+        "Move to verification once no specific mismatch remains." in message
1948
+        for message in queued_messages
1949
+    )
1950
+
1951
+
1952
@pytest.mark.asyncio
async def test_tool_batch_runner_mutation_handoff_points_at_next_missing_artifact(
    temp_dir: Path,
) -> None:
    """After a successful write, with planned chapter files still absent, the
    queued steering message must name the next missing artifact and forbid
    drifting into verification or TodoWrite-only bookkeeping."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should not run in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    # Guide skeleton: the index exists on disk, both chapters do not.
    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    guide_root.mkdir(parents=True)
    chapters.mkdir()
    index_file = guide_root / "index.html"
    index_file.write_text("<html></html>\n")
    chapter_one = chapters / "01-getting-started.html"
    chapter_two = chapters / "02-installation.html"

    # The plan explicitly declares every artifact, including the missing ones.
    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{guide_root}/`",
        f"- `{index_file}`",
        f"- `{chapter_one}`",
        f"- `{chapter_two}`",
        "",
    ]
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text("\n".join(plan_lines))

    runtime_context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    steering_log: list[str] = []
    runtime_context.queue_steering_message_callback = steering_log.append
    batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

    definition = create_definition_of_done("Create a multi-file nginx guide.")
    definition.implementation_plan = str(plan_path)
    sync_todos_to_definition_of_done(
        definition,
        [
            {
                "content": "Create the main index.html file with proper structure",
                "active_form": "Working on: Create the main index.html file with proper structure",
                "status": "pending",
            },
            {
                "content": "Create each chapter file in sequence, following the established pattern",
                "active_form": "Working on: Create each chapter file in sequence, following the established pattern",
                "status": "pending",
            },
            {
                "content": "Ensure all files are properly linked and formatted consistently",
                "active_form": "Working on: Ensure all files are properly linked and formatted consistently",
                "status": "pending",
            },
        ],
    )

    write_call = ToolCall(
        id="write-index",
        name="write",
        arguments={"file_path": str(index_file), "content": "<html></html>\n"},
    )
    fake_executor = FakeExecutor(
        [
            tool_outcome(
                tool_call=write_call,
                output=f"Successfully wrote {index_file}",
                is_error=False,
            )
        ]
    )

    await batch_runner.execute_batch(
        tool_calls=[write_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=TurnSummary(final_response=""),
        dod=definition,
        executor=fake_executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    assert steering_log
    last_message = steering_log[-1]
    assert "Resume by creating `01-getting-started.html` now." in last_message
    assert "refresh `TodoWrite`" in last_message
    assert "Do not move to verification, final confirmation, or TodoWrite-only bookkeeping" in last_message
    assert "Do not spend another turn on working notes or rediscovery alone." in last_message
+
2059
+
2060
@pytest.mark.asyncio
async def test_tool_batch_runner_large_plan_does_not_claim_completion_early(
    temp_dir: Path,
) -> None:
    """With two of seven planned chapters still missing, finishing chapter 05
    must steer toward chapter 06 and must never announce that every planned
    artifact already exists."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should not run in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    guide_root.mkdir(parents=True)
    chapters.mkdir()
    index_file = guide_root / "index.html"
    index_file.write_text("<html></html>\n")

    chapter_names = [
        "01-getting-started.html",
        "02-installation.html",
        "03-first-website.html",
        "04-configuration-basics.html",
        "05-advanced-configurations.html",
        "06-performance-tuning.html",
        "07-security-best-practices.html",
    ]
    chapter_paths = [chapters / name for name in chapter_names]
    # Chapters 01-04 already exist; chapter 05 is written by the tool call
    # below, leaving 06 and 07 as the only missing planned files.
    for existing in chapter_paths[:4]:
        existing.write_text(f"<h1>{existing.stem}</h1>\n")
    chapter_paths[4].write_text("<h1>Advanced configurations</h1>\n")

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{guide_root}/`",
        f"- `{chapters}/`",
        f"- `{index_file}`",
    ]
    plan_lines.extend(f"- `{path}`" for path in chapter_paths)
    plan_lines.append("")
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text("\n".join(plan_lines))

    runtime_context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    steering_log: list[str] = []
    runtime_context.queue_steering_message_callback = steering_log.append
    batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

    definition = create_definition_of_done("Create a thorough nginx guide.")
    definition.implementation_plan = str(plan_path)
    sync_todos_to_definition_of_done(
        definition,
        [
            {
                "content": "Create the nginx guide artifacts",
                "active_form": "Creating nginx guide artifacts",
                "status": "pending",
            },
            {
                "content": "Verify all guide files are linked and complete",
                "active_form": "Verifying guide linkage and completeness",
                "status": "pending",
            },
        ],
    )

    write_call = ToolCall(
        id="write-chapter-05",
        name="write",
        arguments={
            "file_path": str(chapter_paths[4]),
            "content": "<h1>Advanced configurations</h1>\n",
        },
    )
    fake_executor = FakeExecutor(
        [
            tool_outcome(
                tool_call=write_call,
                output=f"Successfully wrote {chapter_paths[4]}",
                is_error=False,
            )
        ]
    )

    await batch_runner.execute_batch(
        tool_calls=[write_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=TurnSummary(final_response=""),
        dod=definition,
        executor=fake_executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    def seen(fragment: str) -> bool:
        # True if any queued steering message contains the fragment.
        return any(fragment in queued for queued in steering_log)

    assert seen("Resume by creating `06-performance-tuning.html` now.")
    assert not seen("All explicitly planned artifacts now exist.")
+
2185
+
2186
@pytest.mark.asyncio
async def test_tool_batch_runner_todowrite_with_missing_artifact_requeues_exact_resume_step(
    temp_dir: Path,
) -> None:
    """A TodoWrite-only turn while a planned file is still missing must requeue
    the exact resume step (create the missing chapter) rather than accept the
    bookkeeping as progress."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should not run in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    # Chapter one exists; chapter two is declared in the plan but missing.
    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    guide_root.mkdir(parents=True)
    chapters.mkdir()
    index_file = guide_root / "index.html"
    index_file.write_text("<html></html>\n")
    chapter_one = chapters / "01-getting-started.html"
    chapter_two = chapters / "02-installation.html"
    chapter_one.write_text("<h1>One</h1>\n")

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{guide_root}/`",
        f"- `{chapters}/`",
        f"- `{index_file}`",
        f"- `{chapter_one}`",
        f"- `{chapter_two}`",
        "",
    ]
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text("\n".join(plan_lines))

    runtime_context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    steering_log: list[str] = []
    runtime_context.queue_steering_message_callback = steering_log.append
    batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

    def make_todos() -> list[dict[str, str]]:
        # Fresh dicts each call so the DoD sync, the tool arguments, and the
        # executor metadata never share mutable state.
        return [
            {
                "content": "Create 01-getting-started.html",
                "active_form": "Creating 01-getting-started.html",
                "status": "completed",
            },
            {
                "content": "Create 02-installation.html",
                "active_form": "Creating 02-installation.html",
                "status": "pending",
            },
        ]

    definition = create_definition_of_done("Create a multi-file nginx guide.")
    definition.implementation_plan = str(plan_path)
    sync_todos_to_definition_of_done(definition, make_todos())
    definition.touched_files.extend([str(index_file), str(chapter_one)])

    todo_call = ToolCall(
        id="todo-only",
        name="TodoWrite",
        arguments={"todos": make_todos()},
    )
    fake_executor = FakeExecutor(
        [
            tool_outcome(
                tool_call=todo_call,
                output="Todos updated",
                is_error=False,
                metadata={"new_todos": make_todos()},
            )
        ]
    )

    await batch_runner.execute_batch(
        tool_calls=[todo_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=TurnSummary(final_response=""),
        dod=definition,
        executor=fake_executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    assert steering_log
    last_message = steering_log[-1]
    assert "Todo tracking is updated. An explicitly planned artifact is still missing." in last_message
    assert "Resume by creating `02-installation.html` now." in last_message
    assert "refresh `TodoWrite`" in last_message
    assert "Do not spend the next turn on TodoWrite alone" in last_message
+
2326
+
2327
@pytest.mark.asyncio
async def test_tool_batch_runner_todowrite_after_artifacts_exist_pushes_verification_handoff(
    temp_dir: Path,
) -> None:
    """Once every planned artifact exists, a TodoWrite-only turn must hand off
    to verification — naming the verification todo, not the stale exploration
    todo about the Fortran guide."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should not run in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    # Every planned file is present on disk before the TodoWrite call runs.
    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    guide_root.mkdir(parents=True)
    chapters.mkdir()
    index_file = guide_root / "index.html"
    chapter_one = chapters / "01-getting-started.html"
    chapter_two = chapters / "02-installation.html"
    index_file.write_text("<html></html>\n")
    chapter_one.write_text("<h1>One</h1>\n")
    chapter_two.write_text("<h1>Two</h1>\n")

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{guide_root}/`",
        f"- `{chapters}/`",
        f"- `{index_file}`",
        f"- `{chapter_one}`",
        f"- `{chapter_two}`",
        "",
    ]
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text("\n".join(plan_lines))

    runtime_context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    steering_log: list[str] = []
    runtime_context.queue_steering_message_callback = steering_log.append
    batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

    def make_todos() -> list[dict[str, str]]:
        # Fresh dicts each call so the DoD sync, the tool arguments, and the
        # executor metadata never share mutable state.
        return [
            {
                "content": "First, examine the existing Fortran guide structure to understand the format and content organization",
                "active_form": "Working on: First, examine the existing Fortran guide structure to understand the format and content organization",
                "status": "pending",
            },
            {
                "content": "Verify all guide files are linked and complete",
                "active_form": "Working on: Verify all guide files are linked and complete",
                "status": "pending",
            },
        ]

    definition = create_definition_of_done("Create a multi-file nginx guide.")
    definition.implementation_plan = str(plan_path)
    definition.verification_commands = [f"ls -la {guide_root}"]
    sync_todos_to_definition_of_done(
        definition,
        make_todos(),
        project_root=temp_dir,
    )

    todo_call = ToolCall(
        id="todo-only",
        name="TodoWrite",
        arguments={"todos": make_todos()},
    )
    fake_executor = FakeExecutor(
        [
            tool_outcome(
                tool_call=todo_call,
                output="Todos updated",
                is_error=False,
                metadata={"new_todos": make_todos()},
            )
        ]
    )

    await batch_runner.execute_batch(
        tool_calls=[todo_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=TurnSummary(final_response=""),
        dod=definition,
        executor=fake_executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    assert steering_log
    last_message = steering_log[-1]
    assert "Todo tracking is updated. All explicitly planned artifacts now exist." in last_message
    assert "Verify all guide files are linked and complete" in last_message
    assert "Move to verification once no specific mismatch remains." in last_message
    assert "reopen reference materials" in last_message
    assert "Fortran guide structure" not in last_message
+
2470
+
2471
@pytest.mark.asyncio
async def test_tool_batch_runner_todowrite_with_existing_output_roots_requeues_next_mutation(
    temp_dir: Path,
) -> None:
    """When the plan only declares directories and the index, but the index
    links to a chapter that does not exist yet, a TodoWrite-only turn must
    requeue creating that linked chapter as the next mutation."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should not run in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    guide_root.mkdir(parents=True)
    chapters.mkdir()
    # The index references a chapter file that has not been written yet.
    index_file = guide_root / "index.html"
    index_lines = [
        "<!DOCTYPE html>",
        "<html>",
        "<body>",
        '<a href="chapters/01-introduction.html">Introduction</a>',
        "</body>",
        "</html>",
        "",
    ]
    index_file.write_text("\n".join(index_lines))

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{guide_root}/`",
        f"- `{chapters}/`",
        f"- `{index_file}`",
        "",
    ]
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text("\n".join(plan_lines))

    runtime_context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    steering_log: list[str] = []
    runtime_context.queue_steering_message_callback = steering_log.append
    batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

    def make_todos() -> list[dict[str, str]]:
        # Fresh dicts each call so the DoD sync, the tool arguments, and the
        # executor metadata never share mutable state.
        return [
            {
                "content": "Examine the existing Fortran guide structure",
                "active_form": "Examining the existing Fortran guide structure",
                "status": "completed",
            },
            {
                "content": "Create the nginx directory structure",
                "active_form": "Creating the nginx directory structure",
                "status": "completed",
            },
            {
                "content": "Write the introduction chapter",
                "active_form": "Writing the introduction chapter",
                "status": "pending",
            },
        ]

    definition = create_definition_of_done("Create a multi-file nginx guide.")
    definition.implementation_plan = str(plan_path)
    definition.touched_files.append(str(index_file))
    sync_todos_to_definition_of_done(
        definition,
        make_todos(),
        project_root=temp_dir,
    )

    todo_call = ToolCall(
        id="todo-next-mutation",
        name="TodoWrite",
        arguments={"todos": make_todos()},
    )
    fake_executor = FakeExecutor(
        [
            tool_outcome(
                tool_call=todo_call,
                output="Todos updated",
                is_error=False,
                metadata={"new_todos": make_todos()},
            )
        ]
    )

    await batch_runner.execute_batch(
        tool_calls=[todo_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=TurnSummary(final_response=""),
        dod=definition,
        executor=fake_executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    assert steering_log
    last_message = steering_log[-1]
    assert "Todo tracking is updated. An explicitly planned artifact is still missing." in last_message
    assert "Continue with the next pending item: `Write the introduction chapter`." in last_message
    assert "Resume by creating `01-introduction.html` now." in last_message
    assert "It is the next missing declared output under `chapters/`." in last_message
    assert "Prefer one `write` call for `" in last_message
    assert "01-introduction.html` instead of more rereads." in last_message
    assert "Do not spend the next turn on TodoWrite alone" in last_message
+
2637
+
2638
@pytest.mark.asyncio
async def test_tool_batch_runner_todowrite_with_declared_child_targets_names_next_missing_file(
    temp_dir: Path,
) -> None:
    """When the index links to several chapters under a declared directory and
    none exist yet, a TodoWrite-only turn must name the first missing linked
    file as the concrete resume step."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should not run in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    guide_root.mkdir(parents=True)
    chapters.mkdir()
    # The index links two chapters; neither file exists yet.
    index_file = guide_root / "index.html"
    index_lines = [
        "<html>",
        '<a href="chapters/introduction.html">Introduction</a>',
        '<a href="chapters/installation.html">Installation</a>',
        "</html>",
    ]
    index_file.write_text("\n".join(index_lines) + "\n")

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{guide_root}/`",
        f"- `{chapters}/`",
        f"- `{index_file}`",
        "",
    ]
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text("\n".join(plan_lines))

    definition = create_definition_of_done("Create a multi-file nginx guide.")
    definition.implementation_plan = str(plan_path)
    definition.pending_items = [
        "Write the introduction chapter",
        "Complete the requested work",
    ]
    definition.touched_files.append(str(index_file))

    steering_log: list[str] = []
    runtime_context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    runtime_context.queue_steering_message_callback = steering_log.append
    batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

    # NOTE: the tool arguments use the camelCase "activeForm" key while the
    # executor metadata uses snake_case "active_form" — preserved deliberately.
    todo_call = ToolCall(
        id="todo-1",
        name="TodoWrite",
        arguments={
            "todos": [
                {
                    "content": "Write the introduction chapter",
                    "activeForm": "Writing the introduction chapter",
                    "status": "pending",
                }
            ]
        },
    )
    fake_executor = FakeExecutor(
        [
            tool_outcome(
                tool_call=todo_call,
                output="Todos updated",
                is_error=False,
                metadata={
                    "new_todos": [
                        {
                            "content": "Write the introduction chapter",
                            "active_form": "Writing the introduction chapter",
                            "status": "pending",
                        }
                    ]
                },
            )
        ]
    )

    await batch_runner.execute_batch(
        tool_calls=[todo_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=TurnSummary(final_response=""),
        dod=definition,
        executor=fake_executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    assert steering_log
    last_message = steering_log[-1]
    assert "Todo tracking is updated. An explicitly planned artifact is still missing." in last_message
    assert "Continue with the next pending item: `Write the introduction chapter`." in last_message
    assert "Resume by creating `introduction.html` now." in last_message
    assert "It is the next missing declared output under `chapters/`." in last_message
    assert "Prefer one `write` call for `" in last_message
    assert "introduction.html` instead of more rereads." in last_message
    assert "Do not spend the next turn on TodoWrite alone" in last_message
+
2766
+
2767
@pytest.mark.asyncio
async def test_tool_batch_runner_bookkeeping_note_with_missing_artifact_requeues_resume_step(
    temp_dir: Path,
) -> None:
    """Bookkeeping notes must not stall progress while a planned artifact is missing.

    The implementation plan declares `02-installation.html`, which does not yet
    exist on disk. After a batch that only writes a working note, the runner
    should queue a steering message that re-targets the missing artifact and
    demands a concrete mutation on the next turn.
    """

    # Safeguard hooks must stay idle for a plain bookkeeping batch; raising
    # turns any unexpected invocation into an immediate test failure.
    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should not run in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    # On-disk state: index and chapter one exist; chapter two is intentionally absent.
    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    guide_root.mkdir(parents=True)
    chapters.mkdir()
    index_path = guide_root / "index.html"
    chapter_one = chapters / "01-getting-started.html"
    chapter_two = chapters / "02-installation.html"
    index_path.write_text("<html></html>\n")
    chapter_one.write_text("<h1>One</h1>\n")

    # The plan lists every artifact, including the missing chapter two.
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}/`",
                f"- `{chapters}/`",
                f"- `{index_path}`",
                f"- `{chapter_one}`",
                f"- `{chapter_two}`",
                "",
            ]
        )
    )

    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    # Capture queued steering messages for the assertions below.
    queued_messages: list[str] = []
    context.queue_steering_message_callback = queued_messages.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    # Todo state mirrors disk: chapter one completed, chapter two still pending.
    sync_todos_to_definition_of_done(
        dod,
        [
            {
                "content": "Create 01-getting-started.html",
                "active_form": "Creating 01-getting-started.html",
                "status": "completed",
            },
            {
                "content": "Create 02-installation.html",
                "active_form": "Creating 02-installation.html",
                "status": "pending",
            },
        ],
        project_root=temp_dir,
    )
    dod.touched_files.extend([str(index_path), str(chapter_one)])

    # The batch under test does bookkeeping only: a single working-note write.
    tool_call = ToolCall(
        id="working-note",
        name="notepad_write_working",
        arguments={"content": "Creating the second chapter file: Installation"},
    )
    executor = FakeExecutor(
        [
            tool_outcome(
                tool_call=tool_call,
                output="Working note recorded",
                is_error=False,
            )
        ]
    )

    summary = TurnSummary(final_response="")
    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=summary,
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    # The latest steering message must name the missing artifact and steer the
    # next turn toward a concrete mutation rather than more bookkeeping.
    assert queued_messages
    message = queued_messages[-1]
    assert "Bookkeeping note is recorded. An explicitly planned artifact is still missing." in message
    assert "Resume by creating `02-installation.html` now." in message
    assert "Make your next response the concrete mutation tool call itself" in message
    assert "refresh `TodoWrite`" in message
    assert "Do not spend the next turn on additional notes, rediscovery, verification, or final confirmation" in message
2881
+
2882
+
2883
@pytest.mark.asyncio
async def test_tool_batch_runner_working_note_respects_discovery_first_pending_step(
    temp_dir: Path,
) -> None:
    """A discovery-first plan keeps the nudge on evidence gathering, not file creation.

    The first pending item is an examine/analyze step, so the steering message
    queued after a working-note batch should push toward one evidence-gathering
    tool call instead of demanding that `index.html` be created immediately.
    """

    # Safeguard hooks must not fire in this scenario.
    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    # Plan declares artifacts that do not exist yet; nothing is created on disk.
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{temp_dir / 'guides' / 'nginx' / 'index.html'}`",
                f"- `{temp_dir / 'guides' / 'nginx' / 'chapters'}`",
                "",
            ]
        )
    )

    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    # Capture queued steering messages for the assertions below.
    queued_messages: list[str] = []
    context.queue_steering_message_callback = queued_messages.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    # The first pending item is an explicit discovery step.
    dod.pending_items.extend(
        [
            "First, examine the existing fortran guide structure and content to understand the format",
            "Create the nginx directory structure",
            "Develop the main index.html file for the nginx guide",
        ]
    )

    # Bookkeeping-only batch: one working-note write, no mutations.
    tool_call = ToolCall(
        id="working-note",
        name="notepad_write_working",
        arguments={"content": "Analyzing the fortran guide structure before creating nginx guide"},
    )
    executor = FakeExecutor(
        [
            tool_outcome(
                tool_call=tool_call,
                output="Working note recorded",
                is_error=False,
            )
        ]
    )

    summary = TurnSummary(final_response="")
    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=summary,
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    # The nudge should honor the discovery-first pending item and must not
    # jump ahead to demanding creation of the planned index file.
    assert queued_messages
    message = queued_messages[-1]
    assert (
        "Continue with the next pending item: `First, examine the existing fortran guide structure and content to understand the format`."
        in message
    )
    assert "one concrete evidence-gathering tool call" in message
    assert "Resume by creating `index.html` now." not in message
2975
+
2976
+
2977
@pytest.mark.asyncio
async def test_tool_batch_runner_hands_off_noop_toc_edit_when_file_is_already_valid(
    temp_dir: Path,
) -> None:
    """A blocked no-op edit on an already-correct file should queue no steering.

    The table-of-contents block on disk is already valid, so the assistant's
    identical old/new edit gets BLOCKED by the executor. Because the file is in
    the desired state, the runner should stay hands-off: no steering message.
    """

    # Safeguard hooks must not fire in this scenario.
    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should not run in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    prompt = (
        "Have a look at ~/Loader/guides/fortran/index.html, then "
        "~/Loader/guides/fortran/chapters. The table of contents links in "
        "index.html are inaccurate and the href’s are wrong. Let’s update the "
        "links and their link texts to be correct."
    )
    # Chapter files whose headings match the TOC entries written below.
    chapters = temp_dir / "chapters"
    chapters.mkdir()
    (chapters / "01-introduction.html").write_text(
        "<h1>Chapter 1: Introduction to Fortran</h1>\n"
    )
    (chapters / "02-setup.html").write_text(
        "<h1>Chapter 2: Setting Up Your Environment</h1>\n"
    )
    # The on-disk TOC already points at the right files with the right text.
    current_block = (
        "<h2>Table of Contents</h2>\n"
        '        <ul class="chapter-list">\n'
        '            <li><a href="chapters/01-introduction.html">Chapter 1: Introduction to Fortran</a></li>\n'
        '            <li><a href="chapters/02-setup.html">Chapter 2: Setting Up Your Environment</a></li>\n'
        "        </ul>\n"
    )
    index_path = temp_dir / "index.html"
    index_path.write_text(current_block)

    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    context.session.current_task = prompt  # type: ignore[attr-defined]
    # Capture queued steering messages for the final assertion.
    queued_messages: list[str] = []
    context.queue_steering_message_callback = queued_messages.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    # old_string == new_string, so the executor blocks the edit as a no-op.
    tool_call = ToolCall(
        id="edit-1",
        name="edit",
        arguments={
            "file_path": str(index_path),
            "old_string": current_block,
            "new_string": current_block,
        },
    )
    executor = FakeExecutor(
        [
            tool_outcome(
                tool_call=tool_call,
                output=(
                    "[Blocked - old_string and new_string are identical - no change "
                    "would occur] Suggestion: Provide different old and new strings"
                ),
                is_error=True,
                state=ToolExecutionState.BLOCKED,
            )
        ]
    )

    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=TurnSummary(final_response=""),
        dod=create_definition_of_done(prompt),
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    # The file already satisfies the task, so no steering message is queued.
    assert queued_messages == []
3070
+
3071
+
3072
async def _noop_emit(event: AgentEvent) -> None:
    """Event sink used by tests that do not care about emitted agent events."""
3074
+
3075
+
3076
+@pytest.mark.asyncio
3077
+async def test_tool_batch_runner_marks_verification_planned_after_new_mutation(
3078
+    temp_dir: Path,
3079
+) -> None:
3080
+    async def assess_confidence(
3081
+        tool_name: str,
3082
+        tool_args: dict,
3083
+        context: str,
3084
+    ) -> ConfidenceAssessment:
3085
+        raise AssertionError("Confidence scoring should be disabled in this scenario")
3086
+
3087
+    async def verify_action(
3088
+        tool_name: str,
3089
+        tool_args: dict,
3090
+        result: str,
3091
+        expected: str = "",
3092
+    ) -> ActionVerification:
3093
+        raise AssertionError("Verification should not run for this scenario")
3094
+
3095
+    context = build_context(
3096
+        temp_dir=temp_dir,
3097
+        messages=[],
3098
+        safeguards=FakeSafeguards(),
3099
+        assess_confidence=assess_confidence,
3100
+        verify_action=verify_action,
3101
+    )
3102
+    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
3103
+    tool_call = ToolCall(
3104
+        id="write-1",
3105
+        name="write",
3106
+        arguments={"file_path": str(temp_dir / "README.md"), "content": "updated\n"},
3107
+    )
3108
+    executor = FakeExecutor(
3109
+        [tool_outcome(tool_call=tool_call, output="wrote file", is_error=False)]
3110
+    )
3111
+    summary = TurnSummary(final_response="")
3112
+    dod = create_definition_of_done("Update README and verify it still works.")
3113
+    events: list[AgentEvent] = []
3114
+
3115
+    async def emit(event: AgentEvent) -> None:
3116
+        events.append(event)
3117
+
3118
+    await runner.execute_batch(
3119
+        tool_calls=[tool_call],
3120
+        tool_source="assistant",
3121
+        pending_tool_calls_seen=set(),
3122
+        emit=emit,
3123
+        summary=summary,
3124
+        dod=dod,
3125
+        executor=executor,  # type: ignore[arg-type]
3126
+        on_confirmation=None,
3127
+        on_user_question=None,
3128
+        emit_confirmation=None,
3129
+        consecutive_errors=0,
3130
+    )
3131
+
3132
+    assert dod.last_verification_result == "planned"
3133
+    assert dod.verification_commands
3134
+    assert "Collect verification evidence" in dod.pending_items
3135
+    assert dod.active_verification_attempt_id == "verification-attempt-1"
14223136
     assert dod.active_verification_attempt_number == 1
14233137
     assert summary.workflow_timeline[-1].reason_code == "verification_planned"
14243138
     assert summary.workflow_timeline[-1].policy_outcome == "planned"
@@ -1526,3 +3240,238 @@ async def test_tool_batch_runner_marks_passed_verification_stale_after_new_mutat
15263240
         summary.workflow_timeline[-1].verification_observations[0].command
15273241
         == "uv run pytest -q"
15283242
     )
3243
+
3244
+
3245
def test_tool_batch_runner_blocked_active_repair_nudge_uses_repair_scope(temp_dir: Path) -> None:
    """The blocked-active-repair nudge restates the scope from the repair focus.

    Seeds the transcript with a "Repair focus" assistant message, triggers the
    blocked-active-repair nudge directly, and checks the queued steering message
    names the repair target and the missing chapter file while warning against
    reopening unrelated reference materials.
    """

    # Safeguard hooks must never fire here; raising makes any call a failure.
    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    repair_target = temp_dir / "guide" / "index.html"
    missing_chapter = temp_dir / "guide" / "chapters" / "01-getting-started.html"
    repair_focus = Message(
        role=Role.ASSISTANT,
        content=(
            "Repair focus:\n"
            f"- Fix the broken local reference `chapters/01-getting-started.html` in `{repair_target}`.\n"
            f"- Immediate next step: edit `{repair_target}`.\n"
            f"- If the broken reference should remain, create `{missing_chapter}`; otherwise remove or replace `chapters/01-getting-started.html`.\n"
        ),
    )
    context = build_context(
        temp_dir=temp_dir,
        messages=[repair_focus],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )
    steering_log: list[str] = []
    context.queue_steering_message_callback = steering_log.append

    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    runner._queue_blocked_active_repair_nudge(
        "[Blocked - active repair scope: verification already identified the repair target.]"
    )

    assert steering_log
    nudge = steering_log[0]
    assert str(repair_target) in nudge
    assert str(missing_chapter) in nudge
    assert "Do not reopen unrelated reference materials" in nudge
3291
+
3292
+
3293
def test_tool_batch_runner_blocked_active_repair_mutation_nudge_uses_allowed_paths(
    temp_dir: Path,
) -> None:
    """The blocked-mutation nudge lists both allowed repair paths.

    A "Repair focus" transcript entry names a chapter file with a broken
    stylesheet reference. Triggering the blocked-mutation nudge should queue a
    steering message that mentions both allowed paths and warns against widening
    the change set.
    """

    # Safeguard hooks must never fire here; raising makes any call a failure.
    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    repair_target = temp_dir / "guide" / "chapters" / "05-advanced-configurations.html"
    stylesheet = temp_dir / "guide" / "styles.css"
    repair_focus = Message(
        role=Role.ASSISTANT,
        content=(
            "Repair focus:\n"
            f"- Fix the broken local reference `../styles.css` in `{repair_target}`.\n"
            f"- Immediate next step: edit `{repair_target}`.\n"
            f"- If the broken reference should remain, create `{stylesheet}`; otherwise remove or replace `../styles.css`.\n"
        ),
    )
    context = build_context(
        temp_dir=temp_dir,
        messages=[repair_focus],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )
    steering_log: list[str] = []
    context.queue_steering_message_callback = steering_log.append

    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    runner._queue_blocked_active_repair_mutation_nudge(
        "[Blocked - active repair mutation scope: verification already identified the repair target.]"
    )

    assert steering_log
    nudge = steering_log[0]
    assert str(repair_target) in nudge
    assert str(stylesheet) in nudge
    assert "before widening the change set" in nudge
3342
+
3343
+
3344
def test_tool_batch_runner_blocked_late_reference_drift_nudge_points_to_missing_artifact(
    temp_dir: Path,
) -> None:
    """The late-reference-drift nudge points at the one still-missing artifact.

    Three of the four planned files exist on disk; only `03-first-website.html`
    is missing. The nudge should name that file and warn against going back to
    older reference materials.
    """

    # Safeguard hooks must never fire here; raising makes any call a failure.
    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )
    steering_log: list[str] = []
    context.queue_steering_message_callback = steering_log.append

    # Plan lists four artifacts; the last one is deliberately left uncreated.
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text(
        "# File Changes\n"
        "- `guide/index.html`\n"
        "- `guide/chapters/01-getting-started.html`\n"
        "- `guide/chapters/02-installation.html`\n"
        "- `guide/chapters/03-first-website.html`\n"
    )
    dod = create_definition_of_done("Create a multi-file guide from a reference")
    dod.implementation_plan = str(plan_file)

    chapters_dir = temp_dir / "guide" / "chapters"
    chapters_dir.mkdir(parents=True, exist_ok=True)
    (temp_dir / "guide" / "index.html").write_text("index")
    for name, body in (
        ("01-getting-started.html", "one"),
        ("02-installation.html", "two"),
    ):
        (chapters_dir / name).write_text(body)

    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    runner._queue_blocked_late_reference_drift_nudge(
        "[Blocked - late reference drift: several planned artifacts already exist.]",
        dod=dod,
    )

    assert steering_log
    nudge = steering_log[0]
    assert "03-first-website.html" in nudge
    assert "older reference materials" in nudge
3396
+
3397
+
3398
def test_tool_batch_runner_blocked_completed_artifact_scope_nudge_prefers_verification(
    temp_dir: Path,
) -> None:
    """When every planned artifact exists, the nudge steers toward verification.

    All files from the implementation plan are present on disk and the only
    pending todo is a verification step. The completed-artifact-scope nudge
    should say so, surface the pending verification item, and forbid reopening
    earlier reference materials.
    """

    # Safeguard hooks must never fire here; raising makes any call a failure.
    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    # Materialize every planned artifact on disk up front.
    guide_root = temp_dir / "guide"
    chapters_dir = guide_root / "chapters"
    chapters_dir.mkdir(parents=True)
    artifacts = {
        guide_root / "index.html": "index",
        chapters_dir / "01-getting-started.html": "one",
        chapters_dir / "02-installation.html": "two",
    }
    for path, body in artifacts.items():
        path.write_text(body)

    # Implementation plan that enumerates the directories plus each artifact.
    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{guide_root}`",
        f"- `{chapters_dir}`",
    ]
    plan_lines += [f"- `{path}`" for path in artifacts]
    plan_lines.append("")
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text("\n".join(plan_lines))

    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )
    steering_log: list[str] = []
    context.queue_steering_message_callback = steering_log.append

    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    dod = create_definition_of_done("Create a multi-file guide from a reference")
    dod.implementation_plan = str(implementation_plan)
    dod.verification_commands = [f"ls -la {guide_root}"]
    # The sole remaining todo is a verification step.
    sync_todos_to_definition_of_done(
        dod,
        [
            {
                "content": "Verify all guide files are linked and complete",
                "active_form": "Working on: Verify all guide files are linked and complete",
                "status": "pending",
            }
        ],
        project_root=temp_dir,
    )

    runner._queue_blocked_completed_artifact_scope_nudge(
        "[Blocked - completed artifact set scope: all explicitly planned artifacts already exist.]",
        dod=dod,
    )

    assert steering_log
    nudge = steering_log[0]
    assert "All explicitly planned artifacts already exist." in nudge
    assert "Verify all guide files are linked and complete" in nudge
    assert "Do not reopen earlier reference materials." in nudge
tests/test_turn_completion.pymodified
@@ -186,6 +186,103 @@ async def test_turn_completion_marks_non_mutating_response_done(
186186
     )
187187
 
188188
 
189
@pytest.mark.asyncio
async def test_turn_completion_blocks_false_completion_without_preserving_it(
    temp_dir: Path,
) -> None:
    """A false "all done" claim is rejected and not preserved in the transcript.

    The implementation plan declares `chapters/06-troubleshooting.html`, which
    is never created. When the assistant claims completion anyway, the turn
    should CONTINUE, the claim must not be recorded, a corrective user message
    naming the missing artifact is appended, and no response event is emitted.
    """

    backend = ScriptedBackend()
    agent = Agent(
        backend=backend,
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    events = []

    # Collect every emitted event so we can assert no "response" was produced.
    async def capture(event) -> None:
        events.append(event)

    prepared = await runtime.turn_preparation.prepare(
        task=(
            "Create a multi-file nginx guide under ~/Loader/guides/nginx "
            "with an index and chapter files."
        ),
        emit=capture,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        capture,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )

    # Plan declares three files; only two are materialized below, so
    # `chapters/06-troubleshooting.html` is deliberately missing.
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "# Implementation Plan\n\n"
        "## File Changes\n\n"
        "1. Create main index.html file:\n"
        "   - `index.html`\n\n"
        "2. Create chapter files:\n"
        "   - `chapters/01-getting-started.html`\n"
        "   - `chapters/06-troubleshooting.html`\n"
    )
    chapters_dir = temp_dir / "chapters"
    chapters_dir.mkdir()
    (chapters_dir / "01-getting-started.html").write_text("<h1>Getting Started</h1>\n")
    (temp_dir / "index.html").write_text("<h1>NGINX Guide</h1>\n")

    # Record the partial progress on the definition of done.
    prepared.definition_of_done.implementation_plan = str(implementation_plan)
    prepared.definition_of_done.mutating_actions.append("write")
    prepared.definition_of_done.touched_files.extend(
        [
            str(temp_dir / "index.html"),
            str(chapters_dir / "01-getting-started.html"),
        ]
    )

    # Capture queued steering messages for the assertions below.
    queued_messages: list[str] = []
    runtime.context.queue_steering_message_callback = queued_messages.append

    # The assistant falsely claims the whole guide is finished and verified.
    completion_claim = (
        "I've successfully completed the NGINX guide with all planned files "
        "and verified everything is done."
    )
    decision = await runtime.turn_completion.handle_text_response(
        content=completion_claim,
        response_content=completion_claim,
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=agent.config.max_iterations,
        actions_taken=[],
        continuation_count=0,
        dod=prepared.definition_of_done,
        emit=capture,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )

    # The turn continues and the false claim is kept out of the record.
    assert decision.action == TurnCompletionAction.CONTINUE
    assert prepared.summary.assistant_messages == []
    assert not any(
        message.role.value == "assistant" and message.content == completion_claim
        for message in agent.session.messages
    )
    # A corrective user message naming the missing artifact is appended instead.
    assert agent.session.messages[-1].role.value == "user"
    assert agent.session.messages[-1].content.startswith(
        "[PLANNED ARTIFACTS STILL MISSING]"
    )
    assert "`06-troubleshooting.html`" in agent.session.messages[-1].content
    assert queued_messages
    assert "06-troubleshooting.html" in queued_messages[-1]
    assert "Do not summarize, mark completion, or write bookkeeping notes yet" in queued_messages[-1]
    # No response event should have been surfaced for the rejected claim.
    assert not any(event.type == "response" for event in events)
284
+
285
+
189286
 @pytest.mark.asyncio
190287
 async def test_turn_completion_handles_fake_tool_narration_without_reroute(
191288
     temp_dir: Path,
tests/test_workflow.pymodified
@@ -15,10 +15,12 @@ from loader.runtime.workflow import (
1515
     WorkflowMode,
1616
     advance_todos_from_tool_call,
1717
     build_execute_bridge,
18
+    effective_pending_todo_items,
1819
     enrich_clarify_brief_with_grounding,
1920
     extract_verification_commands_from_markdown,
2021
     merge_refreshed_todos_with_existing_scope,
2122
     preserve_task_grounded_acceptance_criteria,
23
+    reconcile_aggregate_completion_steps,
2224
     sync_todos_to_definition_of_done,
2325
 )
2426
 
@@ -454,6 +456,167 @@ def test_merge_refreshed_todos_with_existing_scope_filters_retro_refresh_noise()
454456
     assert "04-configuring.html" not in labels
455457
 
456458
 
459
def test_merge_refreshed_todos_with_existing_scope_drops_unplanned_filename_expansion() -> None:
    """Refreshed steps that name files outside the planned set must be discarded."""
    task = (
        "Create an equally thorough nginx guide with index.html plus chapter files "
        "covering getting started, installation, configuration, usage, and troubleshooting."
    )

    todos = merge_refreshed_todos_with_existing_scope(
        task,
        existing_pending_items=[
            "Create chapter files with appropriate content structure",
        ],
        existing_completed_items=[
            "Create the nginx guide directory structure",
            "Create introduction.html",
        ],
        refreshed_steps=[
            "Create optimization.html",
            "Create security.html",
            "Ensure consistent chapter navigation",
        ],
        planned_files={
            "index.html",
            "introduction.html",
            "installation.html",
            "configuration.html",
            "usage.html",
            "troubleshooting.html",
        },
    )

    status_by_label = {entry["content"]: entry["status"] for entry in todos}
    # In-scope items survive; the unplanned chapter expansion is dropped.
    assert "Create chapter files with appropriate content structure" in status_by_label
    assert "Ensure consistent chapter navigation" in status_by_label
    assert "Create optimization.html" not in status_by_label
    assert "Create security.html" not in status_by_label
494
+
495
+
496
def test_planning_artifacts_with_file_changes_replaces_file_change_section() -> None:
    """`with_file_changes` swaps out the `## File Changes` bullet list wholesale."""
    original_plan = (
        "# Implementation Plan\n"
        "\n"
        "## File Changes\n"
        "- `old.txt`\n"
        "\n"
        "## Execution Order\n"
        "- Do the work\n"
        "\n"
    )
    artifacts = PlanningArtifacts(
        implementation_markdown=original_plan,
        verification_markdown="# Verification Plan\n",
        verification_commands=[],
        acceptance_criteria=["task"],
        implementation_steps=["Do the work"],
    )

    updated = artifacts.with_file_changes(
        ["`guides/nginx/index.html`", "`guides/nginx/chapters/`"]
    )

    # The stale entry is gone and both newly planned paths are present.
    assert "`old.txt`" not in updated.implementation_markdown
    assert "`guides/nginx/index.html`" in updated.implementation_markdown
    assert "`guides/nginx/chapters/`" in updated.implementation_markdown
524
+
525
+
526
def test_effective_pending_todo_items_filters_stale_discovery_after_artifacts_exist(
    temp_dir: Path,
) -> None:
    """Exploratory 'examine the reference guide' todos drop out once outputs exist."""
    guide_dir = temp_dir / "guides" / "nginx"
    chapter_dir = guide_dir / "chapters"
    chapter_dir.mkdir(parents=True)
    index_file = guide_dir / "index.html"
    first_chapter = chapter_dir / "01-getting-started.html"
    second_chapter = chapter_dir / "02-installation.html"
    for artifact, markup in (
        (index_file, "<html></html>\n"),
        (first_chapter, "<h1>One</h1>\n"),
        (second_chapter, "<h1>Two</h1>\n"),
    ):
        artifact.write_text(markup)

    plan_file = temp_dir / "implementation.md"
    plan_file.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_dir}/`",
                f"- `{chapter_dir}/`",
                f"- `{index_file}`",
                f"- `{first_chapter}`",
                f"- `{second_chapter}`",
                "",
            ]
        )
    )

    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(plan_file)
    dod.pending_items = [
        "First, examine the existing Fortran guide structure to understand the format and content organization",
        "Verify all guide files are linked and complete",
        "Complete the requested work",
    ]

    pending = effective_pending_todo_items(dod, project_root=temp_dir)

    # Verification/wrap-up steps remain; the stale discovery step is filtered.
    assert "Verify all guide files are linked and complete" in pending
    assert "Complete the requested work" in pending
    assert not any("Fortran guide structure" in item for item in pending)
570
+
571
+
572
def test_effective_pending_todo_items_filters_stale_creation_steps_after_artifacts_exist(
    temp_dir: Path,
) -> None:
    """Per-file creation todos disappear once their planned artifacts are on disk."""
    guide_dir = temp_dir / "guides" / "nginx"
    chapter_dir = guide_dir / "chapters"
    chapter_dir.mkdir(parents=True)
    index_file = guide_dir / "index.html"
    first_chapter = chapter_dir / "01-getting-started.html"
    second_chapter = chapter_dir / "02-installation.html"
    for artifact, markup in (
        (index_file, "<html></html>\n"),
        (first_chapter, "<h1>One</h1>\n"),
        (second_chapter, "<h1>Two</h1>\n"),
    ):
        artifact.write_text(markup)

    plan_file = temp_dir / "implementation.md"
    plan_file.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_dir}/`",
                f"- `{chapter_dir}/`",
                f"- `{index_file}`",
                f"- `{first_chapter}`",
                f"- `{second_chapter}`",
                "",
            ]
        )
    )

    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(plan_file)
    dod.pending_items = [
        "Create 01-getting-started.html",
        "Creating 02-installation.html",
        "Verify all guide files are linked and complete",
        "Complete the requested work",
    ]

    pending = effective_pending_todo_items(dod, project_root=temp_dir)

    # Both phrasings of the stale creation step ("Create"/"Creating") are removed.
    assert "Verify all guide files are linked and complete" in pending
    assert "Complete the requested work" in pending
    assert "Create 01-getting-started.html" not in pending
    assert "Creating 02-installation.html" not in pending
618
+
619
+
457620
 def test_workflow_artifact_store_and_bridge_round_trip(tmp_path: Path) -> None:
458621
     store = WorkflowArtifactStore(tmp_path)
459622
     brief = ClarifyBrief.fallback(
@@ -523,6 +686,58 @@ def test_sync_todos_to_definition_of_done_preserves_runtime_items() -> None:
523686
     assert "Update tests" in dod.completed_items
524687
 
525688
 
689
def test_sync_todos_to_definition_of_done_keeps_completed_items_monotonic() -> None:
    """A step completed via tool progress must not be reopened by a stale sync."""

    def fresh_todos() -> list[dict[str, str]]:
        # A new payload each call so the two syncs cannot share mutable state.
        return [
            {
                "content": "Create 03-first-website.html",
                "active_form": "Creating 03-first-website.html",
                "status": "pending",
            },
            {
                "content": "Create 04-configuration-basics.html",
                "active_form": "Creating 04-configuration-basics.html",
                "status": "pending",
            },
        ]

    dod = create_definition_of_done("Create a multi-file nginx guide.")
    sync_todos_to_definition_of_done(dod, fresh_todos())

    write_call = ToolCall(
        id="write-third-chapter",
        name="write",
        arguments={
            "file_path": "/tmp/nginx/chapters/03-first-website.html",
            "content": "<html></html>",
        },
    )
    assert advance_todos_from_tool_call(dod, write_call)
    assert "Create 03-first-website.html" in dod.completed_items

    # Re-syncing the original (now stale) todo list must not demote the done step.
    sync_todos_to_definition_of_done(dod, fresh_todos())

    assert "Create 03-first-website.html" in dod.completed_items
    assert "Create 03-first-website.html" not in dod.pending_items
    assert "Create 04-configuration-basics.html" in dod.pending_items
739
+
740
+
526741
 def test_advance_todos_from_tool_call_tracks_plan_progress() -> None:
527742
     dod = create_definition_of_done("Fix the chapter links in index.html.")
528743
     sync_todos_to_definition_of_done(
@@ -651,6 +866,41 @@ def test_advance_todos_from_tool_call_keeps_aggregate_mutation_steps_pending() -
651866
     )
652867
 
653868
 
869
def test_advance_todos_from_tool_call_keeps_plural_chapter_creation_step_pending() -> None:
    """Writing one chapter must not close an aggregate 'create chapter files' step."""
    dod = create_definition_of_done("Create a multi-file nginx guide.")
    sync_todos_to_definition_of_done(
        dod,
        [
            {
                "content": "Create chapter files following the established pattern",
                "active_form": "Working on: Create chapter files following the established pattern",
                "status": "pending",
            },
            {
                "content": "Ensure consistency with existing guide formatting and content style",
                "active_form": "Working on: Ensure consistency with existing guide formatting and content style",
                "status": "pending",
            },
        ],
    )

    single_write = ToolCall(
        id="write-one-chapter",
        name="write",
        arguments={
            "file_path": "/tmp/nginx/chapters/01-overview.html",
            "content": "<html></html>",
        },
    )
    # One file is not "chapter files" (plural) — the aggregate step stays open.
    assert advance_todos_from_tool_call(dod, single_write) is False
    assert "Create chapter files following the established pattern" in dod.pending_items
902
+
903
+
654904
 def test_advance_todos_from_tool_call_tracks_bash_directory_creation_progress() -> None:
655905
     dod = create_definition_of_done("Create a multi-file nginx guide.")
656906
     sync_todos_to_definition_of_done(
@@ -679,3 +929,283 @@ def test_advance_todos_from_tool_call_tracks_bash_directory_creation_progress()
679929
     )
680930
     assert "Create the nginx directory structure" in dod.completed_items
681931
     assert "Create index.html for nginx guide" in dod.pending_items
932
+
933
+
934
def test_advance_todos_from_tool_call_does_not_complete_linking_step_from_glob() -> None:
    """A read-only glob over a reference guide cannot complete the linking todo."""
    dod = create_definition_of_done("Create a multi-file nginx guide.")
    sync_todos_to_definition_of_done(
        dod,
        [
            {
                "content": "Link all chapters together properly in the index file",
                "active_form": "Working on: Link all chapters together properly in the index file",
                "status": "pending",
            },
        ],
    )

    glob_call = ToolCall(
        id="glob-reference-chapters",
        name="glob",
        arguments={"path": "~/Loader", "pattern": "**/fortran/chapters/*"},
    )
    assert advance_todos_from_tool_call(dod, glob_call) is False
    assert "Link all chapters together properly in the index file" in dod.pending_items
959
+
960
+
961
def test_sync_todos_to_definition_of_done_keeps_linking_step_pending_while_artifacts_missing(
    temp_dir: Path,
) -> None:
    """A 'link the chapters' todo cannot complete while a planned file is absent."""
    guide_dir = temp_dir / "guides" / "nginx"
    chapter_dir = guide_dir / "chapters"
    chapter_dir.mkdir(parents=True)
    index_file = guide_dir / "index.html"
    first_chapter = chapter_dir / "01-getting-started.html"
    second_chapter = chapter_dir / "02-installation.html"
    index_file.write_text("<html></html>\n")
    first_chapter.write_text("<h1>One</h1>\n")
    # second_chapter is deliberately NOT written: one planned artifact is missing.

    plan_file = temp_dir / "implementation.md"
    plan_file.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_dir}/`",
                f"- `{chapter_dir}/`",
                f"- `{index_file}`",
                f"- `{first_chapter}`",
                f"- `{second_chapter}`",
                "",
            ]
        )
    )

    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(plan_file)
    sync_todos_to_definition_of_done(
        dod,
        [
            {
                "content": "Create 01-getting-started.html chapter file",
                "active_form": "Creating 01-getting-started.html chapter file",
                "status": "completed",
            },
            {
                "content": "Link all chapters together properly in the index file",
                "active_form": "Linking chapters in the index file",
                "status": "completed",
            },
            {
                "content": "Create 02-installation.html chapter file",
                "active_form": "Creating 02-installation.html chapter file",
                "status": "pending",
            },
        ],
        project_root=temp_dir,
    )

    assert "Link all chapters together properly in the index file" in dod.pending_items
    assert "Link all chapters together properly in the index file" not in dod.completed_items
1017
+
1018
+
1019
def test_sync_todos_to_definition_of_done_allows_linking_step_when_artifacts_exist(
    temp_dir: Path,
) -> None:
    """The linking todo may complete once every planned artifact exists on disk."""
    guide_dir = temp_dir / "guides" / "nginx"
    chapter_dir = guide_dir / "chapters"
    chapter_dir.mkdir(parents=True)
    index_file = guide_dir / "index.html"
    first_chapter = chapter_dir / "01-getting-started.html"
    second_chapter = chapter_dir / "02-installation.html"
    for artifact, markup in (
        (index_file, "<html></html>\n"),
        (first_chapter, "<h1>One</h1>\n"),
        (second_chapter, "<h1>Two</h1>\n"),
    ):
        artifact.write_text(markup)

    plan_file = temp_dir / "implementation.md"
    plan_file.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_dir}/`",
                f"- `{chapter_dir}/`",
                f"- `{index_file}`",
                f"- `{first_chapter}`",
                f"- `{second_chapter}`",
                "",
            ]
        )
    )

    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(plan_file)
    sync_todos_to_definition_of_done(
        dod,
        [
            {
                "content": "Link all chapters together properly in the index file",
                "active_form": "Linking chapters in the index file",
                "status": "completed",
            },
        ],
        project_root=temp_dir,
    )

    assert "Link all chapters together properly in the index file" in dod.completed_items
1065
+
1066
+
1067
def test_sync_todos_to_definition_of_done_reopens_directory_content_step_when_output_dir_is_empty(
    temp_dir: Path,
) -> None:
    """An aggregate content step reopens when its planned output directory is empty."""
    guide_dir = temp_dir / "guides" / "nginx"
    chapter_dir = guide_dir / "chapters"
    chapter_dir.mkdir(parents=True)
    index_file = guide_dir / "index.html"
    index_file.write_text("<html></html>\n")
    # chapter_dir intentionally stays empty: no chapter file is ever written.

    plan_file = temp_dir / "implementation.md"
    plan_file.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{index_file}`",
                f"- `{chapter_dir}/` (directory for chapter files)",
                "",
                "## Execution Order",
                "- Create chapter files with appropriate content",
            ]
        )
    )

    dod = create_definition_of_done("Create an equally thorough nginx guide with chapters.")
    dod.implementation_plan = str(plan_file)
    sync_todos_to_definition_of_done(
        dod,
        [
            {
                "content": "Create chapter files with appropriate content",
                "active_form": "Creating chapter files with appropriate content",
                "status": "completed",
            },
        ],
        project_root=temp_dir,
    )

    assert "Create chapter files with appropriate content" in dod.pending_items
    assert "Create chapter files with appropriate content" not in dod.completed_items
1109
+
1110
+
1111
def test_reconcile_aggregate_completion_steps_reopens_linking_step_when_artifacts_missing(
    temp_dir: Path,
) -> None:
    """A completed linking step moves back to pending while a planned chapter is missing."""
    guide_dir = temp_dir / "guides" / "nginx"
    chapter_dir = guide_dir / "chapters"
    chapter_dir.mkdir(parents=True)
    index_file = guide_dir / "index.html"
    first_chapter = chapter_dir / "01-getting-started.html"
    second_chapter = chapter_dir / "02-installation.html"
    third_chapter = chapter_dir / "03-first-website.html"
    index_file.write_text("<html></html>\n")
    first_chapter.write_text("<h1>One</h1>\n")
    second_chapter.write_text("<h1>Two</h1>\n")
    # third_chapter is planned below but never written.

    plan_file = temp_dir / "implementation.md"
    plan_file.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_dir}/`",
                f"- `{chapter_dir}/`",
                f"- `{index_file}`",
                f"- `{first_chapter}`",
                f"- `{second_chapter}`",
                f"- `{third_chapter}`",
                "",
            ]
        )
    )

    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(plan_file)
    dod.completed_items.append("Link all chapters together properly")

    reconcile_aggregate_completion_steps(dod, project_root=temp_dir)

    assert "Link all chapters together properly" not in dod.completed_items
    assert "Link all chapters together properly" in dod.pending_items
1152
+
1153
+
1154
def test_sync_todos_to_definition_of_done_drops_unplanned_artifact_expansion_after_plan_complete(
    temp_dir: Path,
) -> None:
    """Once every planned artifact exists, unplanned extra chapters are not adopted."""
    guide_dir = temp_dir / "guides" / "nginx"
    chapter_dir = guide_dir / "chapters"
    chapter_dir.mkdir(parents=True)
    index_file = guide_dir / "index.html"
    first_chapter = chapter_dir / "01-getting-started.html"
    second_chapter = chapter_dir / "02-installation.html"
    for artifact, markup in (
        (index_file, "<html></html>\n"),
        (first_chapter, "<h1>One</h1>\n"),
        (second_chapter, "<h1>Two</h1>\n"),
    ):
        artifact.write_text(markup)

    plan_file = temp_dir / "implementation.md"
    plan_file.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_dir}/`",
                f"- `{chapter_dir}/`",
                f"- `{index_file}`",
                f"- `{first_chapter}`",
                f"- `{second_chapter}`",
                "",
            ]
        )
    )

    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(plan_file)
    sync_todos_to_definition_of_done(
        dod,
        [
            {
                "content": "Create 01-getting-started.html",
                "active_form": "Creating 01-getting-started.html",
                "status": "completed",
            },
            {
                "content": "Create 02-installation.html",
                "active_form": "Creating 02-installation.html",
                "status": "completed",
            },
            {
                "content": "Create 07-performance-tuning.html",
                "active_form": "Creating 07-performance-tuning.html",
                "status": "in_progress",
            },
        ],
        project_root=temp_dir,
    )

    # The out-of-plan chapter never becomes pending; the real work stays completed.
    assert "Creating 07-performance-tuning.html" not in dod.pending_items
    assert "Create 01-getting-started.html" in dod.completed_items
    assert "Create 02-installation.html" in dod.completed_items
tests/test_workflow_recovery.py added
@@ -0,0 +1,20 @@
1
+"""Focused tests for workflow recovery priority rules."""
2
+
3
+from __future__ import annotations
4
+
5
+from pathlib import Path
6
+
7
+from loader.runtime.workflow_recovery import _should_prioritize_missing_artifact
8
+
9
+
10
def test_workflow_recovery_prioritizes_missing_artifact_over_review_step() -> None:
    """A missing artifact outranks a generic review step, but not the step creating it."""
    missing_artifact = (Path("/tmp/guide/06-ssl-configuration.html"), False)

    # A generic polish/review step should yield to the missing file.
    assert _should_prioritize_missing_artifact(
        next_pending="Ensure all files are properly linked and formatted consistently",
        missing_artifact=missing_artifact,
    )
    # A step that already targets the missing file keeps priority.
    assert not _should_prioritize_missing_artifact(
        next_pending="Create the final chapter (06-ssl-configuration.html)",
        missing_artifact=missing_artifact,
    )
20
+    )
tests/test_workflow_tools.py modified
@@ -43,6 +43,65 @@ async def test_todo_write_persists_and_returns_previous_state(tmp_path: Path) ->
4343
     assert json.loads(store_path.read_text()) == []
4444
 
4545
 
46
@pytest.mark.asyncio
async def test_todo_write_merges_partial_status_updates_with_existing_scope(
    tmp_path: Path,
) -> None:
    """A partial todo payload updates matching entries without dropping the rest."""

    def entry(content: str, active_form: str, status: str) -> dict[str, str]:
        # Compact builder for the repetitive todo dicts used below.
        return {"content": content, "active_form": active_form, "status": status}

    tool = TodoWriteTool(tmp_path)

    initial = await tool.execute(
        todos=[
            entry("Create nginx index", "Creating nginx index", "completed"),
            entry("Create chapter files", "Creating chapter files", "in_progress"),
            entry("Verify links", "Verifying links", "pending"),
        ]
    )
    partial = await tool.execute(
        todos=[entry("Create chapter files", "Creating chapter files", "completed")]
    )

    initial_payload = json.loads(initial.output)
    partial_payload = json.loads(partial.output)
    assert initial.is_error is False
    assert partial.is_error is False
    # The partial write starts from the previously persisted state...
    assert partial_payload["old_todos"] == initial_payload["new_todos"]
    # ...and merges the single status change into the full existing scope.
    assert partial_payload["new_todos"] == [
        entry("Create nginx index", "Creating nginx index", "completed"),
        entry("Create chapter files", "Creating chapter files", "completed"),
        entry("Verify links", "Verifying links", "pending"),
    ]
104
+
46105
 @pytest.mark.asyncio
47106
 async def test_todo_write_rejects_invalid_payloads_and_sets_verification_nudge(
48107
     tmp_path: Path,