Strengthen qwen recovery and repair flow

SHA: df5639d514ff7b3f567a54307ba0f5d0bd3e03df
Parent: 297e213
Tree: a3395ea
src/loader/runtime/artifact_invalidation.py (modified)
@@ -34,6 +34,8 @@ class ArtifactInvalidationAssessor:
| 34 | 34 | acceptance_criteria: list[str], |
| 35 | 35 | touched_files: list[str], |
| 36 | 36 | last_verification_result: str | None, |
| 37 | + retry_count: int = 0, | |
| 38 | + planned_artifacts_complete: bool = False, | |
| 37 | 39 | ) -> ArtifactFreshness: |
| 38 | 40 | """Return stale-artifact state and the recommended recovery strategy.""" |
| 39 | 41 | |
@@ -46,10 +48,12 @@ class ArtifactInvalidationAssessor: | ||
| 46 | 48 | reason_codes: list[str] = [] |
| 47 | 49 | evidence: list[ArtifactEvidence] = [] |
| 48 | 50 | |
| 51 | + allow_repair_local_touchpoints = planned_artifacts_complete and retry_count > 0 | |
| 49 | 52 | unexpected_paths = [ |
| 50 | 53 | name |
| 51 | 54 | for path in touched_files |
| 52 | - if (name := _path_name(path)) and not _text_covers_path_reference(plan_text, path) | |
| 55 | + if (name := _path_name(path)) | |
| 56 | + and not _text_covers_path_reference(plan_text, path) | |
| 53 | 57 | ] |
| 54 | 58 | confirmed_touchpoints = [ |
| 55 | 59 | name |
@@ -86,13 +90,21 @@ class ArtifactInvalidationAssessor: | ||
| 86 | 90 | f"Persisted artifacts still point at `{item}`.", |
| 87 | 91 | ) |
| 88 | 92 | |
| 89 | - if unexpected_paths: | |
| 93 | + if unexpected_paths and not allow_repair_local_touchpoints: | |
| 90 | 94 | stale_plan = True |
| 91 | 95 | reason_codes.append("touched_files_outside_plan") |
| 92 | 96 | reasons.append( |
| 93 | 97 | "Touched files outside the current plan: " |
| 94 | 98 | + ", ".join(dict.fromkeys(unexpected_paths)) |
| 95 | 99 | ) |
| 100 | + elif unexpected_paths: | |
| 101 | + for item in dict.fromkeys(unexpected_paths): | |
| 102 | + _append_evidence( | |
| 103 | + evidence, | |
| 104 | + ArtifactEvidenceKind.CONFIRMED_TOUCHPOINT, | |
| 105 | + "Verification repair touched supplemental file " | |
| 106 | + f"`{item}` after the originally planned artifacts were complete.", | |
| 107 | + ) | |
| 96 | 108 | |
| 97 | 109 | acceptance_anchors = [ |
| 98 | 110 | item |
src/loader/runtime/compaction.py (modified)
@@ -8,7 +8,6 @@ from dataclasses import dataclass
| 8 | 8 | from pathlib import Path |
| 9 | 9 | |
| 10 | 10 | from ..llm.base import Message, Role, ToolCall |
| 11 | -from .semantic_rules import html_toc as html_toc_rule | |
| 12 | 11 | |
| 13 | 12 | DEFAULT_AUTO_COMPACTION_INPUT_TOKENS_THRESHOLD = 100_000 |
| 14 | 13 | MIN_AUTO_COMPACTION_INPUT_TOKENS_THRESHOLD = 12_000 |
@@ -337,19 +336,7 @@ def infer_preferred_next_step( | ||
| 337 | 336 | current_task=current_task, |
| 338 | 337 | focus_path=focus_path, |
| 339 | 338 | ) |
| 340 | - has_confirmed_titles = _summarize_html_title_discovery(relevant_messages) is not None | |
| 341 | - verification_gap = _summarize_latest_html_verification_gap(relevant_messages) | |
| 342 | 339 | if target_path: |
| 343 | - if verification_gap: | |
| 344 | - return ( | |
| 345 | - f"Update `{target_path}` to fix the specific verification failures " | |
| 346 | - f"({verification_gap}) instead of restarting discovery." | |
| 347 | - ) | |
| 348 | - if has_confirmed_titles: | |
| 349 | - return ( | |
| 350 | - f"Update `{target_path}` using the confirmed chapter file/title pairs " | |
| 351 | - "instead of rereading files." | |
| 352 | - ) | |
| 353 | 340 | return ( |
| 354 | 341 | f"Update `{target_path}` using the confirmed findings instead of " |
| 355 | 342 | "restarting earlier discovery steps." |
@@ -440,27 +427,6 @@ def _collect_confirmed_facts(messages: list[Message]) -> list[str]: | ||
| 440 | 427 | if explicit_mapping_fact: |
| 441 | 428 | facts.append(explicit_mapping_fact) |
| 442 | 429 | |
| 443 | - verification_gap_fact = _collect_html_verification_gap_fact( | |
| 444 | - messages, | |
| 445 | - tool_calls_by_id=tool_calls_by_id, | |
| 446 | - ) | |
| 447 | - if verification_gap_fact: | |
| 448 | - facts.append(verification_gap_fact) | |
| 449 | - | |
| 450 | - title_fact = _summarize_html_title_discovery( | |
| 451 | - messages, | |
| 452 | - tool_calls_by_id=tool_calls_by_id, | |
| 453 | - ) | |
| 454 | - if title_fact: | |
| 455 | - facts.append(title_fact) | |
| 456 | - | |
| 457 | - file_fact = _collect_html_file_discovery_fact( | |
| 458 | - messages, | |
| 459 | - tool_calls_by_id=tool_calls_by_id, | |
| 460 | - ) | |
| 461 | - if file_fact: | |
| 462 | - facts.append(file_fact) | |
| 463 | - | |
| 464 | 430 | return facts |
| 465 | 431 | |
| 466 | 432 | |
@@ -529,162 +495,6 @@ def _summarize_html_mappings(payload: str) -> str | None: | ||
| 529 | 495 | return f"Filename mappings confirmed: {preview}" |
| 530 | 496 | |
| 531 | 497 | |
| 532 | -def _summarize_html_title_discovery( | |
| 533 | - messages: list[Message], | |
| 534 | - *, | |
| 535 | - max_pairs: int = 4, | |
| 536 | - tool_calls_by_id: dict[str, ToolCall] | None = None, | |
| 537 | -) -> str | None: | |
| 538 | - if tool_calls_by_id is None: | |
| 539 | - tool_calls_by_id = { | |
| 540 | - tool_call.id: tool_call | |
| 541 | - for message in messages | |
| 542 | - for tool_call in message.tool_calls | |
| 543 | - } | |
| 544 | - | |
| 545 | - confirmed_pairs: list[str] = [] | |
| 546 | - for message in messages: | |
| 547 | - if message.role != Role.TOOL or _is_compacted_context_message(message.content): | |
| 548 | - continue | |
| 549 | - if any(result.is_error for result in message.tool_results): | |
| 550 | - continue | |
| 551 | - | |
| 552 | - tool_call = next( | |
| 553 | - ( | |
| 554 | - tool_calls_by_id.get(result.tool_call_id) | |
| 555 | - for result in message.tool_results | |
| 556 | - if result.tool_call_id in tool_calls_by_id | |
| 557 | - ), | |
| 558 | - None, | |
| 559 | - ) | |
| 560 | - if tool_call is None or tool_call.name != "read": | |
| 561 | - continue | |
| 562 | - | |
| 563 | - raw_path = tool_call.arguments.get("file_path") | |
| 564 | - if not isinstance(raw_path, str): | |
| 565 | - continue | |
| 566 | - normalized_path = _normalize_path_candidate(raw_path) or raw_path | |
| 567 | - if html_toc_rule.is_html_toc_index_path(normalized_path) or "/chapters/" not in normalized_path: | |
| 568 | - continue | |
| 569 | - | |
| 570 | - payload = "\n".join( | |
| 571 | - result.content.strip() | |
| 572 | - for result in message.tool_results | |
| 573 | - if result.content.strip() | |
| 574 | - ) or message.content | |
| 575 | - title = html_toc_rule.extract_html_title_from_text(payload) | |
| 576 | - if not title: | |
| 577 | - continue | |
| 578 | - | |
| 579 | - pair = f"{Path(normalized_path).name} = {title}" | |
| 580 | - if pair not in confirmed_pairs: | |
| 581 | - confirmed_pairs.append(pair) | |
| 582 | - | |
| 583 | - if not confirmed_pairs: | |
| 584 | - return None | |
| 585 | - | |
| 586 | - preview = ", ".join(confirmed_pairs[:max_pairs]) | |
| 587 | - if len(confirmed_pairs) > max_pairs: | |
| 588 | - preview += ", ..." | |
| 589 | - return f"Chapter titles confirmed: {preview}" | |
| 590 | - | |
| 591 | - | |
| 592 | -def _collect_html_file_discovery_fact( | |
| 593 | - messages: list[Message], | |
| 594 | - *, | |
| 595 | - tool_calls_by_id: dict[str, ToolCall], | |
| 596 | -) -> str | None: | |
| 597 | - filenames: list[str] = [] | |
| 598 | - for message in messages: | |
| 599 | - if message.role != Role.TOOL or _is_compacted_context_message(message.content): | |
| 600 | - continue | |
| 601 | - if any(result.is_error for result in message.tool_results): | |
| 602 | - continue | |
| 603 | - | |
| 604 | - tool_name = _resolve_tool_name( | |
| 605 | - message, | |
| 606 | - tool_calls_by_id=tool_calls_by_id, | |
| 607 | - ) | |
| 608 | - if tool_name not in {"glob", "bash"}: | |
| 609 | - continue | |
| 610 | - | |
| 611 | - payload = "\n".join( | |
| 612 | - result.content.strip() | |
| 613 | - for result in message.tool_results | |
| 614 | - if result.content.strip() | |
| 615 | - ) or message.content | |
| 616 | - matches = re.findall(r"([A-Za-z0-9_.-]+\.html)", payload) | |
| 617 | - for name in matches: | |
| 618 | - if name not in filenames: | |
| 619 | - filenames.append(name) | |
| 620 | - | |
| 621 | - if len(filenames) < 3: | |
| 622 | - return None | |
| 623 | - | |
| 624 | - preview = ", ".join(filenames[:6]) | |
| 625 | - if len(filenames) > 6: | |
| 626 | - preview += ", ..." | |
| 627 | - return f"Existing files include {preview}" | |
| 628 | - | |
| 629 | - | |
| 630 | -def _collect_html_verification_gap_fact( | |
| 631 | - messages: list[Message], | |
| 632 | - *, | |
| 633 | - tool_calls_by_id: dict[str, ToolCall], | |
| 634 | -) -> str | None: | |
| 635 | - gap = _summarize_latest_html_verification_gap( | |
| 636 | - messages, | |
| 637 | - tool_calls_by_id=tool_calls_by_id, | |
| 638 | - ) | |
| 639 | - if not gap: | |
| 640 | - return None | |
| 641 | - return f"Verification gaps: {gap}" | |
| 642 | - | |
| 643 | - | |
| 644 | -def _summarize_latest_html_verification_gap( | |
| 645 | - messages: list[Message], | |
| 646 | - *, | |
| 647 | - max_items: int = 2, | |
| 648 | - tool_calls_by_id: dict[str, ToolCall] | None = None, | |
| 649 | -) -> str | None: | |
| 650 | - if tool_calls_by_id is None: | |
| 651 | - tool_calls_by_id = { | |
| 652 | - tool_call.id: tool_call | |
| 653 | - for message in messages | |
| 654 | - for tool_call in message.tool_calls | |
| 655 | - } | |
| 656 | - | |
| 657 | - for message in reversed(messages): | |
| 658 | - if message.role != Role.TOOL or _is_compacted_context_message(message.content): | |
| 659 | - continue | |
| 660 | - if not any(result.is_error for result in message.tool_results): | |
| 661 | - continue | |
| 662 | - tool_name = _resolve_tool_name( | |
| 663 | - message, | |
| 664 | - tool_calls_by_id=tool_calls_by_id, | |
| 665 | - ) | |
| 666 | - if tool_name != "bash": | |
| 667 | - continue | |
| 668 | - | |
| 669 | - payload = "\n".join( | |
| 670 | - result.content.strip() | |
| 671 | - for result in message.tool_results | |
| 672 | - if result.content.strip() | |
| 673 | - ) or message.content | |
| 674 | - gap = html_toc_rule.summarize_html_toc_verification_gap( | |
| 675 | - payload, | |
| 676 | - max_items=max_items, | |
| 677 | - ) | |
| 678 | - if gap: | |
| 679 | - return gap | |
| 680 | - | |
| 681 | - return None | |
| 682 | - | |
| 683 | - | |
| 684 | -def _summarize_html_file_discovery(payload: str) -> str | None: | |
| 685 | - return html_toc_rule.summarize_html_file_discovery(payload) | |
| 686 | - | |
| 687 | - | |
| 688 | 498 | def _resolve_tool_name( |
| 689 | 499 | message: Message, |
| 690 | 500 | *, |
@@ -710,9 +520,6 @@ def _choose_target_path( | ||
| 710 | 520 | if focus_path: |
| 711 | 521 | normalized_focus = _normalize_path_candidate(focus_path) |
| 712 | 522 | if normalized_focus: |
| 713 | - resolved_focus = html_toc_rule.resolve_html_toc_index_path(normalized_focus) | |
| 714 | - if resolved_focus is not None: | |
| 715 | - return str(resolved_focus) | |
| 716 | 523 | return normalized_focus |
| 717 | 524 | |
| 718 | 525 | candidates: Counter[str] = Counter() |
@@ -727,9 +534,9 @@ def _choose_target_path( | ||
| 727 | 534 | if not normalized: |
| 728 | 535 | continue |
| 729 | 536 | path_name = Path(normalized).name |
| 730 | - if html_toc_rule.is_html_toc_index_path(normalized): | |
| 537 | + if path_name == "index.html": | |
| 731 | 538 | candidates[normalized] += 10 |
| 732 | - elif path_name.endswith(".html") and "/chapters/" not in normalized: | |
| 539 | + elif "." in path_name: | |
| 733 | 540 | candidates[normalized] += 4 |
| 734 | 541 | |
| 735 | 542 | if candidates: |
@@ -738,9 +545,6 @@ def _choose_target_path( | ||
| 738 | 545 | if not current_task: |
| 739 | 546 | return None |
| 740 | 547 | current_task_paths = extract_key_files([Message(role=Role.USER, content=current_task)], limit=3) |
| 741 | - for path in current_task_paths: | |
| 742 | - if html_toc_rule.is_html_toc_index_path(path): | |
| 743 | - return path | |
| 744 | 548 | return current_task_paths[0] if current_task_paths else None |
| 745 | 549 | |
| 746 | 550 | |
@@ -770,14 +574,7 @@ def _focus_path_anchors(focus_path: str) -> tuple[str, ...]: | ||
| 770 | 574 | ) |
| 771 | 575 | focus = Path(normalized_focus).expanduser() |
| 772 | 576 | anchors = {str(focus)} |
| 773 | - | |
| 774 | - resolved_index = html_toc_rule.resolve_html_toc_index_path(focus) | |
| 775 | - if resolved_index is not None: | |
| 776 | - anchors.add(str(resolved_index)) | |
| 777 | - anchors.add(str(resolved_index.parent)) | |
| 778 | - anchors.add(str(resolved_index.parent / "chapters")) | |
| 779 | - else: | |
| 780 | - anchors.add(str(focus.parent)) | |
| 577 | + anchors.add(str(focus.parent)) | |
| 781 | 578 | |
| 782 | 579 | return tuple(anchor for anchor in anchors if anchor) |
| 783 | 580 | |
src/loader/runtime/dod.py (modified)
@@ -12,7 +12,6 @@ from typing import Any, Literal
| 12 | 12 | |
| 13 | 13 | from ..llm.base import ToolCall |
| 14 | 14 | from ..tools.shell_tools import BashTool |
| 15 | -from .semantic_rules import html_toc as html_toc_rule | |
| 16 | 15 | from .verification_observations import VerificationAttempt, verification_attempt_id |
| 17 | 16 | |
| 18 | 17 | TaskSize = Literal["small", "standard", "large"] |
@@ -20,6 +19,38 @@ DoDStatus = Literal["draft", "in_progress", "verifying", "fixing", "done", "fail | ||
| 20 | 19 | VerificationConfidence = Literal["high", "medium", "low"] |
| 21 | 20 | VerificationKind = Literal["test", "typecheck", "lint", "build", "smoke", "runtime", "manual"] |
| 22 | 21 | |
| 22 | +_DIRECTORY_CONTENT_HINTS = ( | |
| 23 | + "file", | |
| 24 | + "files", | |
| 25 | + "chapter", | |
| 26 | + "chapters", | |
| 27 | + "page", | |
| 28 | + "pages", | |
| 29 | + "test", | |
| 30 | + "tests", | |
| 31 | + "artifact", | |
| 32 | + "artifacts", | |
| 33 | + "document", | |
| 34 | + "documents", | |
| 35 | + "content", | |
| 36 | + "entry", | |
| 37 | + "entries", | |
| 38 | +) | |
| 39 | +_DIRECTORY_MUTATION_HINTS = ( | |
| 40 | + "create", | |
| 41 | + "creating", | |
| 42 | + "generate", | |
| 43 | + "generating", | |
| 44 | + "write", | |
| 45 | + "writing", | |
| 46 | + "add", | |
| 47 | + "adding", | |
| 48 | + "build", | |
| 49 | + "building", | |
| 50 | + "populate", | |
| 51 | + "populating", | |
| 52 | +) | |
| 53 | + | |
| 23 | 54 | |
| 24 | 55 | @dataclass |
| 25 | 56 | class VerificationEvidence: |
@@ -213,10 +244,13 @@ def derive_verification_commands( | ||
| 213 | 244 | """Generate verification commands from execution history and project shape.""" |
| 214 | 245 | |
| 215 | 246 | commands: list[str] = [] |
| 216 | - semantic_command = _derive_html_toc_verification_command( | |
| 247 | + html_link_command = _derive_local_html_link_verification_command( | |
| 248 | + dod, | |
| 249 | + project_root=project_root, | |
| 250 | + ) | |
| 251 | + planned_artifact_targets = collect_planned_artifact_targets( | |
| 217 | 252 | dod, |
| 218 | 253 | project_root=project_root, |
| 219 | - task_statement=task_statement, | |
| 220 | 254 | ) |
| 221 | 255 | |
| 222 | 256 | explicit = [cmd for cmd in dod.successful_commands if _is_verification_command(cmd)] |
@@ -230,8 +264,10 @@ def derive_verification_commands( | ||
| 230 | 264 | if path.suffix == ".py": |
| 231 | 265 | _append_unique(commands, f"python {shlex.quote(path.name)}") |
| 232 | 266 | |
| 233 | - if semantic_command: | |
| 234 | - _append_unique(commands, semantic_command) | |
| 267 | + if html_link_command: | |
| 268 | + _append_unique(commands, html_link_command) | |
| 269 | + for command in _build_planned_artifact_verification_commands(planned_artifact_targets): | |
| 270 | + _append_unique(commands, command) | |
| 235 | 271 | |
| 236 | 272 | if commands: |
| 237 | 273 | return commands |
@@ -512,30 +548,490 @@ def _extract_files_from_bash(command: str) -> list[str]: | ||
| 512 | 548 | return [] |
| 513 | 549 | |
| 514 | 550 | |
| 515 | -def _derive_html_toc_verification_command( | |
| 551 | +def _derive_local_html_link_verification_command( | |
| 516 | 552 | dod: DefinitionOfDone, |
| 517 | 553 | *, |
| 518 | 554 | project_root: Path, |
| 519 | - task_statement: str, | |
| 520 | 555 | ) -> str | None: |
| 521 | - task_hints = " ".join([task_statement, *dod.acceptance_criteria]).lower() | |
| 522 | - if not html_toc_rule.task_targets_html_toc(task_hints): | |
| 523 | - return None | |
| 524 | - | |
| 556 | + html_paths: list[Path] = [] | |
| 525 | 557 | for path_str in dod.touched_files: |
| 526 | 558 | path = Path(path_str) |
| 527 | 559 | effective_path = path if path.is_absolute() else (project_root / path) |
| 528 | - command = html_toc_rule.build_html_toc_verification_command(effective_path) | |
| 529 | - if command: | |
| 530 | - return command | |
| 560 | + if effective_path.suffix.lower() != ".html" or not effective_path.exists(): | |
| 561 | + continue | |
| 562 | + html_paths.append(effective_path) | |
| 563 | + | |
| 564 | + unique_paths = list(dict.fromkeys(str(path) for path in html_paths)) | |
| 565 | + resolved_paths = [Path(path) for path in unique_paths] | |
| 566 | + if not resolved_paths: | |
| 567 | + return None | |
| 568 | + if not any(_html_file_contains_local_links(path) for path in resolved_paths): | |
| 569 | + return None | |
| 570 | + return _build_local_html_link_verification_command(resolved_paths) | |
| 571 | + | |
| 572 | + | |
| 573 | +def collect_planned_artifact_targets( | |
| 574 | + dod: DefinitionOfDone, | |
| 575 | + *, | |
| 576 | + project_root: Path, | |
| 577 | + max_paths: int | None = None, | |
| 578 | +) -> list[tuple[Path, bool]]: | |
| 579 | + if not dod.implementation_plan: | |
| 580 | + return [] | |
| 581 | + | |
| 582 | + plan_path = Path(dod.implementation_plan) | |
| 583 | + if not plan_path.exists(): | |
| 584 | + return [] | |
| 585 | + | |
| 586 | + markdown = plan_path.read_text() | |
| 587 | + file_change_lines = _extract_markdown_section_lines(markdown, "File Changes") | |
| 588 | + candidates = _extract_planned_path_literals(file_change_lines or markdown.splitlines()) | |
| 589 | + if not candidates: | |
| 590 | + confirmed_progress_lines = _extract_markdown_section_lines( | |
| 591 | + markdown, | |
| 592 | + "Confirmed Progress", | |
| 593 | + ) | |
| 594 | + candidates = _extract_planned_path_literals(confirmed_progress_lines) | |
| 595 | + targets: list[tuple[Path, bool]] = [] | |
| 596 | + seen: set[tuple[str, bool]] = set() | |
| 597 | + | |
| 598 | + selected_candidates = candidates if max_paths is None else candidates[:max_paths] | |
| 599 | + for raw_path in selected_candidates: | |
| 600 | + effective_path = _resolve_planned_artifact_path(raw_path, project_root=project_root) | |
| 601 | + if effective_path is None: | |
| 602 | + continue | |
| 603 | + expect_directory = raw_path.endswith("/") | |
| 604 | + if not expect_directory and not effective_path.suffix: | |
| 605 | + continue | |
| 606 | + key = (str(effective_path), expect_directory) | |
| 607 | + if key in seen: | |
| 608 | + continue | |
| 609 | + seen.add(key) | |
| 610 | + targets.append((effective_path, expect_directory)) | |
| 611 | + return targets | |
| 612 | + | |
| 613 | + | |
| 614 | +def all_planned_artifacts_exist( | |
| 615 | + dod: DefinitionOfDone, | |
| 616 | + *, | |
| 617 | + project_root: Path, | |
| 618 | + max_paths: int | None = None, | |
| 619 | +) -> bool: | |
| 620 | + targets = collect_planned_artifact_targets( | |
| 621 | + dod, | |
| 622 | + project_root=project_root, | |
| 623 | + max_paths=max_paths, | |
| 624 | + ) | |
| 625 | + if not targets: | |
| 626 | + return False | |
| 627 | + if not all( | |
| 628 | + planned_artifact_target_satisfied( | |
| 629 | + dod, | |
| 630 | + target=target, | |
| 631 | + expect_directory=expect_directory, | |
| 632 | + project_root=project_root, | |
| 633 | + ) | |
| 634 | + for target, expect_directory in targets | |
| 635 | + ): | |
| 636 | + return False | |
| 637 | + return not _planned_html_outputs_have_missing_local_links( | |
| 638 | + dod, | |
| 639 | + project_root=project_root, | |
| 640 | + targets=targets, | |
| 641 | + ) | |
| 642 | + | |
| 643 | + | |
| 644 | +def planned_artifact_target_satisfied( | |
| 645 | + dod: DefinitionOfDone, | |
| 646 | + *, | |
| 647 | + target: Path, | |
| 648 | + expect_directory: bool, | |
| 649 | + project_root: Path, | |
| 650 | +) -> bool: | |
| 651 | + """Return whether one planned file or directory target is substantively satisfied.""" | |
| 652 | + | |
| 653 | + if not expect_directory: | |
| 654 | + return target.is_file() | |
| 655 | + if not target.is_dir(): | |
| 656 | + return False | |
| 657 | + if not planned_directory_requires_generated_files( | |
| 658 | + dod, | |
| 659 | + target=target, | |
| 660 | + project_root=project_root, | |
| 661 | + ): | |
| 662 | + return True | |
| 663 | + return _directory_contains_files(target) | |
| 664 | + | |
| 665 | + | |
| 666 | +def infer_next_declared_html_output_file( | |
| 667 | + *, | |
| 668 | + target: Path, | |
| 669 | + project_root: Path, | |
| 670 | +) -> Path | None: | |
| 671 | + """Return the first missing HTML file already declared within an output directory.""" | |
| 672 | + | |
| 673 | + missing_targets = collect_missing_declared_html_output_files( | |
| 674 | + target=target, | |
| 675 | + project_root=project_root, | |
| 676 | + ) | |
| 677 | + return missing_targets[0] if missing_targets else None | |
| 678 | + | |
| 679 | + | |
| 680 | +def collect_missing_declared_html_output_files( | |
| 681 | + *, | |
| 682 | + target: Path, | |
| 683 | + project_root: Path, | |
| 684 | +) -> tuple[Path, ...]: | |
| 685 | + """Return missing HTML outputs already declared within the current artifact graph.""" | |
| 686 | + | |
| 687 | + normalized_target = target.resolve(strict=False) | |
| 688 | + artifact_root = _resolve_declared_html_artifact_root( | |
| 689 | + normalized_target, | |
| 690 | + project_root=project_root.resolve(strict=False), | |
| 691 | + ) | |
| 692 | + if artifact_root is None: | |
| 693 | + return () | |
| 694 | + | |
| 695 | + html_files = [path for path in sorted(artifact_root.rglob("*.html")) if path.is_file()] | |
| 696 | + if not html_files: | |
| 697 | + return () | |
| 698 | + | |
| 699 | + missing_targets: list[Path] = [] | |
| 700 | + seen: set[str] = set() | |
| 701 | + for html_file in html_files: | |
| 702 | + try: | |
| 703 | + content = html_file.read_text() | |
| 704 | + except OSError: | |
| 705 | + continue | |
| 706 | + for resolved_target in _iter_local_html_targets(html_file, content): | |
| 707 | + if resolved_target.exists(): | |
| 708 | + continue | |
| 709 | + if resolved_target.suffix.lower() not in {".html", ".htm"}: | |
| 710 | + continue | |
| 711 | + try: | |
| 712 | + resolved_target.relative_to(artifact_root) | |
| 713 | + resolved_target.relative_to(normalized_target) | |
| 714 | + except ValueError: | |
| 715 | + continue | |
| 716 | + key = str(resolved_target) | |
| 717 | + if key in seen: | |
| 718 | + continue | |
| 719 | + seen.add(key) | |
| 720 | + missing_targets.append(resolved_target) | |
| 721 | + return tuple(missing_targets) | |
| 722 | + | |
| 723 | + | |
| 724 | +def _build_planned_artifact_verification_commands( | |
| 725 | + targets: list[tuple[Path, bool]], | |
| 726 | +) -> list[str]: | |
| 727 | + commands: list[str] = [] | |
| 728 | + for effective_path, expect_directory in targets: | |
| 729 | + command = ( | |
| 730 | + f"test -d {shlex.quote(str(effective_path))}" | |
| 731 | + if expect_directory | |
| 732 | + else f"test -f {shlex.quote(str(effective_path))}" | |
| 733 | + ) | |
| 734 | + _append_unique(commands, command) | |
| 735 | + return commands | |
| 736 | + | |
| 737 | + | |
| 738 | +def _extract_markdown_section_lines(markdown: str, heading: str) -> list[str]: | |
| 739 | + current_heading: str | None = None | |
| 740 | + collected: list[str] = [] | |
| 741 | + for line in markdown.splitlines(): | |
| 742 | + stripped = line.strip() | |
| 743 | + if stripped.startswith("## "): | |
| 744 | + current_heading = stripped[3:].strip().lower() | |
| 745 | + continue | |
| 746 | + if current_heading == heading.lower(): | |
| 747 | + collected.append(line) | |
| 748 | + return collected | |
| 749 | + | |
| 750 | + | |
| 751 | +def _extract_planned_path_literals(lines: list[str]) -> list[str]: | |
| 752 | + paths: list[str] = [] | |
| 753 | + seen: set[str] = set() | |
| 754 | + | |
| 755 | + for line in lines: | |
| 756 | + candidates = re.findall(r"`([^`]+)`", line) | |
| 757 | + if not candidates: | |
| 758 | + stripped = line.strip() | |
| 759 | + stripped = re.sub(r"^[-*+]\s+", "", stripped) | |
| 760 | + stripped = re.sub(r"^\d+[.)]\s+", "", stripped) | |
| 761 | + stripped = stripped.strip("`'\",.:;()[]{}") | |
| 762 | + candidates = [stripped] if _looks_like_path_literal(stripped) else [] | |
| 763 | + for candidate in candidates: | |
| 764 | + normalized = candidate.strip("`'\",.:;()[]{}") | |
| 765 | + if not _looks_like_path_literal(normalized) or normalized in seen: | |
| 766 | + continue | |
| 767 | + seen.add(normalized) | |
| 768 | + paths.append(normalized) | |
| 769 | + return paths | |
| 770 | + | |
| 771 | + | |
| 772 | +def _resolve_declared_html_artifact_root( | |
| 773 | + target: Path, | |
| 774 | + *, | |
| 775 | + project_root: Path, | |
| 776 | +) -> Path | None: | |
| 777 | + for candidate in [target, *target.parents]: | |
| 778 | + if (candidate / "index.html").is_file(): | |
| 779 | + return candidate | |
| 780 | + if candidate == project_root or candidate == candidate.parent: | |
| 781 | + break | |
| 782 | + | |
| 783 | + fallback = target if target.exists() else target.parent | |
| 784 | + if fallback.exists(): | |
| 785 | + return fallback | |
| 531 | 786 | return None |
| 532 | 787 | |
| 533 | 788 | |
| 534 | -def _build_html_toc_verification_command(index_path: Path) -> str: | |
| 535 | - command = html_toc_rule.build_html_toc_verification_command(index_path) | |
| 536 | - if command is None: | |
| 537 | - raise ValueError(f"{index_path} is not a valid HTML TOC target") | |
| 538 | - return command | |
| 789 | +def _iter_local_html_targets(file_path: Path, content: str) -> list[Path]: | |
| 790 | + pattern = re.compile(r'href\s*=\s*["\']([^"\']+)["\']', re.IGNORECASE) | |
| 791 | + targets: list[Path] = [] | |
| 792 | + seen: set[str] = set() | |
| 793 | + for href in pattern.findall(content): | |
| 794 | + candidate = href.strip() | |
| 795 | + if not _is_local_html_link_target(candidate): | |
| 796 | + continue | |
| 797 | + resolved = (file_path.parent / candidate).resolve(strict=False) | |
| 798 | + key = str(resolved) | |
| 799 | + if key in seen: | |
| 800 | + continue | |
| 801 | + seen.add(key) | |
| 802 | + targets.append(resolved) | |
| 803 | + return targets | |
| 804 | + | |
| 805 | + | |
| 806 | +def _is_local_html_link_target(href: str) -> bool: | |
| 807 | + candidate = href.strip() | |
| 808 | + if not candidate or candidate.startswith(("#", "http://", "https://", "mailto:")): | |
| 809 | + return False | |
| 810 | + if "?" in candidate: | |
| 811 | + candidate = candidate.split("?", 1)[0] | |
| 812 | + if "#" in candidate: | |
| 813 | + candidate = candidate.split("#", 1)[0] | |
| 814 | + return Path(candidate).suffix.lower() in {".html", ".htm"} | |
| 815 | + | |
| 816 | + | |
| 817 | +def _looks_like_path_literal(value: str) -> bool: | |
| 818 | + if not value or " " in value: | |
| 819 | + return False | |
| 820 | + if value.startswith(("http://", "https://")): | |
| 821 | + return False | |
| 822 | + return ( | |
| 823 | + value.startswith(("~/", "./", "../", "/")) | |
| 824 | + or "/" in value | |
| 825 | + or value.endswith("/") | |
| 826 | + ) | |
| 827 | + | |
| 828 | + | |
| 829 | +def _resolve_planned_artifact_path( | |
| 830 | + raw_path: str, | |
| 831 | + *, | |
| 832 | + project_root: Path, | |
| 833 | +) -> Path | None: | |
| 834 | + text = raw_path.strip() | |
| 835 | + if not text: | |
| 836 | + return None | |
| 837 | + path = Path(text).expanduser() | |
| 838 | + if path.is_absolute(): | |
| 839 | + return path | |
| 840 | + return project_root / path | |
| 841 | + | |
| 842 | + | |
| 843 | +def planned_directory_requires_generated_files( | |
| 844 | + dod: DefinitionOfDone, | |
| 845 | + *, | |
| 846 | + target: Path, | |
| 847 | + project_root: Path, | |
| 848 | +) -> bool: | |
| 849 | + """Return whether a planned directory is expected to contain generated files.""" | |
| 850 | + | |
| 851 | + plan_path = Path(dod.implementation_plan) if dod.implementation_plan else None | |
| 852 | + if plan_path is not None and plan_path.exists(): | |
| 853 | + markdown = plan_path.read_text() | |
| 854 | + file_change_lines = _extract_markdown_section_lines(markdown, "File Changes") | |
| 855 | + if any( | |
| 856 | + _line_describes_directory_contents(line, target=target, project_root=project_root) | |
| 857 | + for line in file_change_lines | |
| 858 | + ): | |
| 859 | + return True | |
| 860 | + | |
| 861 | + execution_lines = _extract_markdown_section_lines(markdown, "Execution Order") | |
| 862 | + if any( | |
| 863 | + _line_mentions_directory_generation(line, target=target) | |
| 864 | + for line in execution_lines | |
| 865 | + ): | |
| 866 | + return True | |
| 867 | + | |
| 868 | + todo_lines = [*dod.pending_items, *dod.completed_items] | |
| 869 | + return any( | |
| 870 | + _line_mentions_directory_generation(line, target=target) | |
| 871 | + for line in todo_lines | |
| 872 | + ) | |
| 873 | + | |
| 874 | + | |
| 875 | +def _line_describes_directory_contents( | |
| 876 | + line: str, | |
| 877 | + *, | |
| 878 | + target: Path, | |
| 879 | + project_root: Path, | |
| 880 | +) -> bool: | |
| 881 | + lowered = line.lower() | |
| 882 | + if not any(hint in lowered for hint in _DIRECTORY_CONTENT_HINTS): | |
| 883 | + return False | |
| 884 | + | |
| 885 | + target_text = str(target) | |
| 886 | + relative_target = str(target.relative_to(project_root)) if target.is_relative_to(project_root) else "" | |
| 887 | + if target_text in line or relative_target and relative_target in line: | |
| 888 | + return True | |
| 889 | + return _line_mentions_directory_generation(line, target=target) | |
| 890 | + | |
| 891 | + | |
| 892 | +def _line_mentions_directory_generation(line: str, *, target: Path) -> bool: | |
| 893 | + lowered = line.lower() | |
| 894 | + if not any(hint in lowered for hint in _DIRECTORY_CONTENT_HINTS): | |
| 895 | + return False | |
| 896 | + if not any(hint in lowered for hint in _DIRECTORY_MUTATION_HINTS) and "directory for" not in lowered: | |
| 897 | + return False | |
| 898 | + directory_tokens = _directory_tokens(target) | |
| 899 | + return any(token in lowered for token in directory_tokens) | |
| 900 | + | |
| 901 | + | |
| 902 | +def _directory_tokens(target: Path) -> set[str]: | |
| 903 | + tokens: set[str] = set() | |
| 904 | + for raw_token in re.split(r"[^a-z0-9]+", target.name.lower()): | |
| 905 | + token = raw_token.strip() | |
| 906 | + if len(token) < 2: | |
| 907 | + continue | |
| 908 | + tokens.add(token) | |
| 909 | + if token.endswith("ies") and len(token) > 3: | |
| 910 | + tokens.add(f"{token[:-3]}y") | |
| 911 | + elif token.endswith("s") and len(token) > 3: | |
| 912 | + tokens.add(token[:-1]) | |
| 913 | + return tokens | |
| 914 | + | |
| 915 | + | |
| 916 | +def _directory_contains_files(target: Path) -> bool: | |
| 917 | + try: | |
| 918 | + return any(child.is_file() for child in target.rglob("*")) | |
| 919 | + except OSError: | |
| 920 | + return False | |
| 921 | + | |
| 922 | + | |
| 923 | +def _html_file_contains_local_links(path: Path) -> bool: | |
| 924 | + pattern = re.compile(r'href\s*=\s*["\']([^"\']+)["\']', re.IGNORECASE) | |
| 925 | + try: | |
| 926 | + text = path.read_text() | |
| 927 | + except OSError: | |
| 928 | + return False | |
| 929 | + return any(_is_local_html_link_target(href) for href in pattern.findall(text)) | |
| 930 | + | |
| 931 | + | |
def _planned_html_outputs_have_missing_local_links(
    dod: DefinitionOfDone,
    *,
    project_root: Path,
    targets: list[tuple[Path, bool]],
) -> bool:
    """Return True when any touched or planned HTML file has a broken local link.

    Candidate files come from two places: HTML paths recorded in
    ``dod.touched_files`` (resolved against *project_root* when relative) and
    planned file targets (directory targets are skipped).  Only files that
    exist on disk are scanned, and each file is checked at most once.
    """
    candidates: list[Path] = []
    for raw_path in dod.touched_files:
        touched = Path(raw_path)
        if not touched.is_absolute():
            touched = project_root / touched
        if touched.suffix.lower() == ".html" and touched.exists():
            candidates.append(touched)

    for target, expect_directory in targets:
        if expect_directory:
            continue
        if target.suffix.lower() == ".html" and target.exists():
            candidates.append(target)

    checked: set[str] = set()
    for candidate in candidates:
        key = str(candidate)
        if key in checked:
            continue
        checked.add(key)
        if _html_file_has_missing_local_links(candidate):
            return True
    return False
| 960 | + | |
| 961 | + | |
def _html_file_has_missing_local_links(path: Path) -> bool:
    """Return True when *path* contains a local href whose target does not exist.

    Fragment (``#...``) and query (``?...``) suffixes are stripped before the
    href is resolved against the HTML file's directory.  Unreadable files
    report no missing links.
    """
    href_re = re.compile(r'href\s*=\s*["\']([^"\']+)["\']', re.IGNORECASE)
    try:
        markup = path.read_text()
    except OSError:
        return False
    for raw_href in href_re.findall(markup):
        href = raw_href.strip()
        if not _is_local_html_link_target(href):
            continue
        relative = href.split("#", 1)[0].split("?", 1)[0].strip()
        if not relative:
            continue
        resolved = (path.parent / relative).resolve()
        if not resolved.exists():
            return True
    return False
| 978 | + | |
| 979 | + | |
| 980 | +def _is_local_html_link_target(href: str) -> bool: | |
| 981 | + target = href.strip() | |
| 982 | + if not target: | |
| 983 | + return False | |
| 984 | + if target.startswith(("#", "mailto:", "tel:", "javascript:")): | |
| 985 | + return False | |
| 986 | + if "://" in target: | |
| 987 | + return False | |
| 988 | + target = target.split("#", 1)[0].split("?", 1)[0].strip() | |
| 989 | + return bool(target) | |
| 990 | + | |
| 991 | + | |
def _build_local_html_link_verification_command(paths: list[Path]) -> str:
    """Build a self-contained ``python3`` heredoc command that checks local hrefs.

    The emitted script scans each existing HTML file in *paths*, skips
    fragment/mailto/tel/javascript hrefs and absolute URLs, resolves every
    remaining href against its HTML file's directory, and on any miss prints a
    "Missing local HTML links:" report and exits non-zero.  That exact report
    header and the ``file:href -> resolved`` line shape are parsed back by
    `_extract_missing_local_html_links`, so they must not change.
    NOTE(review): ``data:`` URIs are not filtered here — presumably the same
    gap as the host-side filter; confirm before tightening either one.
    """
    # repr() keeps the embedded list literal valid even when paths contain
    # quotes or spaces.
    serialized_paths = ", ".join(repr(str(path)) for path in paths)
    return "\n".join(
        [
            "python3 - <<'PY'",
            "from pathlib import Path",
            "import re",
            "",
            f"paths = [{serialized_paths}]",
            # Raw-string + adjacent-literal split keeps the quoting of the
            # embedded regex readable at this nesting depth.
            (
                r"pattern = re.compile(r'href\s*=\s*[\"\\\']([^\"\\\']+)[\"\\\']', "
                "re.IGNORECASE)"
            ),
            "checked = 0",
            "missing = []",
            "for raw_path in paths:",
            "    html_path = Path(raw_path)",
            "    if not html_path.exists():",
            "        continue",
            "    text = html_path.read_text()",
            "    for href in pattern.findall(text):",
            "        target = href.strip()",
            "        if not target:",
            "            continue",
            "        if target.startswith((\"#\", \"mailto:\", \"tel:\", \"javascript:\")):",
            "            continue",
            "        if \"://\" in target:",
            "            continue",
            "        target = target.split(\"#\", 1)[0].split(\"?\", 1)[0].strip()",
            "        if not target:",
            "            continue",
            "        checked += 1",
            "        resolved = (html_path.parent / target).resolve()",
            "        if not resolved.exists():",
            "            missing.append(f\"{html_path}:{href} -> {resolved}\")",
            "if missing:",
            "    print(\"Missing local HTML links:\")",
            "    print(\"\\n\".join(missing))",
            "    raise SystemExit(1)",
            "print(f\"Checked {checked} local HTML links across {len(paths)} file(s).\")",
            "PY",
        ]
    )
| 539 | 1035 | |
| 540 | 1036 | |
| 541 | 1037 | def _first_non_empty_line(text: str) -> str: |
src/loader/runtime/explore.pymodified@@ -109,6 +109,7 @@ class ExploreRuntime: | ||
| 109 | 109 | registry=self.registry, |
| 110 | 110 | rollback_plan=None, |
| 111 | 111 | workspace_root=self.context.project_root, |
| 112 | + session=self.context.session, | |
| 112 | 113 | ), |
| 113 | 114 | ) |
| 114 | 115 | |
src/loader/runtime/finalization.pymodified@@ -14,6 +14,7 @@ from .dod import ( | ||
| 14 | 14 | DefinitionOfDoneStore, |
| 15 | 15 | VerificationEvidence, |
| 16 | 16 | build_verification_summary, |
| 17 | + collect_planned_artifact_targets, | |
| 17 | 18 | derive_verification_commands, |
| 18 | 19 | ensure_active_verification_attempt, |
| 19 | 20 | synthesize_todo_items, |
@@ -28,7 +29,6 @@ from .executor import ToolExecutor | ||
| 28 | 29 | from .logging import get_runtime_logger |
| 29 | 30 | from .memory import MemoryStore |
| 30 | 31 | from .policy_timeline import append_verification_timeline_entry |
| 31 | -from .semantic_rules import html_toc as html_toc_rule | |
| 32 | 32 | from .session import normalize_usage |
| 33 | 33 | from .tracing import RuntimeTracer |
| 34 | 34 | from .verification_observations import ( |
@@ -41,6 +41,7 @@ from .workflow import ( | ||
| 41 | 41 | WorkflowMode, |
| 42 | 42 | WorkflowTimelineEntry, |
| 43 | 43 | WorkflowTimelineEntryKind, |
| 44 | + effective_pending_todo_items, | |
| 44 | 45 | extract_verification_commands_from_markdown, |
| 45 | 46 | ) |
| 46 | 47 | |
@@ -98,13 +99,20 @@ class TurnFinalizer: | ||
| 98 | 99 | """Gate completion on DoD state and verification evidence.""" |
| 99 | 100 | |
| 100 | 101 | implementation_item = "Complete the requested work" |
| 101 | - if implementation_item in dod.pending_items: | |
| 102 | - dod.pending_items.remove(implementation_item) | |
| 103 | - dod.completed_items.append(implementation_item) | |
| 102 | + verification_item = "Collect verification evidence" | |
| 104 | 103 | |
| 105 | 104 | tracked_pending_items = [ |
| 106 | - item for item in dod.pending_items if item != "Collect verification evidence" | |
| 105 | + item | |
| 106 | + for item in effective_pending_todo_items( | |
| 107 | + dod, | |
| 108 | + project_root=self.context.project_root, | |
| 109 | + ) | |
| 110 | + if item not in {implementation_item, verification_item} | |
| 107 | 111 | ] |
| 112 | + missing_planned_artifacts = _missing_planned_artifact_labels( | |
| 113 | + dod, | |
| 114 | + project_root=self.context.project_root, | |
| 115 | + ) | |
| 108 | 116 | |
| 109 | 117 | mutating_paths = [path for path in dod.touched_files if path] |
| 110 | 118 | requires_verification = bool(mutating_paths or dod.mutating_actions) |
@@ -115,6 +123,60 @@ class TurnFinalizer: | ||
| 115 | 123 | reason=f"files={mutating_paths[:3]}, actions={len(dod.mutating_actions)}" |
| 116 | 124 | if requires_verification else None, |
| 117 | 125 | ) |
| 126 | + if missing_planned_artifacts: | |
| 127 | + recovery_nudge = _build_missing_artifact_recovery_nudge( | |
| 128 | + _first_missing_planned_artifact( | |
| 129 | + dod, | |
| 130 | + project_root=self.context.project_root, | |
| 131 | + ) | |
| 132 | + ) | |
| 133 | + if recovery_nudge: | |
| 134 | + self.context.queue_steering_message(recovery_nudge) | |
| 135 | + missing_provenance = [ | |
| 136 | + EvidenceProvenance( | |
| 137 | + category="tracked_work", | |
| 138 | + source="dod.implementation_plan", | |
| 139 | + summary=f"planned artifact still missing: {label}", | |
| 140 | + status=EvidenceProvenanceStatus.MISSING.value, | |
| 141 | + subject=label, | |
| 142 | + ) | |
| 143 | + for label in missing_planned_artifacts | |
| 144 | + ] | |
| 145 | + missing_text = "\n".join( | |
| 146 | + f"- {label}" for label in missing_planned_artifacts[:8] | |
| 147 | + ) | |
| 148 | + pending_text = "" | |
| 149 | + if tracked_pending_items: | |
| 150 | + pending_text = ( | |
| 151 | + "\nRemaining tracked work:\n" | |
| 152 | + + "\n".join(f"- {item}" for item in tracked_pending_items[:6]) | |
| 153 | + ) | |
| 154 | + self.dod_store.save(dod) | |
| 155 | + await self.emit_dod_status(emit, dod) | |
| 156 | + self.context.session.append( | |
| 157 | + Message( | |
| 158 | + role=Role.USER, | |
| 159 | + content=( | |
| 160 | + "[PLANNED ARTIFACTS STILL MISSING]\n" | |
| 161 | + "The explicit implementation plan is not complete yet. " | |
| 162 | + "Do not move to verification or final confirmation.\n\n" | |
| 163 | + "Missing planned artifacts:\n" | |
| 164 | + f"{missing_text}" | |
| 165 | + f"{pending_text}\n\n" | |
| 166 | + "Continue by creating or updating the missing planned artifacts." | |
| 167 | + ), | |
| 168 | + ) | |
| 169 | + ) | |
| 170 | + return CompletionGateResult( | |
| 171 | + should_continue=True, | |
| 172 | + reason_code="planned_artifacts_missing_continue", | |
| 173 | + reason_summary=( | |
| 174 | + "continued because explicitly planned artifacts were still missing " | |
| 175 | + "before verification" | |
| 176 | + ), | |
| 177 | + final_response="", | |
| 178 | + evidence_provenance=missing_provenance, | |
| 179 | + ) | |
| 118 | 180 | if tracked_pending_items and not requires_verification: |
| 119 | 181 | pending_provenance = [ |
| 120 | 182 | EvidenceProvenance( |
@@ -149,6 +211,10 @@ class TurnFinalizer: | ||
| 149 | 211 | ) |
| 150 | 212 | |
| 151 | 213 | if not requires_verification: |
| 214 | + if implementation_item in dod.pending_items: | |
| 215 | + dod.pending_items.remove(implementation_item) | |
| 216 | + if implementation_item not in dod.completed_items: | |
| 217 | + dod.completed_items.append(implementation_item) | |
| 152 | 218 | skip_provenance = [ |
| 153 | 219 | EvidenceProvenance( |
| 154 | 220 | category="verification", |
@@ -240,9 +306,15 @@ class TurnFinalizer: | ||
| 240 | 306 | f"Task: {dod.task_statement}\n" |
| 241 | 307 | "No new file changes were made since the last failed verification.\n\n" |
| 242 | 308 | f"{build_verification_summary(dod.evidence)}\n\n" |
| 243 | - f"{_build_verification_repair_guidance(dod)}\n\n" | |
| 309 | + f"{_build_verification_repair_guidance(dod, project_root=self.context.project_root)}\n\n" | |
| 244 | 310 | "Apply a concrete edit or patch before trying to finish again." |
| 245 | 311 | ) |
| 312 | + recovery_nudge = _build_verification_failure_recovery_nudge( | |
| 313 | + dod, | |
| 314 | + project_root=self.context.project_root, | |
| 315 | + ) | |
| 316 | + if recovery_nudge: | |
| 317 | + self.context.queue_steering_message(recovery_nudge) | |
| 246 | 318 | self.context.session.append(Message(role=Role.USER, content=repair_prompt)) |
| 247 | 319 | return CompletionGateResult( |
| 248 | 320 | should_continue=True, |
@@ -407,6 +479,12 @@ class TurnFinalizer: | ||
| 407 | 479 | dod.confidence = "medium" |
| 408 | 480 | self.dod_store.save(dod) |
| 409 | 481 | await self.emit_dod_status(emit, dod) |
| 482 | + recovery_nudge = _build_verification_failure_recovery_nudge( | |
| 483 | + dod, | |
| 484 | + project_root=self.context.project_root, | |
| 485 | + ) | |
| 486 | + if recovery_nudge: | |
| 487 | + self.context.queue_steering_message(recovery_nudge) | |
| 410 | 488 | await self.set_workflow_mode( |
| 411 | 489 | ModeDecision.transition( |
| 412 | 490 | WorkflowMode.EXECUTE, |
@@ -424,7 +502,7 @@ class TurnFinalizer: | ||
| 424 | 502 | f"Attempt: {dod.retry_count}/{dod.retry_budget}\n" |
| 425 | 503 | f"Pending items: {', '.join(dod.pending_items)}\n\n" |
| 426 | 504 | f"{build_verification_summary(dod.evidence)}\n\n" |
| 427 | - f"{_build_verification_repair_guidance(dod)}\n\n" | |
| 505 | + f"{_build_verification_repair_guidance(dod, project_root=self.context.project_root)}\n\n" | |
| 428 | 506 | "Fix the failures above, then finish the task again." |
| 429 | 507 | ) |
| 430 | 508 | self.context.session.append(Message(role=Role.USER, content=failure_prompt)) |
@@ -710,6 +788,72 @@ def _verification_result_provenance( | ||
| 710 | 788 | return entries |
| 711 | 789 | |
| 712 | 790 | |
def _missing_planned_artifact_labels(
    dod: DefinitionOfDone,
    *,
    project_root: Path,
) -> list[str]:
    """Return backtick-quoted labels for planned artifacts not yet on disk.

    Directory targets are labelled with a trailing slash.  At most twelve
    planned targets are inspected (``collect_planned_artifact_targets``).
    """
    missing: list[str] = []
    planned = collect_planned_artifact_targets(
        dod,
        project_root=project_root,
        max_paths=12,
    )
    for target, expect_directory in planned:
        present = target.is_dir() if expect_directory else target.is_file()
        if present:
            continue
        display = target.name or str(target)
        if expect_directory and not display.endswith("/"):
            display = f"{display}/"
        missing.append(f"`{display}`")
    return missing
| 810 | + | |
| 811 | + | |
def _first_missing_planned_artifact(
    dod: DefinitionOfDone,
    *,
    project_root: Path,
) -> tuple[Path, bool] | None:
    """Return the first planned artifact target that does not exist yet.

    The boolean mirrors ``collect_planned_artifact_targets`` and is True when
    the target is expected to be a directory.  ``None`` means every planned
    artifact is already present.
    """
    planned = collect_planned_artifact_targets(
        dod,
        project_root=project_root,
        max_paths=12,
    )
    for target, expect_directory in planned:
        present = target.is_dir() if expect_directory else target.is_file()
        if not present:
            return (target, expect_directory)
    return None
| 826 | + | |
| 827 | + | |
| 828 | +def _build_missing_artifact_recovery_nudge( | |
| 829 | + missing_artifact: tuple[Path, bool] | None, | |
| 830 | +) -> str | None: | |
| 831 | + if missing_artifact is None: | |
| 832 | + return None | |
| 833 | + | |
| 834 | + target, expect_directory = missing_artifact | |
| 835 | + label = target.name or str(target) | |
| 836 | + if expect_directory and not label.endswith("/"): | |
| 837 | + label += "/" | |
| 838 | + | |
| 839 | + if expect_directory: | |
| 840 | + return ( | |
| 841 | + "Your prior completion claim was incorrect because " | |
| 842 | + f"`{label}` does not exist yet. Do not summarize, mark completion, or " | |
| 843 | + "write bookkeeping notes yet. Your next response should be one concrete " | |
| 844 | + f"tool call that creates `{target}`. If a specific missing fact blocks " | |
| 845 | + "that step, ask one precise question." | |
| 846 | + ) | |
| 847 | + | |
| 848 | + return ( | |
| 849 | + "Your prior completion claim was incorrect because " | |
| 850 | + f"`{label}` does not exist yet. Do not summarize, mark completion, or " | |
| 851 | + "write bookkeeping notes yet. Your next response should be one concrete " | |
| 852 | + f"`write` or `edit`-style tool call that creates or updates `{target}`. " | |
| 853 | + "If a specific missing fact blocks that step, ask one precise question." | |
| 854 | + ) | |
| 855 | + | |
| 856 | + | |
| 713 | 857 | def _verification_result_observations( |
| 714 | 858 | dod: DefinitionOfDone, |
| 715 | 859 | *, |
@@ -938,49 +1082,254 @@ def _verification_state_signature(dod: DefinitionOfDone) -> str: | ||
| 938 | 1082 | ) |
| 939 | 1083 | |
| 940 | 1084 | |
def _build_verification_repair_guidance(
    dod: DefinitionOfDone,
    *,
    project_root: Path,
) -> str:
    """Render the "Repair focus" section of the verification-failure prompt.

    Combines parsed repair targets (broken local HTML links) with generic fix
    items extracted from the evidence.  With a concrete primary target the
    guidance names the exact file to edit, the expected path, and any existing
    sibling artifacts to use as the source of truth; otherwise it tells the
    model to reuse the recorded failures instead of restarting discovery.
    """
    repair_targets = _extract_verification_repair_targets(dod.evidence)
    fixes = _extract_verification_repairs(
        dod.evidence,
        repair_targets=repair_targets,
    )
    repair_source_paths = _existing_repair_source_paths(
        dod,
        repair_targets=repair_targets,
        project_root=project_root,
    )
    # No structured targets and no textual fixes: fall back to generic advice.
    if not fixes and not repair_targets:
        return (
            "Use the failed verification evidence directly, avoid rereading unrelated "
            "files, and fix the target file before retrying."
        )

    lines = ["Repair focus:"]
    lines.extend(f"- {item}" for item in fixes)
    # The first parsed target drives the concrete "edit this file" guidance.
    primary_target = repair_targets[0] if repair_targets else None
    if primary_target is not None:
        lines.extend(
            [
                f"- Immediate next step: edit `{primary_target.artifact_path}`.",
                "- If the broken reference should remain, create "
                f"`{primary_target.expected_path}`; otherwise remove or replace "
                f"`{primary_target.failing_reference}`.",
                # Splat an optional single bullet listing up to six on-disk
                # source files, with an ellipsis when more exist.
                *(
                    [
                        "- Use the existing artifact files as the source of truth while "
                        "repairing this file: "
                        + ", ".join(f"`{path}`" for path in repair_source_paths[:6])
                        + (", ..." if len(repair_source_paths) > 6 else "")
                    ]
                    if repair_source_paths
                    else []
                ),
                "- Do not reread unrelated reference materials or restart discovery "
                "while this concrete repair target is unresolved.",
            ]
        )
    else:
        lines.append(
            "- Reuse these exact failures instead of restarting discovery from earlier "
            "chapters."
        )
    return "\n".join(lines)
| 956 | 1136 | |
| 957 | 1137 | |
def _extract_verification_repairs(
    evidence_items: list[VerificationEvidence],
    *,
    repair_targets: list[VerificationRepairTarget] | None = None,
) -> list[str]:
    """Turn verification evidence into an ordered, deduplicated fix list.

    Structured targets produce "Fix the broken local reference ..." items
    first; report lines that cannot be matched to a known target fall back to
    a generic "Fix the missing local HTML link ..." item.
    """
    fixes: list[str] = []
    # NOTE(review): `repair_targets or ...` treats an explicitly-passed empty
    # list the same as None and re-extracts from evidence — confirm intended.
    target_map = {
        (target.artifact_path, target.failing_reference, target.expected_path): target
        for target in (repair_targets or _extract_verification_repair_targets(evidence_items))
    }
    for target in target_map.values():
        item = (
            f"Fix the broken local reference `{target.failing_reference}` in "
            f"`{target.artifact_path}`."
        )
        if item not in fixes:
            fixes.append(item)
    for evidence in evidence_items:
        # Scan every output stream of every evidence item for report lines.
        for candidate in (evidence.stderr, evidence.output, evidence.stdout):
            for problem in _extract_missing_local_html_links(str(candidate)):
                parsed = _parse_missing_local_html_link(problem)
                if parsed is not None:
                    key = (
                        parsed.artifact_path,
                        parsed.failing_reference,
                        parsed.expected_path,
                    )
                    # Already covered by a structured fix above; skip the
                    # generic duplicate.
                    if key in target_map:
                        continue
                # Unparseable or unmatched problems still get a generic item.
                item = (
                    "Fix the missing local HTML link "
                    f"`{problem}` in the edited artifact set."
                )
                if item not in fixes:
                    fixes.append(item)
    return fixes
| 982 | 1174 | |
| 983 | 1175 | |
@dataclass(frozen=True)
class VerificationRepairTarget:
    """Structured repair target extracted from failed verification evidence.

    Built by `_parse_missing_local_html_link` from one
    ``artifact:href -> expected`` line of a "Missing local HTML links:" report.
    """

    # Path of the HTML artifact that contains the broken reference.
    artifact_path: str
    # Raw href value inside the artifact that failed to resolve.
    failing_reference: str
    # Filesystem path the reference was expected to resolve to.
    expected_path: str
| 1183 | + | |
| 1184 | + | |
def _build_verification_failure_recovery_nudge(
    dod: DefinitionOfDone,
    *,
    project_root: Path,
) -> str | None:
    """Build a steering nudge for the turn following a failed verification.

    With a parsed repair target the nudge names the exact artifact and broken
    reference (plus any existing sibling source files).  With only generic
    repair items a shorter "reuse the evidence" nudge is returned.  ``None``
    means no usable repair signal was found in the evidence.
    """
    targets = _extract_verification_repair_targets(dod.evidence)
    source_paths = _existing_repair_source_paths(
        dod,
        repair_targets=targets,
        project_root=project_root,
    )
    if targets:
        primary = targets[0]
        source_hint = ""
        if source_paths:
            preview = ", ".join(f"`{path}`" for path in source_paths[:4])
            if len(source_paths) > 4:
                preview += ", ..."
            source_hint = (
                " Use the existing artifact files already on disk as the source of truth: "
                f"{preview}."
            )
        return (
            "Verification already identified the concrete repair target. "
            "Do not restart discovery or reread unrelated references. "
            "Your next response should be one concrete `edit` or `write`-style tool "
            f"call that updates `{primary.artifact_path}` to repair "
            f"`{primary.failing_reference}`. "
            f"If that reference should stay, create `{primary.expected_path}`; "
            "otherwise remove or replace the broken local reference."
            f"{source_hint}"
        )

    if not _extract_verification_repairs(dod.evidence, repair_targets=targets):
        return None
    return (
        "Verification already identified a concrete failure in the active artifact set. "
        "Reuse that evidence directly, apply one concrete edit or patch, and do not "
        "restart discovery unless a specific missing fact blocks the repair."
    )
| 1226 | + | |
| 1227 | + | |
def _existing_repair_source_paths(
    dod: DefinitionOfDone,
    *,
    repair_targets: list[VerificationRepairTarget],
    project_root: Path,
) -> list[str]:
    """List existing planned artifact files adjacent to the repair targets.

    Only plain-file planned targets whose parent directory matches a repair
    target's artifact or expected-path directory qualify.  Results are
    resolved, deduplicated, and returned in planned-target order; an empty
    *repair_targets* list yields no paths.
    """
    if not repair_targets:
        return []

    relevant_dirs: set[Path] = set()
    for repair in repair_targets:
        if str(repair.expected_path).strip():
            relevant_dirs.add(Path(repair.expected_path).parent.resolve(strict=False))
        if str(repair.artifact_path).strip():
            relevant_dirs.add(Path(repair.artifact_path).parent.resolve(strict=False))

    # dict preserves insertion order while deduplicating resolved paths.
    ordered: dict[str, None] = {}
    for target, expect_directory in collect_planned_artifact_targets(
        dod,
        project_root=project_root,
        max_paths=24,
    ):
        if expect_directory or not target.is_file():
            continue
        resolved = target.resolve(strict=False)
        if resolved.parent in relevant_dirs:
            ordered.setdefault(str(resolved), None)
    return list(ordered)
| 1266 | + | |
| 1267 | + | |
def _extract_verification_repair_targets(
    evidence_items: list[VerificationEvidence],
) -> list[VerificationRepairTarget]:
    """Parse unique repair targets out of the verification evidence streams.

    Scans stderr, output, and stdout of every evidence item for
    "Missing local HTML links:" report lines, keeping the first occurrence of
    each (artifact, reference, expected) triple in discovery order.
    """
    seen: set[tuple[str, str, str]] = set()
    targets: list[VerificationRepairTarget] = []
    for evidence in evidence_items:
        for stream in (evidence.stderr, evidence.output, evidence.stdout):
            for problem in _extract_missing_local_html_links(str(stream)):
                parsed = _parse_missing_local_html_link(problem)
                if parsed is None:
                    continue
                key = (
                    parsed.artifact_path,
                    parsed.failing_reference,
                    parsed.expected_path,
                )
                if key not in seen:
                    seen.add(key)
                    targets.append(parsed)
    return targets
| 1289 | + | |
| 1290 | + | |
def _parse_missing_local_html_link(problem: str) -> VerificationRepairTarget | None:
    """Parse one ``artifact:href -> expected`` report line into a repair target.

    Returns ``None`` when the line does not follow that shape.  The
    artifact/reference split happens at the LAST colon; NOTE(review): an href
    that itself contains ":" would be split at the wrong spot — the upstream
    report filters scheme-style hrefs, so this is assumed not to occur.
    """
    if " -> " not in problem:
        return None
    left, _, expected = problem.partition(" -> ")
    left = left.strip()
    expected = expected.strip()
    if not left or not expected or ":" not in left:
        return None
    artifact, _, reference = left.rpartition(":")
    artifact = artifact.strip()
    reference = reference.strip()
    if not artifact or not reference:
        return None
    return VerificationRepairTarget(
        artifact_path=artifact,
        failing_reference=reference,
        expected_path=expected,
    )
| 1309 | + | |
| 1310 | + | |
| 1311 | +def _extract_missing_local_html_links(text: str) -> list[str]: | |
| 1312 | + if "Missing local HTML links:" not in text: | |
| 1313 | + return [] | |
| 1314 | + | |
| 1315 | + problems: list[str] = [] | |
| 1316 | + capture = False | |
| 1317 | + for raw_line in text.splitlines(): | |
| 1318 | + line = raw_line.strip() | |
| 1319 | + if not line: | |
| 1320 | + continue | |
| 1321 | + if line == "Missing local HTML links:": | |
| 1322 | + capture = True | |
| 1323 | + continue | |
| 1324 | + if not capture: | |
| 1325 | + continue | |
| 1326 | + if " -> " not in line: | |
| 1327 | + continue | |
| 1328 | + if line not in problems: | |
| 1329 | + problems.append(line) | |
| 1330 | + return problems | |
| 1331 | + | |
| 1332 | + | |
| 984 | 1333 | def _classify_verification_kind(command: str) -> str: |
| 985 | 1334 | """Classify the verification command into a summary kind.""" |
| 986 | 1335 | |
src/loader/runtime/hooks.pymodified@@ -2,6 +2,7 @@ | ||
| 2 | 2 | |
| 3 | 3 | from __future__ import annotations |
| 4 | 4 | |
| 5 | +import shlex | |
| 5 | 6 | from collections.abc import Iterable |
| 6 | 7 | from dataclasses import dataclass, field |
| 7 | 8 | from enum import StrEnum |
@@ -11,10 +12,27 @@ from typing import Any, Protocol | ||
| 11 | 12 | from ..llm.base import ToolCall |
| 12 | 13 | from ..tools.base import Tool, ToolRegistry |
| 13 | 14 | from ..tools.base import ToolResult as RegistryToolResult |
| 15 | +from .dod import ( | |
| 16 | + DefinitionOfDoneStore, | |
| 17 | + all_planned_artifacts_exist, | |
| 18 | + collect_missing_declared_html_output_files, | |
| 19 | + collect_planned_artifact_targets, | |
| 20 | + planned_artifact_target_satisfied, | |
| 21 | +) | |
| 14 | 22 | from .memory import MemoryStore |
| 15 | 23 | from .permissions import PermissionOverride, PermissionPolicy |
| 24 | +from .repair_focus import ( | |
| 25 | + extract_active_repair_context, | |
| 26 | + normalize_repair_path, | |
| 27 | + path_matches_allowed_paths, | |
| 28 | + path_within_allowed_roots, | |
| 29 | +) | |
| 16 | 30 | from .rollback import RollbackPlan, create_rollback_plan_for_action, is_destructive_tool |
| 17 | -from .safeguard_services import ActionTracker, PreActionValidator | |
| 31 | +from .safeguard_services import ( | |
| 32 | + ActionTracker, | |
| 33 | + PreActionValidator, | |
| 34 | + extract_shell_text_rewrite_target, | |
| 35 | +) | |
| 18 | 36 | |
| 19 | 37 | |
| 20 | 38 | class HookEvent(StrEnum): |
@@ -204,13 +222,21 @@ class RelativePathContextHook(BaseToolHook): | ||
| 204 | 222 | |
| 205 | 223 | arguments = context.tool_call.arguments |
| 206 | 224 | raw_path = str(arguments.get(argument_key, "")).strip() |
| 207 | - if not raw_path or raw_path.startswith(("/", "~")): | |
| 225 | + if not raw_path: | |
| 208 | 226 | return HookResult() |
| 209 | 227 | |
| 210 | - resolved = self._resolve_recent_context_path( | |
| 211 | - raw_path, | |
| 212 | - require_existing=True, | |
| 213 | - ) | |
| 228 | + require_existing = context.tool_call.name in {"read", "glob", "grep", "edit", "patch"} | |
| 229 | + resolved: str | None = None | |
| 230 | + if raw_path.startswith("/"): | |
| 231 | + resolved = self._resolve_workspace_mirror_path( | |
| 232 | + raw_path, | |
| 233 | + require_existing=require_existing, | |
| 234 | + ) | |
| 235 | + elif not raw_path.startswith("~"): | |
| 236 | + resolved = self._resolve_recent_context_path( | |
| 237 | + raw_path, | |
| 238 | + require_existing=require_existing, | |
| 239 | + ) | |
| 214 | 240 | if resolved is None: |
| 215 | 241 | return HookResult() |
| 216 | 242 | |
@@ -245,6 +271,551 @@ class RelativePathContextHook(BaseToolHook): | ||
| 245 | 271 | return str(candidate) |
| 246 | 272 | return None |
| 247 | 273 | |
| 274 | + def _resolve_workspace_mirror_path( | |
| 275 | + self, | |
| 276 | + raw_path: str, | |
| 277 | + *, | |
| 278 | + require_existing: bool, | |
| 279 | + ) -> str | None: | |
| 280 | + candidate = Path(raw_path).expanduser() | |
| 281 | + try: | |
| 282 | + resolved = candidate.resolve(strict=False) | |
| 283 | + except Exception: | |
| 284 | + resolved = candidate | |
| 285 | + | |
| 286 | + try: | |
| 287 | + relative = resolved.relative_to(self.workspace_root) | |
| 288 | + except ValueError: | |
| 289 | + return None | |
| 290 | + if not relative.parts: | |
| 291 | + return None | |
| 292 | + | |
| 293 | + anchor = relative.parts[0] | |
| 294 | + for base_dir in self.action_tracker.recent_path_contexts(): | |
| 295 | + base_path = Path(base_dir).expanduser() | |
| 296 | + try: | |
| 297 | + resolved_base = base_path.resolve(strict=False) | |
| 298 | + except Exception: | |
| 299 | + resolved_base = base_path | |
| 300 | + if resolved_base == self.workspace_root: | |
| 301 | + continue | |
| 302 | + try: | |
| 303 | + resolved_base.relative_to(self.workspace_root) | |
| 304 | + continue | |
| 305 | + except ValueError: | |
| 306 | + pass | |
| 307 | + | |
| 308 | + try: | |
| 309 | + anchor_index = resolved_base.parts.index(anchor) | |
| 310 | + except ValueError: | |
| 311 | + continue | |
| 312 | + if anchor_index <= 0: | |
| 313 | + continue | |
| 314 | + | |
| 315 | + anchor_root = Path(*resolved_base.parts[: anchor_index + 1]) | |
| 316 | + remapped = Path(*resolved_base.parts[:anchor_index]).joinpath(*relative.parts) | |
| 317 | + if remapped == resolved: | |
| 318 | + continue | |
| 319 | + if require_existing: | |
| 320 | + if remapped.exists(): | |
| 321 | + return str(remapped) | |
| 322 | + continue | |
| 323 | + if remapped.exists() or remapped.parent.exists() or anchor_root.exists(): | |
| 324 | + return str(remapped) | |
| 325 | + return None | |
| 326 | + | |
| 327 | + | |
| 328 | +_OBSERVATION_TOOLS = frozenset({"read", "glob", "grep", "bash"}) | |
| 329 | +_MUTATION_TOOLS = frozenset({"write", "edit", "patch", "bash"}) | |
| 330 | +_READ_ONLY_BASH_PREFIXES = frozenset( | |
| 331 | + {"ls", "pwd", "find", "stat", "cat", "head", "tail", "rg", "grep"} | |
| 332 | +) | |
| 333 | +_MUTATING_BASH_FRAGMENTS = ( | |
| 334 | + " >", | |
| 335 | + ">>", | |
| 336 | + "| tee", | |
| 337 | + "touch ", | |
| 338 | + "mkdir ", | |
| 339 | + "rm ", | |
| 340 | + "mv ", | |
| 341 | + "cp ", | |
| 342 | + "sed -i", | |
| 343 | + "perl -pi", | |
| 344 | + "git add", | |
| 345 | + "git commit", | |
| 346 | + "git apply", | |
| 347 | +) | |
| 348 | + | |
| 349 | + | |
| 350 | +def _extract_observation_paths(tool_call: ToolCall) -> list[str]: | |
| 351 | + arguments = tool_call.arguments | |
| 352 | + if tool_call.name == "read": | |
| 353 | + file_path = str(arguments.get("file_path", "")).strip() | |
| 354 | + return [file_path] if file_path else [] | |
| 355 | + | |
| 356 | + if tool_call.name in {"glob", "grep"}: | |
| 357 | + candidates: list[str] = [] | |
| 358 | + search_path = str(arguments.get("path", "")).strip() | |
| 359 | + if search_path: | |
| 360 | + anchored_path = _derive_search_anchor(search_path, str(arguments.get("pattern", "")).strip()) | |
| 361 | + candidates.append(anchored_path or search_path) | |
| 362 | + pattern = str(arguments.get("pattern", "")).strip() | |
| 363 | + if not search_path and pattern.startswith(("/", "~")): | |
| 364 | + candidates.append(str(Path(pattern).expanduser().parent)) | |
| 365 | + return candidates | |
| 366 | + | |
| 367 | + command = str(arguments.get("command", "")).strip() | |
| 368 | + if not _is_read_only_bash(command): | |
| 369 | + return [] | |
| 370 | + return _extract_bash_paths(command) | |
| 371 | + | |
| 372 | + | |
| 373 | +def _is_read_only_bash(command: str) -> bool: | |
| 374 | + normalized = " ".join(command.split()) | |
| 375 | + if not normalized: | |
| 376 | + return False | |
| 377 | + if extract_shell_text_rewrite_target(normalized) is not None: | |
| 378 | + return False | |
| 379 | + if any(fragment in normalized for fragment in _MUTATING_BASH_FRAGMENTS): | |
| 380 | + return False | |
| 381 | + try: | |
| 382 | + argv = shlex.split(normalized) | |
| 383 | + except ValueError: | |
| 384 | + return False | |
| 385 | + if not argv: | |
| 386 | + return False | |
| 387 | + return argv[0] in _READ_ONLY_BASH_PREFIXES | |
| 388 | + | |
| 389 | + | |
| 390 | +def _extract_bash_paths(command: str) -> list[str]: | |
| 391 | + try: | |
| 392 | + argv = shlex.split(command) | |
| 393 | + except ValueError: | |
| 394 | + return [] | |
| 395 | + observed: list[str] = [] | |
| 396 | + for token in argv[1:]: | |
| 397 | + candidate = token.strip() | |
| 398 | + if not candidate or candidate.startswith("-"): | |
| 399 | + continue | |
| 400 | + if candidate.startswith(("/", "~")): | |
| 401 | + observed.append(candidate) | |
| 402 | + return observed | |
| 403 | + | |
| 404 | + | |
| 405 | +def _derive_search_anchor(search_path: str, pattern: str) -> str: | |
| 406 | + normalized_search_path = str(search_path or "").strip() | |
| 407 | + normalized_pattern = str(pattern or "").strip() | |
| 408 | + if not normalized_search_path or not normalized_pattern: | |
| 409 | + return normalized_search_path | |
| 410 | + | |
| 411 | + literal_segments: list[str] = [] | |
| 412 | + for segment in normalized_pattern.split("/"): | |
| 413 | + cleaned = segment.strip() | |
| 414 | + if not cleaned or cleaned == ".": | |
| 415 | + continue | |
| 416 | + if any(token in cleaned for token in ("*", "?", "[")): | |
| 417 | + continue | |
| 418 | + literal_segments.append(cleaned) | |
| 419 | + | |
| 420 | + if not literal_segments: | |
| 421 | + return normalized_search_path | |
| 422 | + | |
| 423 | + if "." in literal_segments[-1]: | |
| 424 | + literal_segments = literal_segments[:-1] | |
| 425 | + if not literal_segments: | |
| 426 | + return normalized_search_path | |
| 427 | + | |
| 428 | + try: | |
| 429 | + anchored = Path(normalized_search_path).expanduser().joinpath(*literal_segments) | |
| 430 | + except (OSError, RuntimeError, ValueError): | |
| 431 | + return normalized_search_path | |
| 432 | + return str(anchored) | |
| 433 | + | |
| 434 | + | |
| 435 | +def _extract_mutation_paths(tool_call: ToolCall) -> list[str]: | |
| 436 | + arguments = tool_call.arguments | |
| 437 | + if tool_call.name in {"write", "edit", "patch"}: | |
| 438 | + file_path = str(arguments.get("file_path", "")).strip() | |
| 439 | + return [file_path] if file_path else [] | |
| 440 | + | |
| 441 | + if tool_call.name != "bash": | |
| 442 | + return [] | |
| 443 | + | |
| 444 | + command = str(arguments.get("command", "")).strip() | |
| 445 | + if not command or not _is_mutating_bash(command): | |
| 446 | + return [] | |
| 447 | + target = extract_shell_text_rewrite_target(command) | |
| 448 | + return [target] if target else [] | |
| 449 | + | |
| 450 | + | |
| 451 | +def _is_mutating_bash(command: str) -> bool: | |
| 452 | + normalized = " ".join(command.split()) | |
| 453 | + if not normalized: | |
| 454 | + return False | |
| 455 | + if extract_shell_text_rewrite_target(normalized) is not None: | |
| 456 | + return True | |
| 457 | + if any(fragment in normalized for fragment in _MUTATING_BASH_FRAGMENTS): | |
| 458 | + return True | |
| 459 | + try: | |
| 460 | + argv = shlex.split(normalized) | |
| 461 | + except ValueError: | |
| 462 | + return False | |
| 463 | + if not argv: | |
| 464 | + return False | |
| 465 | + return argv[0] in {"touch", "mkdir", "rm", "mv", "cp", "chmod", "chown"} | |
| 466 | + | |
| 467 | + | |
| 468 | +def _repair_declared_output_paths(repair: Any, *, project_root: Path) -> set[str]: | |
| 469 | + declared_outputs: set[str] = set() | |
| 470 | + for root in getattr(repair, "allowed_roots", ()) or (): | |
| 471 | + normalized_root = normalize_repair_path(root) | |
| 472 | + if not normalized_root: | |
| 473 | + continue | |
| 474 | + for path in collect_missing_declared_html_output_files( | |
| 475 | + target=Path(normalized_root), | |
| 476 | + project_root=project_root, | |
| 477 | + ): | |
| 478 | + declared_outputs.add(normalize_repair_path(str(path))) | |
| 479 | + return declared_outputs | |
| 480 | + | |
| 481 | + | |
| 482 | +class ActiveRepairScopeHook(BaseToolHook): | |
| 483 | + """Keep fix-mode observations anchored to the active artifact set.""" | |
| 484 | + | |
| 485 | + def __init__( | |
| 486 | + self, | |
| 487 | + *, | |
| 488 | + dod_store: DefinitionOfDoneStore, | |
| 489 | + project_root: Path, | |
| 490 | + session: Any, | |
| 491 | + ) -> None: | |
| 492 | + self.dod_store = dod_store | |
| 493 | + self.project_root = project_root | |
| 494 | + self.session = session | |
| 495 | + | |
| 496 | + async def pre_tool_use(self, context: HookContext) -> HookResult: | |
| 497 | + if context.tool_call.name not in _OBSERVATION_TOOLS: | |
| 498 | + return HookResult() | |
| 499 | + if context.source == "verification": | |
| 500 | + return HookResult() | |
| 501 | + | |
| 502 | + repair = self._active_repair_context() | |
| 503 | + if repair is None: | |
| 504 | + return HookResult() | |
| 505 | + | |
| 506 | + observed_paths = _extract_observation_paths(context.tool_call) | |
| 507 | + if not observed_paths: | |
| 508 | + return HookResult() | |
| 509 | + declared_output_paths = _repair_declared_output_paths( | |
| 510 | + repair, | |
| 511 | + project_root=self.project_root, | |
| 512 | + ) | |
| 513 | + if repair.allowed_paths: | |
| 514 | + if all(path_matches_allowed_paths(path, repair.allowed_paths) for path in observed_paths): | |
| 515 | + return HookResult() | |
| 516 | + if declared_output_paths and all( | |
| 517 | + normalize_repair_path(path) in declared_output_paths | |
| 518 | + for path in observed_paths | |
| 519 | + ): | |
| 520 | + return HookResult() | |
| 521 | + if context.tool_call.name in {"glob", "grep", "bash"} and repair.allowed_roots: | |
| 522 | + if all(path_within_allowed_roots(path, repair.allowed_roots) for path in observed_paths): | |
| 523 | + return HookResult() | |
| 524 | + | |
| 525 | + allowed_preview = ", ".join(f"`{path}`" for path in repair.allowed_paths[:3]) | |
| 526 | + if len(repair.allowed_paths) > 3: | |
| 527 | + allowed_preview += ", ..." | |
| 528 | + declared_preview = ", ".join( | |
| 529 | + f"`{Path(path).name or path}`" | |
| 530 | + for path in sorted(declared_output_paths)[:3] | |
| 531 | + ) | |
| 532 | + if len(declared_output_paths) > 3: | |
| 533 | + declared_preview += ", ..." | |
| 534 | + suggestion_suffix = ( | |
| 535 | + f" Declared sibling outputs currently allowed inside this repair set include: {declared_preview}." | |
| 536 | + if declared_preview | |
| 537 | + else "" | |
| 538 | + ) | |
| 539 | + return HookResult( | |
| 540 | + decision=HookDecision.DENY, | |
| 541 | + message=( | |
| 542 | + "[Blocked - active repair scope: verification already identified " | |
| 543 | + f"`{repair.artifact_path}` as the current repair target. " | |
| 544 | + "Stay on the concrete repair files until that repair passes.] " | |
| 545 | + "Suggestion: inspect or edit only " | |
| 546 | + f"{allowed_preview} and do not reopen unrelated reference materials." | |
| 547 | + f"{suggestion_suffix}" | |
| 548 | + ), | |
| 549 | + terminal_state="blocked", | |
| 550 | + ) | |
| 551 | + | |
| 552 | + if not repair.allowed_roots: | |
| 553 | + return HookResult() | |
| 554 | + if all(path_within_allowed_roots(path, repair.allowed_roots) for path in observed_paths): | |
| 555 | + return HookResult() | |
| 556 | + | |
| 557 | + roots_preview = ", ".join(f"`{root}`" for root in repair.allowed_roots[:2]) | |
| 558 | + if len(repair.allowed_roots) > 2: | |
| 559 | + roots_preview += ", ..." | |
| 560 | + return HookResult( | |
| 561 | + decision=HookDecision.DENY, | |
| 562 | + message=( | |
| 563 | + "[Blocked - active repair scope: verification already identified " | |
| 564 | + f"`{repair.artifact_path}` as the current repair target. " | |
| 565 | + "Stay inside the current artifact set until that repair passes.] " | |
| 566 | + "Suggestion: inspect or edit files under " | |
| 567 | + f"{roots_preview} and do not reopen unrelated reference materials." | |
| 568 | + ), | |
| 569 | + terminal_state="blocked", | |
| 570 | + ) | |
| 571 | + | |
| 572 | + def _active_repair_context(self): | |
| 573 | + dod_path = getattr(self.session, "active_dod_path", None) | |
| 574 | + if not dod_path: | |
| 575 | + return None | |
| 576 | + path = Path(str(dod_path)) | |
| 577 | + if not path.exists(): | |
| 578 | + return None | |
| 579 | + dod = self.dod_store.load(path) | |
| 580 | + if dod.status == "done": | |
| 581 | + return None | |
| 582 | + return extract_active_repair_context(getattr(self.session, "messages", [])) | |
| 583 | + | |
| 584 | + | |
| 585 | +class ActiveRepairMutationScopeHook(BaseToolHook): | |
| 586 | + """Keep repair-phase mutations pinned to the concrete repair targets.""" | |
| 587 | + | |
| 588 | + def __init__( | |
| 589 | + self, | |
| 590 | + *, | |
| 591 | + dod_store: DefinitionOfDoneStore, | |
| 592 | + project_root: Path, | |
| 593 | + session: Any, | |
| 594 | + ) -> None: | |
| 595 | + self.dod_store = dod_store | |
| 596 | + self.project_root = project_root | |
| 597 | + self.session = session | |
| 598 | + | |
| 599 | + async def pre_tool_use(self, context: HookContext) -> HookResult: | |
| 600 | + if context.tool_call.name not in _MUTATION_TOOLS: | |
| 601 | + return HookResult() | |
| 602 | + if context.source == "verification": | |
| 603 | + return HookResult() | |
| 604 | + | |
| 605 | + repair = self._active_repair_context() | |
| 606 | + if repair is None or not repair.allowed_paths: | |
| 607 | + return HookResult() | |
| 608 | + allowed_paths = {normalize_repair_path(path) for path in repair.allowed_paths} | |
| 609 | + | |
| 610 | + mutation_paths = _extract_mutation_paths(context.tool_call) | |
| 611 | + if not mutation_paths: | |
| 612 | + if context.tool_call.name == "bash" and _is_mutating_bash( | |
| 613 | + str(context.tool_call.arguments.get("command", "")).strip() | |
| 614 | + ): | |
| 615 | + return HookResult( | |
| 616 | + decision=HookDecision.DENY, | |
| 617 | + message=( | |
| 618 | + "[Blocked - active repair mutation scope: the current repair already " | |
| 619 | + f"identifies `{repair.artifact_path}` as the concrete target.] " | |
| 620 | + "Suggestion: use write/edit/patch directly on one of the active repair " | |
| 621 | + "files instead of a broad shell mutation." | |
| 622 | + ), | |
| 623 | + terminal_state="blocked", | |
| 624 | + ) | |
| 625 | + return HookResult() | |
| 626 | + normalized_mutation_paths = [ | |
| 627 | + normalize_repair_path(path) for path in mutation_paths if str(path).strip() | |
| 628 | + ] | |
| 629 | + allowed_declared_outputs = _repair_declared_output_paths( | |
| 630 | + repair, | |
| 631 | + project_root=self.project_root, | |
| 632 | + ) | |
| 633 | + | |
| 634 | + if normalized_mutation_paths and all( | |
| 635 | + path in allowed_paths for path in normalized_mutation_paths | |
| 636 | + ): | |
| 637 | + return HookResult() | |
| 638 | + if normalized_mutation_paths and all( | |
| 639 | + path in allowed_paths or path in allowed_declared_outputs | |
| 640 | + for path in normalized_mutation_paths | |
| 641 | + ): | |
| 642 | + return HookResult() | |
| 643 | + | |
| 644 | + allowed_preview = ", ".join(f"`{path}`" for path in repair.allowed_paths[:3]) | |
| 645 | + if len(repair.allowed_paths) > 3: | |
| 646 | + allowed_preview += ", ..." | |
| 647 | + declared_preview = ", ".join( | |
| 648 | + f"`{Path(path).name or path}`" | |
| 649 | + for path in sorted(allowed_declared_outputs)[:3] | |
| 650 | + ) | |
| 651 | + if len(allowed_declared_outputs) > 3: | |
| 652 | + declared_preview += ", ..." | |
| 653 | + suggestion_suffix = ( | |
| 654 | + f" Declared sibling outputs currently allowed inside this repair set include: {declared_preview}." | |
| 655 | + if declared_preview | |
| 656 | + else "" | |
| 657 | + ) | |
| 658 | + return HookResult( | |
| 659 | + decision=HookDecision.DENY, | |
| 660 | + message=( | |
| 661 | + "[Blocked - active repair mutation scope: verification already identified " | |
| 662 | + f"`{repair.artifact_path}` as the current repair target.] Suggestion: keep " | |
| 663 | + f"mutations on the active repair files only: {allowed_preview}." | |
| 664 | + f"{suggestion_suffix}" | |
| 665 | + ), | |
| 666 | + terminal_state="blocked", | |
| 667 | + ) | |
| 668 | + | |
| 669 | + def _active_repair_context(self): | |
| 670 | + dod_path = getattr(self.session, "active_dod_path", None) | |
| 671 | + if not dod_path: | |
| 672 | + return None | |
| 673 | + path = Path(str(dod_path)) | |
| 674 | + if not path.exists(): | |
| 675 | + return None | |
| 676 | + dod = self.dod_store.load(path) | |
| 677 | + if dod.status == "done": | |
| 678 | + return None | |
| 679 | + return extract_active_repair_context(getattr(self.session, "messages", [])) | |
| 680 | + | |
| 681 | +class LateReferenceDriftHook(BaseToolHook): | |
| 682 | + """Block reopening old reference paths once planned artifacts are well underway.""" | |
| 683 | + | |
| 684 | + _MIN_COMPLETED_FILES = 3 | |
| 685 | + | |
| 686 | + def __init__(self, *, dod_store: DefinitionOfDoneStore, project_root: Path, session: Any) -> None: | |
| 687 | + self.dod_store = dod_store | |
| 688 | + self.project_root = project_root | |
| 689 | + self.session = session | |
| 690 | + | |
| 691 | + async def pre_tool_use(self, context: HookContext) -> HookResult: | |
| 692 | + if context.tool_call.name not in _OBSERVATION_TOOLS: | |
| 693 | + return HookResult() | |
| 694 | + | |
| 695 | + completed_scope = self._completed_artifact_scope() | |
| 696 | + if completed_scope is not None: | |
| 697 | + observed_paths = _extract_observation_paths(context.tool_call) | |
| 698 | + if not observed_paths: | |
| 699 | + return HookResult() | |
| 700 | + if all(path_within_allowed_roots(path, completed_scope) for path in observed_paths): | |
| 701 | + return HookResult() | |
| 702 | + | |
| 703 | + roots_preview = ", ".join(f"`{root}`" for root in completed_scope[:2]) | |
| 704 | + if len(completed_scope) > 2: | |
| 705 | + roots_preview += ", ..." | |
| 706 | + return HookResult( | |
| 707 | + decision=HookDecision.DENY, | |
| 708 | + message=( | |
| 709 | + "[Blocked - completed artifact set scope: all explicitly planned artifacts " | |
| 710 | + "already exist.] Suggestion: stay within the current output roots under " | |
| 711 | + f"{roots_preview} and use those files as the source of truth instead of " | |
| 712 | + "reopening earlier reference materials." | |
| 713 | + ), | |
| 714 | + terminal_state="blocked", | |
| 715 | + ) | |
| 716 | + | |
| 717 | + late_stage = self._late_stage_missing_artifact() | |
| 718 | + if late_stage is None: | |
| 719 | + return HookResult() | |
| 720 | + missing_artifact, planned_roots = late_stage | |
| 721 | + observed_paths = _extract_observation_paths(context.tool_call) | |
| 722 | + if not observed_paths: | |
| 723 | + return HookResult() | |
| 724 | + if all(path_within_allowed_roots(path, planned_roots) for path in observed_paths): | |
| 725 | + return HookResult() | |
| 726 | + | |
| 727 | + roots_preview = ", ".join(f"`{root}`" for root in planned_roots[:2]) | |
| 728 | + if len(planned_roots) > 2: | |
| 729 | + roots_preview += ", ..." | |
| 730 | + return HookResult( | |
| 731 | + decision=HookDecision.DENY, | |
| 732 | + message=( | |
| 733 | + "[Blocked - late reference drift: several planned artifacts already exist and " | |
| 734 | + f"`{missing_artifact}` is still missing.] Suggestion: finish the next missing " | |
| 735 | + f"artifact inside {roots_preview} before reopening earlier reference materials." | |
| 736 | + ), | |
| 737 | + terminal_state="blocked", | |
| 738 | + ) | |
| 739 | + | |
| 740 | + def _late_stage_missing_artifact(self) -> tuple[str, tuple[str, ...]] | None: | |
| 741 | + dod_path = getattr(self.session, "active_dod_path", None) | |
| 742 | + if not dod_path: | |
| 743 | + return None | |
| 744 | + path = Path(str(dod_path)) | |
| 745 | + if not path.exists(): | |
| 746 | + return None | |
| 747 | + dod = self.dod_store.load(path) | |
| 748 | + if dod.status == "done": | |
| 749 | + return None | |
| 750 | + | |
| 751 | + planned_targets = collect_planned_artifact_targets( | |
| 752 | + dod, | |
| 753 | + project_root=self.project_root, | |
| 754 | + ) | |
| 755 | + if not planned_targets: | |
| 756 | + return None | |
| 757 | + | |
| 758 | + missing_label = "" | |
| 759 | + completed_files = 0 | |
| 760 | + planned_roots: list[str] = [] | |
| 761 | + seen_roots: set[str] = set() | |
| 762 | + for target, expect_directory in planned_targets: | |
| 763 | + satisfied = planned_artifact_target_satisfied( | |
| 764 | + dod, | |
| 765 | + target=target, | |
| 766 | + expect_directory=expect_directory, | |
| 767 | + project_root=self.project_root, | |
| 768 | + ) | |
| 769 | + if not expect_directory: | |
| 770 | + if satisfied: | |
| 771 | + completed_files += 1 | |
| 772 | + elif not missing_label: | |
| 773 | + missing_label = str(target) | |
| 774 | + root = str(target.parent) | |
| 775 | + else: | |
| 776 | + if not satisfied and not missing_label: | |
| 777 | + missing_label = str(target) | |
| 778 | + root = str(target) | |
| 779 | + if root not in seen_roots: | |
| 780 | + planned_roots.append(root) | |
| 781 | + seen_roots.add(root) | |
| 782 | + | |
| 783 | + if not missing_label: | |
| 784 | + return None | |
| 785 | + if completed_files < self._MIN_COMPLETED_FILES: | |
| 786 | + return None | |
| 787 | + return missing_label, tuple(planned_roots) | |
| 788 | + | |
| 789 | + def _completed_artifact_scope(self) -> tuple[str, ...] | None: | |
| 790 | + dod_path = getattr(self.session, "active_dod_path", None) | |
| 791 | + if not dod_path: | |
| 792 | + return None | |
| 793 | + path = Path(str(dod_path)) | |
| 794 | + if not path.exists(): | |
| 795 | + return None | |
| 796 | + dod = self.dod_store.load(path) | |
| 797 | + if dod.status in {"done", "fixing"}: | |
| 798 | + return None | |
| 799 | + | |
| 800 | + planned_targets = collect_planned_artifact_targets( | |
| 801 | + dod, | |
| 802 | + project_root=self.project_root, | |
| 803 | + ) | |
| 804 | + if not planned_targets: | |
| 805 | + return None | |
| 806 | + if not all_planned_artifacts_exist(dod, project_root=self.project_root): | |
| 807 | + return None | |
| 808 | + | |
| 809 | + planned_roots: list[str] = [] | |
| 810 | + seen_roots: set[str] = set() | |
| 811 | + for target, expect_directory in planned_targets: | |
| 812 | + root = str(target if expect_directory else target.parent) | |
| 813 | + if root in seen_roots: | |
| 814 | + continue | |
| 815 | + seen_roots.add(root) | |
| 816 | + planned_roots.append(root) | |
| 817 | + return tuple(planned_roots) | |
| 818 | + | |
| 248 | 819 | |
| 249 | 820 | class HookManager: |
| 250 | 821 | """Runs tool hooks across Loader's three lifecycle events.""" |
@@ -437,6 +1008,7 @@ def build_default_tool_hooks( | ||
| 437 | 1008 | registry: ToolRegistry, |
| 438 | 1009 | rollback_plan: RollbackPlan | None, |
| 439 | 1010 | workspace_root: Path, |
| 1011 | + session: Any, | |
| 440 | 1012 | ) -> HookManager: |
| 441 | 1013 | """Build Loader's default tool hook stack for one runtime turn.""" |
| 442 | 1014 | |
@@ -445,6 +1017,21 @@ def build_default_tool_hooks( | ||
| 445 | 1017 | FilePathAliasHook(), |
| 446 | 1018 | SearchPathAliasHook(), |
| 447 | 1019 | RelativePathContextHook(action_tracker, workspace_root), |
| 1020 | + ActiveRepairScopeHook( | |
| 1021 | + dod_store=DefinitionOfDoneStore(workspace_root), | |
| 1022 | + project_root=workspace_root, | |
| 1023 | + session=session, | |
| 1024 | + ), | |
| 1025 | + ActiveRepairMutationScopeHook( | |
| 1026 | + dod_store=DefinitionOfDoneStore(workspace_root), | |
| 1027 | + project_root=workspace_root, | |
| 1028 | + session=session, | |
| 1029 | + ), | |
| 1030 | + LateReferenceDriftHook( | |
| 1031 | + dod_store=DefinitionOfDoneStore(workspace_root), | |
| 1032 | + project_root=workspace_root, | |
| 1033 | + session=session, | |
| 1034 | + ), | |
| 448 | 1035 | DuplicateActionHook(action_tracker), |
| 449 | 1036 | ActionValidationHook(validator), |
| 450 | 1037 | RollbackTrackingHook(registry, rollback_plan), |
src/loader/runtime/repair.pymodified@@ -2,11 +2,67 @@ | ||
| 2 | 2 | |
| 3 | 3 | from __future__ import annotations |
| 4 | 4 | |
| 5 | +import re | |
| 5 | 6 | from dataclasses import dataclass, field |
| 7 | +from pathlib import Path | |
| 6 | 8 | |
| 7 | 9 | from ..llm.base import ToolCall |
| 8 | 10 | from .context import RuntimeContext |
| 11 | +from .dod import ( | |
| 12 | + DefinitionOfDone, | |
| 13 | + collect_planned_artifact_targets, | |
| 14 | + infer_next_declared_html_output_file, | |
| 15 | + planned_artifact_target_satisfied, | |
| 16 | +) | |
| 9 | 17 | from .parsing import parse_tool_calls |
| 18 | +from .workflow import effective_pending_todo_items, reconcile_aggregate_completion_steps | |
| 19 | + | |
| 20 | +_SPECIAL_DOD_ITEMS = { | |
| 21 | + "Complete the requested work", | |
| 22 | + "Collect verification evidence", | |
| 23 | +} | |
| 24 | +_LATE_STAGE_EMPTY_RETRY_EXTRA = 2 | |
| 25 | +_WORKING_NOTE_TOOL_NAMES = ( | |
| 26 | + "notepad_write_working", | |
| 27 | + "notepad_append", | |
| 28 | + "notepad_write_priority", | |
| 29 | + "notepad_write_manual", | |
| 30 | +) | |
| 31 | +_MUTATION_TODO_HINTS = ( | |
| 32 | + "create", | |
| 33 | + "creating", | |
| 34 | + "update", | |
| 35 | + "updating", | |
| 36 | + "edit", | |
| 37 | + "editing", | |
| 38 | + "write", | |
| 39 | + "writing", | |
| 40 | + "fix", | |
| 41 | + "fixing", | |
| 42 | + "modify", | |
| 43 | + "modifying", | |
| 44 | + "change", | |
| 45 | + "changing", | |
| 46 | + "patch", | |
| 47 | + "patching", | |
| 48 | + "replace", | |
| 49 | + "replacing", | |
| 50 | + "correct", | |
| 51 | + "correcting", | |
| 52 | + "rewrite", | |
| 53 | + "rewriting", | |
| 54 | +) | |
| 55 | +_CONSISTENCY_REVIEW_HINTS = ( | |
| 56 | + "consistent", | |
| 57 | + "consistently", | |
| 58 | + "formatted", | |
| 59 | + "link", | |
| 60 | + "linked", | |
| 61 | + "navigation", | |
| 62 | + "work properly", | |
| 63 | + "all files", | |
| 64 | + "every file", | |
| 65 | +) | |
| 10 | 66 | |
| 11 | 67 | |
| 12 | 68 | @dataclass(slots=True) |
@@ -52,29 +108,37 @@ class ResponseRepairer: | ||
| 52 | 108 | original_task: str | None, |
| 53 | 109 | empty_retry_count: int, |
| 54 | 110 | max_empty_retries: int, |
| 111 | + dod: DefinitionOfDone | None = None, | |
| 55 | 112 | ) -> EmptyResponseDecision: |
| 56 | 113 | """Return the next action when the assistant responds with empty content.""" |
| 57 | 114 | |
| 58 | - _ = task, original_task, max_empty_retries | |
| 59 | - if empty_retry_count == 1: | |
| 115 | + _ = task, original_task | |
| 116 | + effective_max_empty_retries = self._effective_max_empty_retries( | |
| 117 | + dod, | |
| 118 | + base_max_empty_retries=max_empty_retries, | |
| 119 | + ) | |
| 120 | + if empty_retry_count <= effective_max_empty_retries: | |
| 60 | 121 | return EmptyResponseDecision( |
| 61 | 122 | should_continue=True, |
| 62 | 123 | reason_code="empty_response_retry", |
| 63 | - reason_summary="retried after the assistant returned an empty response", | |
| 64 | - retry_message=( | |
| 65 | - "[EMPTY ASSISTANT RESPONSE]\n" | |
| 66 | - "Your last response was empty. Respond directly to the task " | |
| 67 | - "or call tools if needed. Do not return an empty response." | |
| 124 | + reason_summary=( | |
| 125 | + "retried after the assistant returned an empty response" | |
| 126 | + ), | |
| 127 | + retry_message=self._build_empty_response_retry_message( | |
| 128 | + dod, | |
| 129 | + retry_number=empty_retry_count, | |
| 130 | + max_empty_retries=effective_max_empty_retries, | |
| 68 | 131 | ), |
| 69 | 132 | ) |
| 70 | 133 | |
| 71 | 134 | return EmptyResponseDecision( |
| 72 | 135 | should_continue=False, |
| 73 | 136 | reason_code="empty_response_retry_exhausted", |
| 74 | - reason_summary="stopped after the assistant returned empty responses twice", | |
| 137 | + reason_summary="stopped after the assistant returned empty responses repeatedly", | |
| 75 | 138 | final_response=( |
| 76 | - "I didn't get a usable response from the model after retrying once. " | |
| 77 | - "Please try again or switch to a different backend/model." | |
| 139 | + "I didn't get a usable response from the model after " | |
| 140 | + f"retrying {effective_max_empty_retries} times. Please try again or " | |
| 141 | + "switch to a different backend/model." | |
| 78 | 142 | ), |
| 79 | 143 | failure="assistant returned empty output repeatedly", |
| 80 | 144 | ) |
@@ -167,3 +231,383 @@ class ResponseRepairer: | ||
| 167 | 231 | allowed_tool_names=allowed_tool_names, |
| 168 | 232 | ) |
| 169 | 233 | return parsed.tool_calls |
| 234 | + | |
| 235 | + def _build_empty_response_retry_message( | |
| 236 | + self, | |
| 237 | + dod: DefinitionOfDone | None, | |
| 238 | + *, | |
| 239 | + retry_number: int, | |
| 240 | + max_empty_retries: int, | |
| 241 | + ) -> str: | |
| 242 | + progress_lines: list[str] = [] | |
| 243 | + if dod is not None: | |
| 244 | + reconcile_aggregate_completion_steps( | |
| 245 | + dod, | |
| 246 | + project_root=self.context.project_root, | |
| 247 | + ) | |
| 248 | + latest_working_note = self._latest_working_note() | |
| 249 | + if latest_working_note: | |
| 250 | + progress_lines.append( | |
| 251 | + "Latest working note: " + latest_working_note | |
| 252 | + ) | |
| 253 | + | |
| 254 | + planned_lines = self._planned_artifact_progress_lines(dod) | |
| 255 | + progress_lines.extend(planned_lines) | |
| 256 | + progress_lines.extend( | |
| 257 | + self._next_step_resume_lines( | |
| 258 | + dod, | |
| 259 | + retry_number=retry_number, | |
| 260 | + ) | |
| 261 | + ) | |
| 262 | + | |
| 263 | + touched = [ | |
| 264 | + f"`{Path(path).name or path}`" | |
| 265 | + for path in dod.touched_files[-3:] | |
| 266 | + if str(path).strip() | |
| 267 | + ] | |
| 268 | + if touched: | |
| 269 | + progress_lines.append( | |
| 270 | + "Confirmed touched files: " + ", ".join(touched) | |
| 271 | + ) | |
| 272 | + | |
| 273 | + completed = [ | |
| 274 | + item | |
| 275 | + for item in dod.completed_items | |
| 276 | + if item not in _SPECIAL_DOD_ITEMS | |
| 277 | + ] | |
| 278 | + if completed: | |
| 279 | + progress_lines.append( | |
| 280 | + "Confirmed completed work: " + "; ".join(completed[-2:]) | |
| 281 | + ) | |
| 282 | + | |
| 283 | + next_pending = next( | |
| 284 | + ( | |
| 285 | + item | |
| 286 | + for item in dod.pending_items | |
| 287 | + if item not in _SPECIAL_DOD_ITEMS | |
| 288 | + ), | |
| 289 | + None, | |
| 290 | + ) | |
| 291 | + if next_pending: | |
| 292 | + progress_lines.append(f"Next pending item: {next_pending}") | |
| 293 | + todo_refresh = self._todo_refresh_retry_line(dod) | |
| 294 | + if todo_refresh: | |
| 295 | + progress_lines.append(todo_refresh) | |
| 296 | + | |
| 297 | + if not progress_lines: | |
| 298 | + return ( | |
| 299 | + "[EMPTY ASSISTANT RESPONSE]\n" | |
| 300 | + f"Your last response was empty (retry {retry_number}/{max_empty_retries}). " | |
| 301 | + "Respond directly to the task " | |
| 302 | + "or call tools if needed. Do not return an empty response." | |
| 303 | + ) | |
| 304 | + | |
| 305 | + return "\n".join( | |
| 306 | + [ | |
| 307 | + "[EMPTY ASSISTANT RESPONSE]", | |
| 308 | + ( | |
| 309 | + "Your last response was empty " | |
| 310 | + f"(retry {retry_number}/{max_empty_retries}). Continue from the " | |
| 311 | + "confirmed progress below instead of restarting." | |
| 312 | + ), | |
| 313 | + *[f"- {line}" for line in progress_lines], | |
| 314 | + "", | |
| 315 | + "Respond directly to the task or call tools if needed. Do not return an empty response.", | |
| 316 | + ] | |
| 317 | + ) | |
| 318 | + | |
| 319 | + def _todo_refresh_retry_line(self, dod: DefinitionOfDone) -> str | None: | |
| 320 | + non_special_pending = [ | |
| 321 | + item for item in dod.pending_items if item not in _SPECIAL_DOD_ITEMS | |
| 322 | + ] | |
| 323 | + non_special_completed = [ | |
| 324 | + item for item in dod.completed_items if item not in _SPECIAL_DOD_ITEMS | |
| 325 | + ] | |
| 326 | + if len(dod.touched_files) < 2 and (len(non_special_pending) + len(non_special_completed)) < 3: | |
| 327 | + return None | |
| 328 | + return ( | |
| 329 | + "If the tracked steps are stale, refresh `TodoWrite` alongside the next " | |
| 330 | + "concrete mutation instead of spending a full turn on bookkeeping alone." | |
| 331 | + ) | |
| 332 | + | |
| 333 | + def _effective_max_empty_retries( | |
| 334 | + self, | |
| 335 | + dod: DefinitionOfDone | None, | |
| 336 | + *, | |
| 337 | + base_max_empty_retries: int, | |
| 338 | + ) -> int: | |
| 339 | + if dod is None: | |
| 340 | + return base_max_empty_retries | |
| 341 | + completed_artifacts, missing_artifacts = self._planned_artifact_counts(dod) | |
| 342 | + if completed_artifacts < 3 or missing_artifacts == 0: | |
| 343 | + return base_max_empty_retries | |
| 344 | + return base_max_empty_retries + _LATE_STAGE_EMPTY_RETRY_EXTRA | |
| 345 | + | |
| 346 | + def _planned_artifact_counts(self, dod: DefinitionOfDone) -> tuple[int, int]: | |
| 347 | + completed = 0 | |
| 348 | + missing = 0 | |
| 349 | + for target, expect_directory in collect_planned_artifact_targets( | |
| 350 | + dod, | |
| 351 | + project_root=self.context.project_root, | |
| 352 | + max_paths=12, | |
| 353 | + ): | |
| 354 | + if planned_artifact_target_satisfied( | |
| 355 | + dod, | |
| 356 | + target=target, | |
| 357 | + expect_directory=expect_directory, | |
| 358 | + project_root=self.context.project_root, | |
| 359 | + ): | |
| 360 | + completed += 1 | |
| 361 | + else: | |
| 362 | + missing += 1 | |
| 363 | + return completed, missing | |
| 364 | + | |
| 365 | + def _planned_artifact_progress_lines(self, dod: DefinitionOfDone) -> list[str]: | |
| 366 | + targets = collect_planned_artifact_targets( | |
| 367 | + dod, | |
| 368 | + project_root=self.context.project_root, | |
| 369 | + max_paths=12, | |
| 370 | + ) | |
| 371 | + if not targets: | |
| 372 | + return [] | |
| 373 | + | |
| 374 | + missing_labels = [ | |
| 375 | + self._format_artifact_label(target, expect_directory=expect_directory) | |
| 376 | + for target, expect_directory in targets | |
| 377 | + if not planned_artifact_target_satisfied( | |
| 378 | + dod, | |
| 379 | + target=target, | |
| 380 | + expect_directory=expect_directory, | |
| 381 | + project_root=self.context.project_root, | |
| 382 | + ) | |
| 383 | + ] | |
| 384 | + if not missing_labels: | |
| 385 | + return [] | |
| 386 | + | |
| 387 | + lines = [f"Next missing planned artifact: {missing_labels[0]}"] | |
| 388 | + first_missing_target, first_missing_is_directory = next( | |
| 389 | + ( | |
| 390 | + (target, expect_directory) | |
| 391 | + for target, expect_directory in targets | |
| 392 | + if not planned_artifact_target_satisfied( | |
| 393 | + dod, | |
| 394 | + target=target, | |
| 395 | + expect_directory=expect_directory, | |
| 396 | + project_root=self.context.project_root, | |
| 397 | + ) | |
| 398 | + ), | |
| 399 | + (None, False), | |
| 400 | + ) | |
| 401 | + if first_missing_target is not None and first_missing_is_directory: | |
| 402 | + next_output_file = infer_next_declared_html_output_file( | |
| 403 | + target=first_missing_target, | |
| 404 | + project_root=self.context.project_root, | |
| 405 | + ) | |
| 406 | + if next_output_file is not None: | |
| 407 | + lines.append( | |
| 408 | + "Next declared output under " | |
| 409 | + f"{self._format_artifact_label(first_missing_target, expect_directory=True)}: " | |
| 410 | + f"{self._format_artifact_label(next_output_file, expect_directory=False)}" | |
| 411 | + ) | |
| 412 | + if len(missing_labels) > 1: | |
| 413 | + preview = ", ".join(missing_labels[:3]) | |
| 414 | + if len(missing_labels) > 3: | |
| 415 | + preview += ", ..." | |
| 416 | + lines.append("Remaining planned artifacts: " + preview) | |
| 417 | + return lines | |
| 418 | + | |
    def _next_step_resume_lines(
        self,
        dod: DefinitionOfDone,
        *,
        retry_number: int,
    ) -> list[str]:
        """Return concrete "resume here" instructions for the retry message.

        Branch priority (each branch returns immediately):
        1. No artifacts created yet and the next pending todo is a
           non-mutation, non-review step -> steer toward that discovery step.
        2. Otherwise, the first unsatisfied planned artifact target drives a
           tailored mutation instruction (directory with an inferable next
           output file, existing directory, missing directory, or plain file).
        Returns [] when every planned artifact target is satisfied.
        """
        completed_artifacts, _ = self._planned_artifact_counts(dod)
        # First non-special pending todo item, if any.
        next_pending = next(
            (
                item
                for item in effective_pending_todo_items(
                    dod,
                    project_root=self.context.project_root,
                )
                if item not in _SPECIAL_DOD_ITEMS
            ),
            None,
        )
        # Branch 1: nothing built yet and the pending step is pure discovery.
        if (
            completed_artifacts == 0
            and next_pending
            and not _todo_is_mutation_step(next_pending)
            and not _todo_is_consistency_review_step(next_pending)
        ):
            lines = [f"Resume with this exact next step: advance `{next_pending}`."]
            lines.append(
                "Make the next response one concrete evidence-gathering tool call that "
                "directly advances that step."
            )
            lines.append(
                "Do not jump ahead to later artifact creation, verification, or a "
                "completion summary until that discovery step is satisfied."
            )
            # Harder wording once the model has already burned >= 2 retries.
            if retry_number >= 2:
                lines.append(
                    "Do not restart from scratch or return another working note; emit the "
                    "next evidence-gathering tool call now."
                )
            else:
                lines.append(
                    "Do not restart from scratch unless one specific missing fact blocks "
                    "that discovery step."
                )
            return lines

        # Branch 2: walk planned targets in order; the first unsatisfied one
        # produces the instructions and terminates the loop via return.
        for target, expect_directory in collect_planned_artifact_targets(
            dod,
            project_root=self.context.project_root,
            max_paths=12,
        ):
            if planned_artifact_target_satisfied(
                dod,
                target=target,
                expect_directory=expect_directory,
                project_root=self.context.project_root,
            ):
                continue
            label = self._format_artifact_label(
                target,
                expect_directory=expect_directory,
            )
            if expect_directory:
                # Directory target: prefer naming the exact next declared
                # output file inside it when one can be inferred.
                next_output_file = infer_next_declared_html_output_file(
                    target=target,
                    project_root=self.context.project_root,
                )
                if next_output_file is not None:
                    next_output_label = self._format_artifact_label(
                        next_output_file,
                        expect_directory=False,
                    )
                    if next_pending and _todo_is_mutation_step(next_pending):
                        lines = [
                            "Resume with this exact next step: continue "
                            f"`{next_pending}` by creating {next_output_label}."
                        ]
                    else:
                        lines = [
                            "Resume with this exact next step: create "
                            f"{next_output_label}."
                        ]
                    lines.append(
                        f"It is the next missing declared output under {label}."
                    )
                    lines.append(
                        f"Prefer one `write` call for `{next_output_file}` before more research."
                    )
                    if not next_output_file.parent.exists():
                        # Pre-empt a wasted turn on mkdir: `write` creates parents.
                        lines.append(
                            "The `write` tool can create that file's parent directories "
                            "automatically, so do the write in one step instead of stopping "
                            "for a separate mkdir."
                        )
                    if retry_number >= 2:
                        lines.append(
                            "Do not restart discovery; emit the next mutation tool call now."
                        )
                    else:
                        lines.append(
                            "Do not restart discovery unless one specific missing fact blocks this step."
                        )
                    return lines
            if expect_directory and target.is_dir():
                # Directory exists but is not yet satisfied: ask for the next
                # output file inside it (no specific file could be inferred).
                if next_pending and _todo_is_mutation_step(next_pending):
                    lines = [
                        "Resume with this exact next step: continue "
                        f"`{next_pending}` by creating the next output file under {label}."
                    ]
                else:
                    lines = [
                        "Resume with this exact next step: create the next output file "
                        f"under {label}."
                    ]
                lines.append(
                    f"Prefer one concrete `write` call for a file inside `{target}` before more research."
                )
            else:
                # Missing directory, or a plain file target.
                lines = [f"Resume with this exact next step: create {label}."]
                if expect_directory and not target.is_dir():
                    lines.append(
                        f"Prefer one concrete directory-creation step for `{target}` before more research."
                    )
                elif not expect_directory:
                    lines.append(
                        f"Prefer one `write` call for `{target}` before any more reference reads."
                    )
                    if not target.parent.exists():
                        lines.append(
                            "The `write` tool can create that file's parent directories "
                            "automatically, so do the write in one step instead of stopping "
                            "for a separate mkdir."
                        )
            # Shared tail for the non-inferred-output branches above.
            lines.append(
                "Shape the next response as one concrete `write(file_path=..., "
                "content=...)` tool call for that exact path."
            )
            if completed_artifacts >= 3:
                lines.append(
                    "Follow the same one-file-at-a-time mutation pattern that already "
                    "created the confirmed planned artifacts."
                )
            lines.append(
                "Your next response should be the concrete mutation tool call itself, "
                "not TodoWrite alone, verification, or a completion summary."
            )
            if retry_number >= 2:
                lines.append(
                    "Do not restart discovery; emit the next mutation tool call now."
                )
            else:
                lines.append(
                    "Do not restart discovery unless one specific missing fact blocks this step."
                )
            return lines
        return []
| 574 | + | |
| 575 | + @staticmethod | |
| 576 | + def _format_artifact_label(path: Path, *, expect_directory: bool) -> str: | |
| 577 | + label = path.name or str(path) | |
| 578 | + if expect_directory and not label.endswith("/"): | |
| 579 | + label += "/" | |
| 580 | + return f"`{label}`" | |
| 581 | + | |
| 582 | + def _latest_working_note(self) -> str | None: | |
| 583 | + messages = list(getattr(self.context.session, "messages", []) or []) | |
| 584 | + for message in reversed(messages): | |
| 585 | + content = str(getattr(message, "content", "") or "").strip() | |
| 586 | + if not content: | |
| 587 | + continue | |
| 588 | + for tool_name in _WORKING_NOTE_TOOL_NAMES: | |
| 589 | + prefix = f"Observation [{tool_name}]: Result:" | |
| 590 | + if prefix not in content: | |
| 591 | + continue | |
| 592 | + note = content.split(prefix, 1)[1].strip() | |
| 593 | + if not note: | |
| 594 | + continue | |
| 595 | + first_line = next( | |
| 596 | + (line.strip() for line in note.splitlines() if line.strip()), | |
| 597 | + "", | |
| 598 | + ) | |
| 599 | + if not first_line: | |
| 600 | + continue | |
| 601 | + first_line = re.sub(r"^-\s*\[[^\]]+\]\s*", "", first_line).strip() | |
| 602 | + return first_line or None | |
| 603 | + return None | |
| 604 | + | |
| 605 | + | |
def _todo_is_mutation_step(label: str) -> bool:
    """Return whether the todo label describes an artifact-writing step."""
    folded = label.lower()
    return any(hint in folded for hint in _MUTATION_TODO_HINTS)
| 609 | + | |
| 610 | + | |
def _todo_is_consistency_review_step(label: str) -> bool:
    """Return whether the todo label describes a consistency-review step."""
    folded = label.lower()
    return any(hint in folded for hint in _CONSISTENCY_REVIEW_HINTS)
src/loader/runtime/repair_focus.pyadded@@ -0,0 +1,132 @@ | ||
| 1 | +"""Shared helpers for extracting and enforcing active repair focus.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +import re | |
| 6 | +from dataclasses import dataclass | |
| 7 | +from os import sep | |
| 8 | +from pathlib import Path | |
| 9 | + | |
| 10 | +from ..llm.base import Message | |
| 11 | + | |
| 12 | + | |
@dataclass(frozen=True)
class ActiveRepairContext:
    """Concrete repair focus extracted from recent verification feedback."""

    # Normalized path from the "Immediate next step: edit `...`" bullet
    # ("" when no such bullet was found).
    artifact_path: str
    # The captured "- ..." bullet lines of the "Repair focus:" section, verbatim.
    # NOTE(review): a mutable list field inside a frozen dataclass — instances
    # are not deeply immutable or safely hashable; tuple[str, ...] would be
    # stricter, but changing it would alter the public field type.
    repair_lines: list[str]
    # Normalized absolute paths mentioned in the bullets, existing files first.
    allowed_paths: tuple[str, ...]
    # Collapsed parent directories of allowed_paths (no root nested in another).
    allowed_roots: tuple[str, ...]
| 21 | + | |
| 22 | + | |
def extract_active_repair_context(
    messages: list[Message],
) -> ActiveRepairContext | None:
    """Return the most recent concrete repair target from session history.

    Scans messages newest-first for a line that is exactly ``Repair focus:``
    and captures the ``- `` bullet lines that follow it.  The first message
    that yields at least one bullet wins; returns None when no message does.
    """

    for message in reversed(messages):
        content = str(getattr(message, "content", "") or "")
        # Cheap containment check before the line-by-line parse.
        if "Repair focus:" not in content:
            continue

        repair_lines: list[str] = []
        artifact_path = ""
        absolute_paths: list[str] = []
        capture = False
        for raw_line in content.splitlines():
            line = raw_line.strip()
            if not capture:
                # Capture starts only at an exact (stripped) "Repair focus:" line.
                if line == "Repair focus:":
                    capture = True
                continue
            if not line:
                # A blank line ends the bullet list once bullets were seen.
                if repair_lines:
                    break
                continue
            if not line.startswith("- "):
                # A non-bullet line likewise terminates an in-progress list.
                if repair_lines:
                    break
                continue

            repair_lines.append(line)
            # First "Immediate next step: edit `...`" bullet fixes the artifact.
            if not artifact_path:
                match = re.search(r"Immediate next step: edit `([^`]+)`", line)
                if match:
                    artifact_path = normalize_repair_path(match.group(1))

            # Collect every backticked absolute (or ~-relative) path, deduped
            # in first-seen order.
            for candidate in re.findall(r"`([^`]+)`", line):
                if not candidate.startswith(("/", "~")):
                    continue
                normalized = normalize_repair_path(candidate)
                if normalized not in absolute_paths:
                    absolute_paths.append(normalized)

        if repair_lines:
            if artifact_path:
                # The named artifact always belongs to the allowed set.
                if artifact_path not in absolute_paths:
                    absolute_paths.insert(0, artifact_path)
            # Existing paths sort before missing ones (False < True).
            allowed_paths = tuple(
                sorted(
                    absolute_paths,
                    key=lambda item: (not Path(item).exists(), item),
                )
            )
            allowed_roots = _collapse_roots(_path_roots(set(absolute_paths)))
            return ActiveRepairContext(
                artifact_path=artifact_path,
                repair_lines=repair_lines,
                allowed_paths=allowed_paths,
                allowed_roots=allowed_roots,
            )
    return None
| 83 | + | |
| 84 | + | |
def path_within_allowed_roots(path: str, allowed_roots: tuple[str, ...]) -> bool:
    """Return whether the normalized path stays within the repair artifact set.

    A path qualifies when it equals one of the (normalized, non-blank) roots
    or sits anywhere beneath one of them.
    """

    candidate = normalize_repair_path(path)
    for raw_root in allowed_roots:
        if not str(raw_root).strip():
            continue
        root = normalize_repair_path(raw_root)
        if candidate == root or candidate.startswith(f"{root}{sep}"):
            return True
    return False
| 96 | + | |
| 97 | + | |
def path_matches_allowed_paths(path: str, allowed_paths: tuple[str, ...]) -> bool:
    """Return whether the normalized path matches one concrete repair file."""

    candidate = normalize_repair_path(path)
    for raw_allowed in allowed_paths:
        if not str(raw_allowed).strip():
            continue
        if normalize_repair_path(raw_allowed) == candidate:
            return True
    return False
| 106 | + | |
| 107 | + | |
def normalize_repair_path(raw_path: str) -> str:
    """Return an absolute, user-expanded form of *raw_path* ("" when blank).

    Falls back to the merely expanded form when resolution fails (e.g. a
    symlink loop or an invalid path on the current platform).
    """
    candidate = str(raw_path or "").strip()
    if not candidate:
        return ""
    expanded = Path(candidate).expanduser()
    try:
        return str(expanded.resolve(strict=False))
    except (OSError, RuntimeError, ValueError):
        return str(expanded)
| 116 | + | |
| 117 | + | |
| 118 | +def _path_roots(paths: set[str]) -> set[str]: | |
| 119 | + roots: set[str] = set() | |
| 120 | + for raw_path in paths: | |
| 121 | + path = Path(raw_path) | |
| 122 | + roots.add(str(path.parent)) | |
| 123 | + return roots | |
| 124 | + | |
| 125 | + | |
| 126 | +def _collapse_roots(roots: set[str]) -> tuple[str, ...]: | |
| 127 | + collapsed: list[str] = [] | |
| 128 | + for root in sorted(roots, key=lambda item: (len(item), item)): | |
| 129 | + if any(root == candidate or root.startswith(f"{candidate}{sep}") for candidate in collapsed): | |
| 130 | + continue | |
| 131 | + collapsed.append(root) | |
| 132 | + return tuple(collapsed) | |
src/loader/runtime/safeguard_services.pymodified@@ -8,8 +8,6 @@ from dataclasses import dataclass | ||
| 8 | 8 | from difflib import get_close_matches |
| 9 | 9 | from pathlib import Path |
| 10 | 10 | |
| 11 | -from .semantic_rules import html_toc as html_toc_rule | |
| 12 | - | |
| 13 | 11 | TEXT_REWRITE_SUFFIXES = frozenset( |
| 14 | 12 | { |
| 15 | 13 | ".c", |
@@ -145,7 +143,6 @@ class ActionTracker: | ||
| 145 | 143 | READ_REPEAT_THRESHOLD = 3 |
| 146 | 144 | SEARCH_REPEAT_THRESHOLD = 2 |
| 147 | 145 | BASH_OBSERVATION_REPEAT_THRESHOLD = 2 |
| 148 | - HTML_CHAPTER_EVIDENCE_THRESHOLD = 3 | |
| 149 | 146 | RECENT_PATH_CONTEXT_LIMIT = 12 |
| 150 | 147 | |
| 151 | 148 | def __init__(self) -> None: |
@@ -160,10 +157,7 @@ class ActionTracker: | ||
| 160 | 157 | self._recent_reads: dict[str, tuple[int, int, int]] = {} |
| 161 | 158 | self._recent_searches: dict[str, tuple[int, int, int]] = {} |
| 162 | 159 | self._recent_bash_observations: dict[str, tuple[int, int, int]] = {} |
| 163 | - self._recent_html_directory_reads: dict[str, tuple[int, set[str]]] = {} | |
| 164 | 160 | self._recent_path_contexts: list[str] = [] |
| 165 | - self._validated_html_tocs: dict[str, int] = {} | |
| 166 | - self._verified_html_inventory_dirs: set[str] = set() | |
| 167 | 161 | |
| 168 | 162 | def reset(self) -> None: |
| 169 | 163 | self._file_writes.clear() |
@@ -177,10 +171,7 @@ class ActionTracker: | ||
| 177 | 171 | self._recent_reads.clear() |
| 178 | 172 | self._recent_searches.clear() |
| 179 | 173 | self._recent_bash_observations.clear() |
| 180 | - self._recent_html_directory_reads.clear() | |
| 181 | 174 | self._recent_path_contexts.clear() |
| 182 | - self._validated_html_tocs.clear() | |
| 183 | - self._verified_html_inventory_dirs.clear() | |
| 184 | 175 | |
| 185 | 176 | def _normalize_path(self, path: str) -> str: |
| 186 | 177 | expanded = Path(path).expanduser() |
@@ -250,22 +241,6 @@ class ActionTracker: | ||
| 250 | 241 | def recent_path_contexts(self) -> list[str]: |
| 251 | 242 | return list(self._recent_path_contexts) |
| 252 | 243 | |
| 253 | - def note_validated_html_toc(self, index_path: str) -> None: | |
| 254 | - """Record that one index currently satisfies the semantic chapter-link check.""" | |
| 255 | - | |
| 256 | - normalized = self._normalize_path(index_path) | |
| 257 | - if not html_toc_rule.is_html_toc_index_path(normalized): | |
| 258 | - return | |
| 259 | - self._validated_html_tocs[normalized] = self._mutation_epoch | |
| 260 | - | |
| 261 | - def note_verified_html_inventory(self, index_path: str) -> None: | |
| 262 | - """Record that one sibling chapter inventory is already known exactly.""" | |
| 263 | - | |
| 264 | - normalized = self._normalize_path(index_path) | |
| 265 | - path = Path(normalized) | |
| 266 | - chapters_dir = path if html_toc_rule.is_html_toc_chapters_dir(path) else path.parent / "chapters" | |
| 267 | - self._verified_html_inventory_dirs.add(self._normalize_path(str(chapters_dir))) | |
| 268 | - | |
| 269 | 244 | def check_tool_call(self, tool_name: str, arguments: dict) -> tuple[bool, str]: |
| 270 | 245 | if tool_name == "write": |
| 271 | 246 | file_path = arguments.get("file_path", "") |
@@ -291,28 +266,8 @@ class ActionTracker: | ||
| 291 | 266 | return True, f"Same patch already applied to: {file_path}" |
| 292 | 267 | |
| 293 | 268 | elif tool_name == "read": |
| 294 | - inventory_duplicate, inventory_reason = self._check_verified_html_inventory_observation( | |
| 295 | - tool_name, | |
| 296 | - arguments, | |
| 297 | - ) | |
| 298 | - if inventory_duplicate: | |
| 299 | - return True, inventory_reason | |
| 300 | - validated_duplicate, validated_reason = self._check_validated_html_toc_observation( | |
| 301 | - tool_name, | |
| 302 | - arguments, | |
| 303 | - ) | |
| 304 | - if validated_duplicate: | |
| 305 | - return True, validated_reason | |
| 306 | 269 | read_key = self._make_read_key(arguments) |
| 307 | 270 | if read_key: |
| 308 | - sufficiency_duplicate, sufficiency_reason = ( | |
| 309 | - self._check_html_observation_sufficiency( | |
| 310 | - tool_name, | |
| 311 | - arguments, | |
| 312 | - ) | |
| 313 | - ) | |
| 314 | - if sufficiency_duplicate: | |
| 315 | - return True, sufficiency_reason | |
| 316 | 271 | duplicate, reason = self._check_recent_observation( |
| 317 | 272 | self._recent_reads, |
| 318 | 273 | read_key, |
@@ -328,28 +283,8 @@ class ActionTracker: | ||
| 328 | 283 | return True, reason |
| 329 | 284 | |
| 330 | 285 | elif tool_name in {"glob", "grep"}: |
| 331 | - inventory_duplicate, inventory_reason = self._check_verified_html_inventory_observation( | |
| 332 | - tool_name, | |
| 333 | - arguments, | |
| 334 | - ) | |
| 335 | - if inventory_duplicate: | |
| 336 | - return True, inventory_reason | |
| 337 | - validated_duplicate, validated_reason = self._check_validated_html_toc_observation( | |
| 338 | - tool_name, | |
| 339 | - arguments, | |
| 340 | - ) | |
| 341 | - if validated_duplicate: | |
| 342 | - return True, validated_reason | |
| 343 | 286 | observation_key = self._make_search_key(tool_name, arguments) |
| 344 | 287 | if observation_key: |
| 345 | - sufficiency_duplicate, sufficiency_reason = ( | |
| 346 | - self._check_html_observation_sufficiency( | |
| 347 | - tool_name, | |
| 348 | - arguments, | |
| 349 | - ) | |
| 350 | - ) | |
| 351 | - if sufficiency_duplicate: | |
| 352 | - return True, sufficiency_reason | |
| 353 | 288 | duplicate, reason = self._check_recent_observation( |
| 354 | 289 | self._recent_searches, |
| 355 | 290 | observation_key, |
@@ -365,18 +300,6 @@ class ActionTracker: | ||
| 365 | 300 | elif tool_name == "bash": |
| 366 | 301 | command = str(arguments.get("command", "")).strip() |
| 367 | 302 | if self._is_observational_bash(command): |
| 368 | - inventory_duplicate, inventory_reason = self._check_verified_html_inventory_observation( | |
| 369 | - tool_name, | |
| 370 | - arguments, | |
| 371 | - ) | |
| 372 | - if inventory_duplicate: | |
| 373 | - return True, inventory_reason | |
| 374 | - validated_duplicate, validated_reason = self._check_validated_html_toc_observation( | |
| 375 | - tool_name, | |
| 376 | - arguments, | |
| 377 | - ) | |
| 378 | - if validated_duplicate: | |
| 379 | - return True, validated_reason | |
| 380 | 303 | duplicate, reason = self._check_recent_observation( |
| 381 | 304 | self._recent_bash_observations, |
| 382 | 305 | self._normalize_command(command), |
@@ -406,7 +329,6 @@ class ActionTracker: | ||
| 406 | 329 | if file_path: |
| 407 | 330 | self.record_file_create(file_path, content) |
| 408 | 331 | self._record_path_context(file_path) |
| 409 | - self._clear_verified_html_inventory_for_path(file_path) | |
| 410 | 332 | self._note_mutation() |
| 411 | 333 | |
| 412 | 334 | elif tool_name == "edit": |
@@ -416,7 +338,6 @@ class ActionTracker: | ||
| 416 | 338 | if file_path: |
| 417 | 339 | self.record_edit(file_path, old_string, new_string) |
| 418 | 340 | self._record_path_context(file_path) |
| 419 | - self._clear_verified_html_inventory_for_path(file_path) | |
| 420 | 341 | self._note_mutation() |
| 421 | 342 | |
| 422 | 343 | elif tool_name == "patch": |
@@ -429,7 +350,6 @@ class ActionTracker: | ||
| 429 | 350 | elif isinstance(raw_patch, str) and raw_patch.strip(): |
| 430 | 351 | self.record_edit(file_path, raw_patch, "raw_patch") |
| 431 | 352 | self._record_path_context(file_path) |
| 432 | - self._clear_verified_html_inventory_for_path(file_path) | |
| 433 | 353 | self._note_mutation() |
| 434 | 354 | |
| 435 | 355 | elif tool_name == "read": |
@@ -442,7 +362,6 @@ class ActionTracker: | ||
| 442 | 362 | file_path = str(arguments.get("file_path", "")).strip() |
| 443 | 363 | if file_path: |
| 444 | 364 | self._record_path_context(file_path) |
| 445 | - self._record_html_directory_read(arguments) | |
| 446 | 365 | |
| 447 | 366 | elif tool_name in {"glob", "grep"}: |
| 448 | 367 | observation_key = self._make_search_key(tool_name, arguments) |
@@ -460,9 +379,6 @@ class ActionTracker: | ||
| 460 | 379 | if command: |
| 461 | 380 | self.record_command(command) |
| 462 | 381 | if self._is_mutating_bash(command): |
| 463 | - target = extract_shell_text_rewrite_target(command) | |
| 464 | - if target: | |
| 465 | - self._clear_verified_html_inventory_for_path(target) | |
| 466 | 382 | self._note_mutation() |
| 467 | 383 | elif self._is_observational_bash(command): |
| 468 | 384 | self._record_observation( |
@@ -678,230 +594,6 @@ class ActionTracker: | ||
| 678 | 594 | if len(self._recent_path_contexts) > self.RECENT_PATH_CONTEXT_LIMIT: |
| 679 | 595 | del self._recent_path_contexts[self.RECENT_PATH_CONTEXT_LIMIT :] |
| 680 | 596 | |
| 681 | - def _record_html_directory_read(self, arguments: dict) -> None: | |
| 682 | - file_path = str(arguments.get("file_path", "")).strip() | |
| 683 | - if not file_path: | |
| 684 | - return | |
| 685 | - normalized_path = self._normalize_path(file_path) | |
| 686 | - path = Path(normalized_path) | |
| 687 | - if not html_toc_rule.is_html_toc_chapter_file(path): | |
| 688 | - return | |
| 689 | - | |
| 690 | - directory = str(path.parent) | |
| 691 | - last_seen = self._recent_html_directory_reads.get(directory) | |
| 692 | - if last_seen is None or last_seen[0] != self._mutation_epoch: | |
| 693 | - self._recent_html_directory_reads[directory] = ( | |
| 694 | - self._mutation_epoch, | |
| 695 | - {path.name}, | |
| 696 | - ) | |
| 697 | - return | |
| 698 | - | |
| 699 | - _, seen_files = last_seen | |
| 700 | - updated = set(seen_files) | |
| 701 | - updated.add(path.name) | |
| 702 | - self._recent_html_directory_reads[directory] = ( | |
| 703 | - self._mutation_epoch, | |
| 704 | - updated, | |
| 705 | - ) | |
| 706 | - | |
| 707 | - def _check_html_observation_sufficiency( | |
| 708 | - self, | |
| 709 | - tool_name: str, | |
| 710 | - arguments: dict, | |
| 711 | - ) -> tuple[bool, str]: | |
| 712 | - if tool_name == "read": | |
| 713 | - file_path = str(arguments.get("file_path", "")).strip() | |
| 714 | - if not file_path: | |
| 715 | - return False, "" | |
| 716 | - normalized_path = self._normalize_path(file_path) | |
| 717 | - path = Path(normalized_path) | |
| 718 | - if not html_toc_rule.is_html_toc_index_path(path): | |
| 719 | - return False, "" | |
| 720 | - chapters_dir = str(path.parent / "chapters") | |
| 721 | - chapter_count = self._chapter_evidence_count(chapters_dir) | |
| 722 | - if chapter_count < self.HTML_CHAPTER_EVIDENCE_THRESHOLD: | |
| 723 | - return False, "" | |
| 724 | - read_key = self._make_read_key(arguments) | |
| 725 | - if read_key is None: | |
| 726 | - return False, "" | |
| 727 | - last_seen = self._recent_reads.get(read_key) | |
| 728 | - if last_seen is None: | |
| 729 | - return False, "" | |
| 730 | - _, _, repeat_count = last_seen | |
| 731 | - if repeat_count < 2: | |
| 732 | - return False, "" | |
| 733 | - return ( | |
| 734 | - True, | |
| 735 | - "Already confirmed multiple linked chapter files in " | |
| 736 | - f"{html_toc_rule.describe_html_toc_chapters_dir(path)}; reuse that file/title " | |
| 737 | - f"evidence and update {html_toc_rule.describe_html_toc_target(path)} instead of " | |
| 738 | - "rereading it", | |
| 739 | - ) | |
| 740 | - | |
| 741 | - if tool_name in {"glob", "grep"}: | |
| 742 | - search_path = str(arguments.get("path", "")).strip() | |
| 743 | - if not search_path: | |
| 744 | - return False, "" | |
| 745 | - normalized_path = self._normalize_path(search_path) | |
| 746 | - path = Path(normalized_path) | |
| 747 | - if not html_toc_rule.is_html_toc_chapters_dir(path): | |
| 748 | - return False, "" | |
| 749 | - chapter_count = self._chapter_evidence_count(str(path)) | |
| 750 | - if chapter_count < self.HTML_CHAPTER_EVIDENCE_THRESHOLD: | |
| 751 | - return False, "" | |
| 752 | - observation_key = self._make_search_key(tool_name, arguments) | |
| 753 | - if observation_key is None or observation_key not in self._recent_searches: | |
| 754 | - return False, "" | |
| 755 | - return ( | |
| 756 | - True, | |
| 757 | - "Already confirmed multiple linked chapter files in " | |
| 758 | - f"{html_toc_rule.describe_html_toc_chapters_dir(path)}; reuse that filename/title " | |
| 759 | - f"evidence and update {html_toc_rule.describe_html_toc_target(path)} instead of " | |
| 760 | - "rerunning the directory search", | |
| 761 | - ) | |
| 762 | - | |
| 763 | - return False, "" | |
| 764 | - | |
| 765 | - def _chapter_evidence_count(self, directory: str) -> int: | |
| 766 | - last_seen = self._recent_html_directory_reads.get(directory) | |
| 767 | - if last_seen is None: | |
| 768 | - return 0 | |
| 769 | - last_epoch, seen_files = last_seen | |
| 770 | - if last_epoch != self._mutation_epoch: | |
| 771 | - return 0 | |
| 772 | - return len(seen_files) | |
| 773 | - | |
| 774 | - def _check_validated_html_toc_observation( | |
| 775 | - self, | |
| 776 | - tool_name: str, | |
| 777 | - arguments: dict, | |
| 778 | - ) -> tuple[bool, str]: | |
| 779 | - related_paths = self._validated_html_related_paths(tool_name, arguments) | |
| 780 | - if not related_paths: | |
| 781 | - return False, "" | |
| 782 | - | |
| 783 | - for path in related_paths: | |
| 784 | - if self._matches_validated_html_toc(path): | |
| 785 | - return ( | |
| 786 | - True, | |
| 787 | - html_toc_rule.build_validated_html_toc_observation_reason(path), | |
| 788 | - ) | |
| 789 | - return False, "" | |
| 790 | - | |
| 791 | - def _check_verified_html_inventory_observation( | |
| 792 | - self, | |
| 793 | - tool_name: str, | |
| 794 | - arguments: dict, | |
| 795 | - ) -> tuple[bool, str]: | |
| 796 | - related_paths = self._verified_inventory_related_paths(tool_name, arguments) | |
| 797 | - if not related_paths: | |
| 798 | - return False, "" | |
| 799 | - | |
| 800 | - for path in related_paths: | |
| 801 | - if self._matches_verified_html_inventory(path): | |
| 802 | - return ( | |
| 803 | - True, | |
| 804 | - html_toc_rule.build_verified_html_inventory_observation_reason(path), | |
| 805 | - ) | |
| 806 | - return False, "" | |
| 807 | - | |
| 808 | - def _validated_html_related_paths( | |
| 809 | - self, | |
| 810 | - tool_name: str, | |
| 811 | - arguments: dict, | |
| 812 | - ) -> list[str]: | |
| 813 | - if tool_name == "read": | |
| 814 | - file_path = str(arguments.get("file_path", "")).strip() | |
| 815 | - return [self._normalize_path(file_path)] if file_path else [] | |
| 816 | - | |
| 817 | - if tool_name in {"glob", "grep"}: | |
| 818 | - search_path = str(arguments.get("path", "")).strip() | |
| 819 | - return [self._normalize_path(search_path)] if search_path else [] | |
| 820 | - | |
| 821 | - if tool_name == "bash": | |
| 822 | - command = str(arguments.get("command", "")).strip() | |
| 823 | - if not command: | |
| 824 | - return [] | |
| 825 | - return self._extract_observational_bash_paths(command) | |
| 826 | - | |
| 827 | - return [] | |
| 828 | - | |
| 829 | - def _verified_inventory_related_paths( | |
| 830 | - self, | |
| 831 | - tool_name: str, | |
| 832 | - arguments: dict, | |
| 833 | - ) -> list[str]: | |
| 834 | - if tool_name == "read": | |
| 835 | - file_path = str(arguments.get("file_path", "")).strip() | |
| 836 | - return [self._normalize_path(file_path)] if file_path else [] | |
| 837 | - | |
| 838 | - if tool_name in {"glob", "grep"}: | |
| 839 | - search_path = str(arguments.get("path", "")).strip() | |
| 840 | - return [self._normalize_path(search_path)] if search_path else [] | |
| 841 | - | |
| 842 | - if tool_name == "bash": | |
| 843 | - command = str(arguments.get("command", "")).strip() | |
| 844 | - if not command: | |
| 845 | - return [] | |
| 846 | - return self._extract_observational_bash_paths(command) | |
| 847 | - | |
| 848 | - return [] | |
| 849 | - | |
| 850 | - def _matches_validated_html_toc(self, path: str) -> bool: | |
| 851 | - normalized = self._normalize_path(path) | |
| 852 | - candidate = Path(normalized) | |
| 853 | - for index_path, epoch in self._validated_html_tocs.items(): | |
| 854 | - if epoch != self._mutation_epoch: | |
| 855 | - continue | |
| 856 | - index = Path(index_path) | |
| 857 | - chapters = Path(self._normalize_path(str(index.parent / "chapters"))) | |
| 858 | - if candidate == index or candidate == chapters: | |
| 859 | - return True | |
| 860 | - if candidate.parent == chapters: | |
| 861 | - return True | |
| 862 | - return False | |
| 863 | - | |
| 864 | - def _matches_verified_html_inventory(self, path: str) -> bool: | |
| 865 | - normalized = self._normalize_path(path) | |
| 866 | - candidate = Path(normalized) | |
| 867 | - for directory in self._verified_html_inventory_dirs: | |
| 868 | - chapters = Path(directory) | |
| 869 | - if candidate == chapters or candidate.parent == chapters: | |
| 870 | - return True | |
| 871 | - return False | |
| 872 | - | |
| 873 | - def _clear_verified_html_inventory_for_path(self, path_value: str) -> None: | |
| 874 | - normalized = self._normalize_path(path_value) | |
| 875 | - candidate = Path(normalized) | |
| 876 | - stale: set[str] = set() | |
| 877 | - for directory in self._verified_html_inventory_dirs: | |
| 878 | - chapters = Path(directory) | |
| 879 | - if candidate == chapters or candidate.parent == chapters: | |
| 880 | - stale.add(directory) | |
| 881 | - self._verified_html_inventory_dirs.difference_update(stale) | |
| 882 | - | |
| 883 | - def _extract_observational_bash_paths(self, command: str) -> list[str]: | |
| 884 | - norm_cmd = self._normalize_command(command) | |
| 885 | - try: | |
| 886 | - argv = shlex.split(norm_cmd) | |
| 887 | - except ValueError: | |
| 888 | - return [] | |
| 889 | - if not argv: | |
| 890 | - return [] | |
| 891 | - | |
| 892 | - paths: list[str] = [] | |
| 893 | - for token in argv[1:]: | |
| 894 | - candidate = _strip_shell_token(token) | |
| 895 | - if not candidate or candidate.startswith("-"): | |
| 896 | - continue | |
| 897 | - if any(marker in candidate for marker in ("/", "~")) or Path(candidate).suffix == ".html": | |
| 898 | - paths.append(self._normalize_path(candidate)) | |
| 899 | - continue | |
| 900 | - if candidate.rstrip("/").endswith("chapters"): | |
| 901 | - paths.append(self._normalize_path(candidate)) | |
| 902 | - return paths | |
| 903 | - | |
| 904 | - | |
| 905 | 597 | @dataclass |
| 906 | 598 | class ValidationResult: |
| 907 | 599 | """Result of pre-action validation.""" |
@@ -1023,6 +715,10 @@ class PreActionValidator: | ||
| 1023 | 715 | if not path_result.valid: |
| 1024 | 716 | return path_result |
| 1025 | 717 | |
| 718 | + sibling_result = self._validate_numbered_sibling_conflict(str(file_path)) | |
| 719 | + if not sibling_result.valid: | |
| 720 | + return sibling_result | |
| 721 | + | |
| 1026 | 722 | if content is None or (isinstance(content, str) and not content.strip()): |
| 1027 | 723 | return ValidationResult( |
| 1028 | 724 | valid=True, |
@@ -1040,6 +736,13 @@ class PreActionValidator: | ||
| 1040 | 736 | severity="block", |
| 1041 | 737 | ) |
| 1042 | 738 | |
| 739 | + html_declared_target_result = self._validate_html_declared_target_set( | |
| 740 | + str(file_path), | |
| 741 | + str(content), | |
| 742 | + ) | |
| 743 | + if not html_declared_target_result.valid: | |
| 744 | + return html_declared_target_result | |
| 745 | + | |
| 1043 | 746 | return ValidationResult(valid=True) |
| 1044 | 747 | |
| 1045 | 748 | def _validate_edit(self, arguments: dict) -> ValidationResult: |
@@ -1087,6 +790,13 @@ class PreActionValidator: | ||
| 1087 | 790 | if not html_index_result.valid: |
| 1088 | 791 | return html_index_result |
| 1089 | 792 | |
| 793 | + html_declared_target_result = self._validate_html_declared_target_set( | |
| 794 | + str(file_path), | |
| 795 | + str(new_string), | |
| 796 | + ) | |
| 797 | + if not html_declared_target_result.valid: | |
| 798 | + return html_declared_target_result | |
| 799 | + | |
| 1090 | 800 | return ValidationResult(valid=True) |
| 1091 | 801 | |
| 1092 | 802 | def _validate_patch(self, arguments: dict) -> ValidationResult: |
@@ -1106,6 +816,10 @@ class PreActionValidator: | ||
| 1106 | 816 | if not path_result.valid: |
| 1107 | 817 | return path_result |
| 1108 | 818 | |
| 819 | + sibling_result = self._validate_numbered_sibling_conflict(str(file_path)) | |
| 820 | + if not sibling_result.valid: | |
| 821 | + return sibling_result | |
| 822 | + | |
| 1109 | 823 | has_hunks = isinstance(hunks, list) and bool(hunks) |
| 1110 | 824 | has_raw_patch = isinstance(raw_patch, str) and bool(raw_patch.strip()) |
| 1111 | 825 | if not has_hunks and not has_raw_patch: |
@@ -1118,6 +832,42 @@ class PreActionValidator: | ||
| 1118 | 832 | |
| 1119 | 833 | return ValidationResult(valid=True) |
| 1120 | 834 | |
| 835 | + def _validate_numbered_sibling_conflict(self, file_path: str) -> ValidationResult: | |
| 836 | + path = Path(file_path).expanduser() | |
| 837 | + if path.exists() or not path.suffix or not path.parent.exists(): | |
| 838 | + return ValidationResult(valid=True) | |
| 839 | + | |
| 840 | + prefix_match = re.match(r"^(\d+)[-_]", path.name) | |
| 841 | + if prefix_match is None: | |
| 842 | + return ValidationResult(valid=True) | |
| 843 | + | |
| 844 | + prefix = prefix_match.group(1) | |
| 845 | + siblings = sorted( | |
| 846 | + candidate | |
| 847 | + for candidate in path.parent.iterdir() | |
| 848 | + if ( | |
| 849 | + candidate.is_file() | |
| 850 | + and candidate.suffix == path.suffix | |
| 851 | + and candidate.name != path.name | |
| 852 | + and re.match(rf"^{re.escape(prefix)}[-_]", candidate.name) | |
| 853 | + ) | |
| 854 | + ) | |
| 855 | + if not siblings: | |
| 856 | + return ValidationResult(valid=True) | |
| 857 | + | |
| 858 | + preview = ", ".join(candidate.name for candidate in siblings[:3]) | |
| 859 | + if len(siblings) > 3: | |
| 860 | + preview += ", ..." | |
| 861 | + return ValidationResult( | |
| 862 | + valid=False, | |
| 863 | + reason="New file conflicts with an existing numbered sibling", | |
| 864 | + suggestion=( | |
| 865 | + f"Reuse the confirmed numbered file in `{path.parent}` instead of " | |
| 866 | + f"creating an alternate filename for step {prefix}, for example: {preview}" | |
| 867 | + ), | |
| 868 | + severity="error", | |
| 869 | + ) | |
| 870 | + | |
| 1121 | 871 | def _validate_read(self, arguments: dict) -> ValidationResult: |
| 1122 | 872 | file_path = arguments.get("file_path", "") |
| 1123 | 873 | |
@@ -1129,7 +879,19 @@ class PreActionValidator: | ||
| 1129 | 879 | severity="error", |
| 1130 | 880 | ) |
| 1131 | 881 | |
| 1132 | - return self._validate_path(file_path) | |
| 882 | + path_result = self._validate_path(file_path) | |
| 883 | + if not path_result.valid: | |
| 884 | + return path_result | |
| 885 | + | |
| 886 | + sibling_result = self._validate_numbered_sibling_conflict(str(file_path)) | |
| 887 | + if not sibling_result.valid: | |
| 888 | + return ValidationResult( | |
| 889 | + valid=False, | |
| 890 | + reason="Read target conflicts with an existing numbered sibling", | |
| 891 | + suggestion=sibling_result.suggestion, | |
| 892 | + severity="error", | |
| 893 | + ) | |
| 894 | + return path_result | |
| 1133 | 895 | |
| 1134 | 896 | def _validate_search(self, tool_name: str, arguments: dict) -> ValidationResult: |
| 1135 | 897 | pattern = arguments.get("pattern", "") |
@@ -1150,7 +912,7 @@ class PreActionValidator: | ||
| 1150 | 912 | content: str, |
| 1151 | 913 | ) -> ValidationResult: |
| 1152 | 914 | normalized = Path(file_path).expanduser() |
| 1153 | - if not html_toc_rule.is_html_toc_index_path(normalized) or "<a " not in content: | |
| 915 | + if normalized.suffix.lower() != ".html" or "<a " not in content: | |
| 1154 | 916 | return ValidationResult(valid=True) |
| 1155 | 917 | |
| 1156 | 918 | link_pairs = re.findall(r'<a\s+href="([^"]+)">([^<]+)</a>', content) |
@@ -1159,65 +921,147 @@ class PreActionValidator: | ||
| 1159 | 921 | |
| 1160 | 922 | root = normalized.parent |
| 1161 | 923 | missing: list[str] = [] |
| 1162 | - mismatched: list[str] = [] | |
| 1163 | - for href, label in link_pairs: | |
| 924 | + for href, _label in link_pairs: | |
| 925 | + target_text = href.strip() | |
| 926 | + if not target_text or target_text.startswith(("#", "mailto:", "tel:", "javascript:")): | |
| 927 | + continue | |
| 928 | + if "://" in target_text: | |
| 929 | + continue | |
| 1164 | 930 | target = (root / href).resolve(strict=False) |
| 1165 | 931 | if not target.exists(): |
| 1166 | 932 | if href not in missing: |
| 1167 | 933 | missing.append(href) |
| 1168 | - continue | |
| 1169 | - | |
| 1170 | - title = html_toc_rule.read_html_title(target) | |
| 1171 | - if title and label.strip() != title: | |
| 1172 | - if href not in mismatched: | |
| 1173 | - mismatched.append(href) | |
| 1174 | 934 | |
| 1175 | 935 | if missing: |
| 1176 | - suggestions = self._suggest_existing_html_targets(root, missing) | |
| 1177 | - preview_items = [ | |
| 1178 | - html_toc_rule.format_html_inventory_entry(root, root / suggestion) | |
| 1179 | - for suggestion in suggestions | |
| 1180 | - ] | |
| 1181 | - if not preview_items: | |
| 1182 | - preview_items = missing | |
| 1183 | - preview = ", ".join(preview_items[:3]) | |
| 1184 | - if len(preview_items) > 3: | |
| 936 | + preview = ", ".join(missing[:3]) | |
| 937 | + if len(missing) > 3: | |
| 1185 | 938 | preview += ", ..." |
| 1186 | 939 | return ValidationResult( |
| 1187 | 940 | valid=False, |
| 1188 | - reason="Edited TOC references chapter files that do not exist", | |
| 1189 | - suggestion=( | |
| 1190 | - f"Use only existing chapter href/title pairs from beside " | |
| 1191 | - f"{html_toc_rule.describe_html_toc_target(normalized)}, for example: " | |
| 1192 | - f"{preview}" | |
| 1193 | - ), | |
| 1194 | - severity="error", | |
| 1195 | - ) | |
| 1196 | - | |
| 1197 | - if mismatched: | |
| 1198 | - exact_entries = [ | |
| 1199 | - html_toc_rule.format_html_inventory_entry(root, (root / href).resolve(strict=False)) | |
| 1200 | - for href in mismatched | |
| 1201 | - if (root / href).resolve(strict=False).exists() | |
| 1202 | - ] | |
| 1203 | - if not exact_entries: | |
| 1204 | - exact_entries = mismatched | |
| 1205 | - preview = "; ".join(exact_entries[:2]) | |
| 1206 | - if len(exact_entries) > 2: | |
| 1207 | - preview += "; ..." | |
| 1208 | - return ValidationResult( | |
| 1209 | - valid=False, | |
| 1210 | - reason="Edited TOC labels do not match the linked chapter titles", | |
| 941 | + reason="Edited HTML links point to files that do not exist", | |
| 1211 | 942 | suggestion=( |
| 1212 | - f"Copy the exact href/title pair from the linked HTML file for " | |
| 1213 | - f"{html_toc_rule.describe_html_toc_target(normalized)}, for example: " | |
| 1214 | - f"{preview}" | |
| 943 | + "Use only existing local targets for href values and avoid " | |
| 944 | + f"introducing missing links, for example fix: {preview}" | |
| 1215 | 945 | ), |
| 1216 | 946 | severity="error", |
| 1217 | 947 | ) |
| 1218 | 948 | |
| 1219 | 949 | return ValidationResult(valid=True) |
| 1220 | 950 | |
| 951 | + def _validate_html_declared_target_set( | |
| 952 | + self, | |
| 953 | + file_path: str, | |
| 954 | + content: str, | |
| 955 | + ) -> ValidationResult: | |
| 956 | + normalized = Path(file_path).expanduser() | |
| 957 | + if normalized.suffix.lower() != ".html" or normalized.name.lower() == "index.html": | |
| 958 | + return ValidationResult(valid=True) | |
| 959 | + | |
| 960 | + local_targets = self._collect_local_html_targets(normalized, content) | |
| 961 | + if not local_targets: | |
| 962 | + return ValidationResult(valid=True) | |
| 963 | + | |
| 964 | + root = self._resolve_html_artifact_root(normalized) | |
| 965 | + existing_html_files = [ | |
| 966 | + path | |
| 967 | + for path in root.rglob("*.html") | |
| 968 | + if path.is_file() and path != normalized | |
| 969 | + ] | |
| 970 | + if not existing_html_files: | |
| 971 | + return ValidationResult(valid=True) | |
| 972 | + | |
| 973 | + declared_targets = self._collect_declared_html_targets(root, existing_html_files) | |
| 974 | + undeclared_missing: list[str] = [] | |
| 975 | + for href, resolved in local_targets: | |
| 976 | + if resolved.exists(): | |
| 977 | + continue | |
| 978 | + relative_target = self._relative_html_target(root, resolved) | |
| 979 | + if relative_target is None: | |
| 980 | + continue | |
| 981 | + if relative_target not in declared_targets and href not in undeclared_missing: | |
| 982 | + undeclared_missing.append(href) | |
| 983 | + | |
| 984 | + if not undeclared_missing: | |
| 985 | + return ValidationResult(valid=True) | |
| 986 | + | |
| 987 | + preview = ", ".join(undeclared_missing[:3]) | |
| 988 | + if len(undeclared_missing) > 3: | |
| 989 | + preview += ", ..." | |
| 990 | + declared_preview = ", ".join(sorted(declared_targets)[:3]) | |
| 991 | + suggestion = ( | |
| 992 | + "Keep non-root HTML pages within the current declared local-link set and " | |
| 993 | + f"avoid introducing new missing sibling targets, for example fix: {preview}" | |
| 994 | + ) | |
| 995 | + if declared_preview: | |
| 996 | + suggestion += f". Already-declared local targets include: {declared_preview}" | |
| 997 | + return ValidationResult( | |
| 998 | + valid=False, | |
| 999 | + reason="HTML page introduces new local targets outside the current declared artifact set", | |
| 1000 | + suggestion=suggestion, | |
| 1001 | + severity="error", | |
| 1002 | + ) | |
| 1003 | + | |
| 1004 | + def _collect_local_html_targets( | |
| 1005 | + self, | |
| 1006 | + file_path: Path, | |
| 1007 | + content: str, | |
| 1008 | + ) -> list[tuple[str, Path]]: | |
| 1009 | + pattern = re.compile(r'href\s*=\s*["\']([^"\']+)["\']', re.IGNORECASE) | |
| 1010 | + targets: list[tuple[str, Path]] = [] | |
| 1011 | + seen: set[str] = set() | |
| 1012 | + for href in pattern.findall(content): | |
| 1013 | + target_text = href.strip() | |
| 1014 | + if not self._is_local_html_link_target(target_text): | |
| 1015 | + continue | |
| 1016 | + resolved = (file_path.parent / target_text).resolve(strict=False) | |
| 1017 | + key = f"{target_text}::{resolved}" | |
| 1018 | + if key in seen: | |
| 1019 | + continue | |
| 1020 | + seen.add(key) | |
| 1021 | + targets.append((target_text, resolved)) | |
| 1022 | + return targets | |
| 1023 | + | |
| 1024 | + def _collect_declared_html_targets( | |
| 1025 | + self, | |
| 1026 | + root: Path, | |
| 1027 | + html_files: list[Path], | |
| 1028 | + ) -> set[str]: | |
| 1029 | + declared: set[str] = set() | |
| 1030 | + for html_file in html_files: | |
| 1031 | + try: | |
| 1032 | + text = html_file.read_text() | |
| 1033 | + except OSError: | |
| 1034 | + continue | |
| 1035 | + for _href, resolved in self._collect_local_html_targets(html_file, text): | |
| 1036 | + relative_target = self._relative_html_target(root, resolved) | |
| 1037 | + if relative_target is not None: | |
| 1038 | + declared.add(relative_target) | |
| 1039 | + return declared | |
| 1040 | + | |
| 1041 | + def _resolve_html_artifact_root(self, file_path: Path) -> Path: | |
| 1042 | + for candidate in [file_path.parent, *file_path.parents]: | |
| 1043 | + if (candidate / "index.html").exists(): | |
| 1044 | + return candidate | |
| 1045 | + return file_path.parent | |
| 1046 | + | |
| 1047 | + def _relative_html_target(self, root: Path, target: Path) -> str | None: | |
| 1048 | + try: | |
| 1049 | + return str(target.relative_to(root)) | |
| 1050 | + except ValueError: | |
| 1051 | + return None | |
| 1052 | + | |
| 1053 | + @staticmethod | |
| 1054 | + def _is_local_html_link_target(href: str) -> bool: | |
| 1055 | + target = href.strip() | |
| 1056 | + if not target: | |
| 1057 | + return False | |
| 1058 | + if target.startswith(("#", "mailto:", "tel:", "javascript:")): | |
| 1059 | + return False | |
| 1060 | + if "://" in target: | |
| 1061 | + return False | |
| 1062 | + normalized = target.split("#", 1)[0].split("?", 1)[0].strip().lower() | |
| 1063 | + return normalized.endswith(".html") | |
| 1064 | + | |
| 1221 | 1065 | def _suggest_existing_html_targets(self, root: Path, missing: list[str]) -> list[str]: |
| 1222 | 1066 | available_by_directory: dict[Path, list[str]] = {} |
| 1223 | 1067 | suggestions: list[str] = [] |
src/loader/runtime/tool_batch_recovery.pymodified@@ -6,6 +6,7 @@ import re | ||
| 6 | 6 | from collections.abc import Awaitable, Callable |
| 7 | 7 | from difflib import SequenceMatcher |
| 8 | 8 | from pathlib import Path |
| 9 | +from typing import Any | |
| 9 | 10 | |
| 10 | 11 | from ..llm.base import Message, Role, ToolCall |
| 11 | 12 | from .compaction import ( |
@@ -17,7 +18,7 @@ from .context import RuntimeContext | ||
| 17 | 18 | from .events import AgentEvent |
| 18 | 19 | from .executor import ToolExecutionOutcome |
| 19 | 20 | from .recovery import RecoveryContext, format_failure_message, format_recovery_prompt |
| 20 | -from .semantic_rules import html_toc as html_toc_rule | |
| 21 | +from .repair_focus import ActiveRepairContext, extract_active_repair_context | |
| 21 | 22 | |
| 22 | 23 | EventSink = Callable[[AgentEvent], Awaitable[None]] |
| 23 | 24 | |
@@ -59,7 +60,9 @@ class ToolBatchRecoveryController: | ||
| 59 | 60 | type="error", |
| 60 | 61 | content=( |
| 61 | 62 | "Loop detected: already tried a similar command. " |
| 62 | - "Try a DIFFERENT approach (e.g., read a config file first)." | |
| 63 | + "Try a different next step using the files and facts you already have " | |
| 64 | + "(for example, make the specific edit, verify the current result, or " | |
| 65 | + "inspect one concrete unresolved target)." | |
| 63 | 66 | ), |
| 64 | 67 | tool_name=tool_call.name, |
| 65 | 68 | ) |
@@ -129,21 +132,71 @@ class ToolBatchRecoveryController: | ||
| 129 | 132 | |
| 130 | 133 | session = self.context.session |
| 131 | 134 | current_task = getattr(session, "current_task", None) |
| 132 | - focus_path = self._preferred_focus_path( | |
| 133 | - tool_call=tool_call, | |
| 134 | - current_task=current_task, | |
| 135 | - ) | |
| 135 | + active_repair = self._active_repair_context() | |
| 136 | + effective_task = current_task | |
| 137 | + if active_repair is not None and active_repair.artifact_path: | |
| 138 | + effective_task = ( | |
| 139 | + "Repair the current artifact using the failed verification evidence: " | |
| 140 | + f"{active_repair.artifact_path}" | |
| 141 | + ) | |
| 142 | + focus_path = active_repair.artifact_path | |
| 143 | + preferred_next_step = ( | |
| 144 | + f"Update `{active_repair.artifact_path}` to resolve the current " | |
| 145 | + "verification failures." | |
| 146 | + ) | |
| 147 | + else: | |
| 148 | + focus_path = self._preferred_focus_path( | |
| 149 | + tool_call=tool_call, | |
| 150 | + current_task=current_task, | |
| 151 | + ) | |
| 152 | + preferred_next_step = infer_preferred_next_step( | |
| 153 | + session.messages, | |
| 154 | + current_task=effective_task, | |
| 155 | + focus_path=focus_path or None, | |
| 156 | + ) | |
| 136 | 157 | confirmed_facts = summarize_confirmed_facts(session.messages) |
| 137 | - preferred_next_step = infer_preferred_next_step( | |
| 138 | - session.messages, | |
| 139 | - current_task=current_task, | |
| 140 | - focus_path=focus_path or None, | |
| 141 | - ) | |
| 142 | - actionable_known_state = bool(confirmed_facts and preferred_next_step) | |
| 143 | 158 | lines = [prompt] |
| 144 | - if confirmed_facts or preferred_next_step or current_task: | |
| 159 | + candidate_lines = self._file_not_found_candidate_lines( | |
| 160 | + tool_call, | |
| 161 | + outcome, | |
| 162 | + active_repair=active_repair, | |
| 163 | + ) | |
| 164 | + actionable_known_state = bool( | |
| 165 | + active_repair or current_task or confirmed_facts or preferred_next_step or candidate_lines | |
| 166 | + ) | |
| 167 | + if active_repair is not None: | |
| 168 | + lines.extend(["", "## ACTIVE REPAIR TARGET"]) | |
| 169 | + lines.append( | |
| 170 | + "- Verification already failed on the current artifact set. " | |
| 171 | + "Stay on this repair until the broken local references are fixed." | |
| 172 | + ) | |
| 173 | + lines.extend(active_repair.repair_lines) | |
| 174 | + drifted_path = self._canonicalize_path( | |
| 175 | + str( | |
| 176 | + tool_call.arguments.get("file_path") | |
| 177 | + or tool_call.arguments.get("path") | |
| 178 | + or "" | |
| 179 | + ).strip() | |
| 180 | + ) | |
| 181 | + if ( | |
| 182 | + drifted_path | |
| 183 | + and active_repair.artifact_path | |
| 184 | + and drifted_path != active_repair.artifact_path | |
| 185 | + ): | |
| 186 | + lines.append( | |
| 187 | + f"- The failed tool call drifted to `{drifted_path}`. " | |
| 188 | + f"Return to `{active_repair.artifact_path}` instead of reopening " | |
| 189 | + "the original discovery task." | |
| 190 | + ) | |
| 191 | + lines.append( | |
| 192 | + "- Treat this repair as higher priority than the original discovery " | |
| 193 | + "prompt until verification passes." | |
| 194 | + ) | |
| 195 | + if active_repair or confirmed_facts or preferred_next_step or current_task: | |
| 145 | 196 | lines.extend(["", "## CONTINUE FROM KNOWN STATE"]) |
| 146 | - if current_task: | |
| 197 | + if active_repair is not None and active_repair.artifact_path: | |
| 198 | + lines.append(f"- Active repair target: `{active_repair.artifact_path}`") | |
| 199 | + elif current_task: | |
| 147 | 200 | lines.append(f"- Current task: {current_task}") |
| 148 | 201 | if confirmed_facts: |
| 149 | 202 | lines.append(f"- Confirmed facts: {confirmed_facts}") |
@@ -153,18 +206,28 @@ class ToolBatchRecoveryController: | ||
| 153 | 206 | "- Preserve progress: do not restart by rereading already-confirmed files " |
| 154 | 207 | "unless you need genuinely new evidence." |
| 155 | 208 | ) |
| 209 | + if active_repair is not None: | |
| 210 | + lines.append( | |
| 211 | + "- Do not go back to the original reference guide or invent alternate " | |
| 212 | + "paths while this repair target is unresolved." | |
| 213 | + ) | |
| 156 | 214 | if actionable_known_state: |
| 215 | + target_line = ( | |
| 216 | + f"- Prefer edit/write/patch on `{active_repair.artifact_path}` over " | |
| 217 | + "rereading the same files." | |
| 218 | + if active_repair is not None and active_repair.artifact_path | |
| 219 | + else "- Prefer edit/write/patch on the target file over rereading the same files." | |
| 220 | + ) | |
| 157 | 221 | lines.extend( |
| 158 | 222 | [ |
| 159 | 223 | "", |
| 160 | 224 | "## ACTION BIAS FOR THIS RECOVERY", |
| 161 | 225 | "- The confirmed findings above are already enough to keep moving.", |
| 162 | - "- Prefer edit/write/patch on the target file over rereading the same files.", | |
| 226 | + target_line, | |
| 163 | 227 | "- Only inspect one more file if a specific filename, href, or title is still unknown.", |
| 164 | 228 | "- Treat the preferred next step as the default path forward.", |
| 165 | 229 | ] |
| 166 | 230 | ) |
| 167 | - candidate_lines = self._file_not_found_candidate_lines(tool_call, outcome) | |
| 168 | 231 | if candidate_lines: |
| 169 | 232 | lines.extend(["", "## LIKELY FILE CANDIDATES", *candidate_lines]) |
| 170 | 233 | target_excerpt_lines = self._target_excerpt_lines(tool_call) |
@@ -229,6 +292,8 @@ class ToolBatchRecoveryController: | ||
| 229 | 292 | self, |
| 230 | 293 | tool_call: ToolCall, |
| 231 | 294 | outcome: ToolExecutionOutcome, |
| 295 | + *, | |
| 296 | + active_repair: ActiveRepairContext | None = None, | |
| 232 | 297 | ) -> list[str]: |
| 233 | 298 | if tool_call.name not in {"read", "write", "edit", "patch"}: |
| 234 | 299 | return [] |
@@ -247,14 +312,26 @@ class ToolBatchRecoveryController: | ||
| 247 | 312 | |
| 248 | 313 | candidates = self._rank_known_file_candidates(missing_path) |
| 249 | 314 | if not candidates: |
| 315 | + if active_repair is not None and active_repair.artifact_path: | |
| 316 | + return [ | |
| 317 | + f"- Requested file does not exist: `{missing_path}`", | |
| 318 | + f"- Active repair target is `{active_repair.artifact_path}`.", | |
| 319 | + "- Repair the known target instead of inventing a new path.", | |
| 320 | + ] | |
| 250 | 321 | return [] |
| 251 | 322 | |
| 252 | 323 | names = ", ".join(self._describe_candidate(candidate) for candidate in candidates[:3]) |
| 253 | - return [ | |
| 324 | + lines = [ | |
| 254 | 325 | f"- Requested file does not exist: `{missing_path}`", |
| 255 | 326 | f"- Closest known files in the same directory: {names}", |
| 256 | 327 | "- Prefer one of those exact filenames instead of retrying the missing path.", |
| 257 | 328 | ] |
| 329 | + if active_repair is not None and active_repair.artifact_path: | |
| 330 | + lines.append( | |
| 331 | + f"- Keep the repair centered on `{active_repair.artifact_path}` rather than " | |
| 332 | + "switching back to broad discovery." | |
| 333 | + ) | |
| 334 | + return lines | |
| 258 | 335 | |
| 259 | 336 | def _rank_known_file_candidates(self, missing_path: str) -> list[str]: |
| 260 | 337 | missing_parent = str(Path(missing_path).parent) |
@@ -316,51 +393,261 @@ class ToolBatchRecoveryController: | ||
| 316 | 393 | |
| 317 | 394 | def _describe_candidate(self, candidate: str) -> str: |
| 318 | 395 | path = Path(candidate) |
| 319 | - label = f"`{path.name}`" | |
| 320 | - if path.suffix == ".html": | |
| 321 | - title = html_toc_rule.read_html_title(path) | |
| 322 | - if title: | |
| 323 | - return f"{label} = {title}" | |
| 324 | - return label | |
| 396 | + return f"`{path.name}`" | |
| 325 | 397 | |
| 326 | 398 | def _target_excerpt_lines(self, tool_call: ToolCall) -> list[str]: |
| 327 | - file_path = str( | |
| 399 | + if tool_call.name not in {"edit", "patch"}: | |
| 400 | + return [] | |
| 401 | + | |
| 402 | + raw_path = str( | |
| 328 | 403 | tool_call.arguments.get("file_path") |
| 329 | 404 | or tool_call.arguments.get("path") |
| 330 | 405 | or "" |
| 331 | 406 | ).strip() |
| 332 | - if not file_path: | |
| 407 | + target_path = self._canonicalize_path(raw_path) | |
| 408 | + if not target_path: | |
| 333 | 409 | return [] |
| 334 | - current_task = getattr(self.context.session, "current_task", None) | |
| 335 | - if not html_toc_rule.task_targets_html_toc(current_task): | |
| 410 | + | |
| 411 | + path = Path(target_path) | |
| 412 | + if not path.is_file(): | |
| 336 | 413 | return [] |
| 337 | 414 | |
| 338 | - inventory = html_toc_rule.summarize_html_inventory(file_path, limit=12) | |
| 339 | - excerpt = html_toc_rule.extract_html_toc_excerpt(file_path) | |
| 340 | - if not inventory and not excerpt: | |
| 415 | + try: | |
| 416 | + content = path.read_text() | |
| 417 | + except Exception: | |
| 341 | 418 | return [] |
| 342 | 419 | |
| 343 | - lines: list[str] = [] | |
| 344 | - if inventory: | |
| 345 | - lines.append(f"- Verified chapter inventory: {inventory}") | |
| 346 | - if excerpt: | |
| 347 | - lines.append("- Current TOC block:") | |
| 348 | - lines.extend(f" {line}" for line in excerpt.splitlines()) | |
| 349 | - replacement = html_toc_rule.build_html_toc_replacement_block(file_path) | |
| 350 | - if replacement: | |
| 351 | - lines.append("- Suggested replacement block:") | |
| 352 | - lines.extend(f" {line}" for line in replacement.splitlines()) | |
| 353 | - if excerpt and replacement: | |
| 354 | - lines.append("- Exact edit guidance:") | |
| 355 | - lines.append(f" file_path: {file_path}") | |
| 356 | - lines.append(" old_string: use the Current TOC block above exactly") | |
| 357 | - lines.append(" new_string: use the Suggested replacement block above exactly") | |
| 358 | - lines.append(" Do not rewrite the whole file.") | |
| 359 | - edit_template = html_toc_rule.build_html_toc_edit_call_template(file_path) | |
| 360 | - if edit_template: | |
| 361 | - lines.append("- Suggested edit call:") | |
| 362 | - lines.extend(f" {line}" for line in edit_template.splitlines()) | |
| 363 | - return lines | |
| 420 | + file_lines = content.splitlines() | |
| 421 | + if not file_lines: | |
| 422 | + return [ | |
| 423 | + f"- Target file: `{target_path}`", | |
| 424 | + "- The file is currently empty.", | |
| 425 | + "- Use the exact on-disk state above when preparing the next mutation.", | |
| 426 | + ] | |
| 427 | + | |
| 428 | + start, end, label = self._excerpt_window_for_tool_call( | |
| 429 | + file_lines=file_lines, | |
| 430 | + content=content, | |
| 431 | + tool_call=tool_call, | |
| 432 | + ) | |
| 433 | + excerpt = self._format_excerpt_lines(file_lines, start, end) | |
| 434 | + if not excerpt: | |
| 435 | + return [] | |
| 436 | + | |
| 437 | + return [ | |
| 438 | + f"- Target file: `{target_path}`", | |
| 439 | + f"- {label}", | |
| 440 | + *excerpt, | |
| 441 | + "- Use the exact on-disk text above when preparing the next mutation.", | |
| 442 | + "- If several adjacent lines are wrong, replace the containing block in one edit instead of retrying a smaller substitution.", | |
| 443 | + ] | |
| 444 | + | |
| 445 | + def _excerpt_window_for_tool_call( | |
| 446 | + self, | |
| 447 | + *, | |
| 448 | + file_lines: list[str], | |
| 449 | + content: str, | |
| 450 | + tool_call: ToolCall, | |
| 451 | + ) -> tuple[int, int, str]: | |
| 452 | + if tool_call.name == "edit": | |
| 453 | + window = self._edit_excerpt_window( | |
| 454 | + file_lines=file_lines, | |
| 455 | + content=content, | |
| 456 | + arguments=tool_call.arguments, | |
| 457 | + ) | |
| 458 | + if window is not None: | |
| 459 | + return window | |
| 460 | + if tool_call.name == "patch": | |
| 461 | + window = self._patch_excerpt_window( | |
| 462 | + file_lines=file_lines, | |
| 463 | + arguments=tool_call.arguments, | |
| 464 | + ) | |
| 465 | + if window is not None: | |
| 466 | + return window | |
| 467 | + return self._bounded_window( | |
| 468 | + file_lines=file_lines, | |
| 469 | + start=0, | |
| 470 | + length=min(10, len(file_lines)), | |
| 471 | + label="Current file contents:", | |
| 472 | + ) | |
| 473 | + | |
| 474 | + def _edit_excerpt_window( | |
| 475 | + self, | |
| 476 | + *, | |
| 477 | + file_lines: list[str], | |
| 478 | + content: str, | |
| 479 | + arguments: dict[str, Any], | |
| 480 | + ) -> tuple[int, int, str] | None: | |
| 481 | + old_string = str(arguments.get("old_string") or "") | |
| 482 | + new_string = str(arguments.get("new_string") or "") | |
| 483 | + | |
| 484 | + if old_string: | |
| 485 | + exact_window = self._exact_string_window( | |
| 486 | + content=content, | |
| 487 | + file_lines=file_lines, | |
| 488 | + needle=old_string, | |
| 489 | + label="Current file contents for the requested edit:", | |
| 490 | + ) | |
| 491 | + if exact_window is not None: | |
| 492 | + return exact_window | |
| 493 | + | |
| 494 | + anchor = old_string or new_string | |
| 495 | + approximate_window = self._approximate_string_window( | |
| 496 | + file_lines=file_lines, | |
| 497 | + needle=anchor, | |
| 498 | + label="Closest on-disk block to the requested edit:", | |
| 499 | + ) | |
| 500 | + if approximate_window is not None: | |
| 501 | + return approximate_window | |
| 502 | + return None | |
| 503 | + | |
| 504 | + def _patch_excerpt_window( | |
| 505 | + self, | |
| 506 | + *, | |
| 507 | + file_lines: list[str], | |
| 508 | + arguments: dict[str, Any], | |
| 509 | + ) -> tuple[int, int, str] | None: | |
| 510 | + hunks = arguments.get("hunks") | |
| 511 | + if not isinstance(hunks, list) or not hunks: | |
| 512 | + return None | |
| 513 | + | |
| 514 | + first_hunk = hunks[0] | |
| 515 | + if not isinstance(first_hunk, dict): | |
| 516 | + return None | |
| 517 | + | |
| 518 | + anchor_lines: list[str] = [] | |
| 519 | + raw_lines = first_hunk.get("lines") | |
| 520 | + if isinstance(raw_lines, list): | |
| 521 | + for raw_line in raw_lines: | |
| 522 | + if not isinstance(raw_line, str) or not raw_line: | |
| 523 | + continue | |
| 524 | + if raw_line[0] in {" ", "-"}: | |
| 525 | + anchor_lines.append(raw_line[1:]) | |
| 526 | + | |
| 527 | + anchor = "\n".join(anchor_lines).strip() | |
| 528 | + approximate_window = self._approximate_string_window( | |
| 529 | + file_lines=file_lines, | |
| 530 | + needle=anchor, | |
| 531 | + label="Closest on-disk block to the requested patch:", | |
| 532 | + ) | |
| 533 | + if approximate_window is not None: | |
| 534 | + return approximate_window | |
| 535 | + | |
| 536 | + old_start = first_hunk.get("old_start", 1) | |
| 537 | + old_lines = first_hunk.get("old_lines", len(anchor_lines) or 1) | |
| 538 | + try: | |
| 539 | + start = max(0, int(old_start) - 1) | |
| 540 | + except (TypeError, ValueError): | |
| 541 | + start = 0 | |
| 542 | + try: | |
| 543 | + length = max(1, int(old_lines)) | |
| 544 | + except (TypeError, ValueError): | |
| 545 | + length = max(1, len(anchor_lines) or 1) | |
| 546 | + return self._bounded_window( | |
| 547 | + file_lines=file_lines, | |
| 548 | + start=start, | |
| 549 | + length=length, | |
| 550 | + label="Current file contents near the requested patch location:", | |
| 551 | + ) | |
| 552 | + | |
| 553 | + def _exact_string_window( | |
| 554 | + self, | |
| 555 | + *, | |
| 556 | + content: str, | |
| 557 | + file_lines: list[str], | |
| 558 | + needle: str, | |
| 559 | + label: str, | |
| 560 | + ) -> tuple[int, int, str] | None: | |
| 561 | + if not needle: | |
| 562 | + return None | |
| 563 | + index = content.find(needle) | |
| 564 | + if index == -1: | |
| 565 | + return None | |
| 566 | + start_line = content[:index].count("\n") | |
| 567 | + block_length = max(1, len(needle.splitlines())) | |
| 568 | + return self._bounded_window( | |
| 569 | + file_lines=file_lines, | |
| 570 | + start=start_line, | |
| 571 | + length=block_length, | |
| 572 | + label=label, | |
| 573 | + ) | |
| 574 | + | |
| 575 | + def _approximate_string_window( | |
| 576 | + self, | |
| 577 | + *, | |
| 578 | + file_lines: list[str], | |
| 579 | + needle: str, | |
| 580 | + label: str, | |
| 581 | + ) -> tuple[int, int, str] | None: | |
| 582 | + normalized_needle = self._normalize_match_text(needle) | |
| 583 | + if not normalized_needle: | |
| 584 | + return None | |
| 585 | + | |
| 586 | + needle_lines = [line for line in needle.splitlines() if line.strip()] | |
| 587 | + if not needle_lines: | |
| 588 | + needle_lines = [needle.strip()] | |
| 589 | + | |
| 590 | + min_window = 1 | |
| 591 | + max_window = min(len(file_lines), max(1, len(needle_lines) + 2)) | |
| 592 | + best_score = 0.0 | |
| 593 | + best_start = 0 | |
| 594 | + best_length = min(max_window, max(1, len(needle_lines))) | |
| 595 | + for window_length in range(min_window, max_window + 1): | |
| 596 | + for start in range(0, len(file_lines) - window_length + 1): | |
| 597 | + candidate = "\n".join(file_lines[start : start + window_length]) | |
| 598 | + score = SequenceMatcher( | |
| 599 | + None, | |
| 600 | + normalized_needle, | |
| 601 | + self._normalize_match_text(candidate), | |
| 602 | + ).ratio() | |
| 603 | + if score > best_score: | |
| 604 | + best_score = score | |
| 605 | + best_start = start | |
| 606 | + best_length = window_length | |
| 607 | + | |
| 608 | + if best_score < 0.25: | |
| 609 | + return None | |
| 610 | + | |
| 611 | + return self._bounded_window( | |
| 612 | + file_lines=file_lines, | |
| 613 | + start=best_start, | |
| 614 | + length=best_length, | |
| 615 | + label=label, | |
| 616 | + ) | |
| 617 | + | |
| 618 | + def _bounded_window( | |
| 619 | + self, | |
| 620 | + *, | |
| 621 | + file_lines: list[str], | |
| 622 | + start: int, | |
| 623 | + length: int, | |
| 624 | + label: str, | |
| 625 | + ) -> tuple[int, int, str]: | |
| 626 | + context_before = 2 | |
| 627 | + context_after = 2 | |
| 628 | + start_index = max(0, start - context_before) | |
| 629 | + end_index = min(len(file_lines), start + max(1, length) + context_after) | |
| 630 | + return start_index, end_index, label | |
| 631 | + | |
| 632 | + def _format_excerpt_lines( | |
| 633 | + self, | |
| 634 | + file_lines: list[str], | |
| 635 | + start: int, | |
| 636 | + end: int, | |
| 637 | + ) -> list[str]: | |
| 638 | + if start >= end: | |
| 639 | + return [] | |
| 640 | + width = len(str(end)) | |
| 641 | + return [ | |
| 642 | + f" {line_number:>{width}} | {file_lines[line_number - 1]}" | |
| 643 | + for line_number in range(start + 1, end + 1) | |
| 644 | + ] | |
| 645 | + | |
| 646 | + def _normalize_match_text(self, text: str) -> str: | |
| 647 | + return " ".join(str(text or "").split()) | |
| 648 | + | |
| 649 | + def _active_repair_context(self) -> ActiveRepairContext | None: | |
| 650 | + return extract_active_repair_context(self.context.session.messages) | |
| 364 | 651 | |
| 365 | 652 | def _canonicalize_path(self, raw_path: str) -> str: |
| 366 | 653 | if not raw_path: |
src/loader/runtime/tool_batches.pymodified@@ -7,16 +7,20 @@ from dataclasses import dataclass, field | ||
| 7 | 7 | from pathlib import Path |
| 8 | 8 | from typing import Any |
| 9 | 9 | |
| 10 | -from ..llm.base import Role, ToolCall | |
| 10 | +from ..llm.base import ToolCall | |
| 11 | 11 | from .compaction import infer_preferred_next_step, summarize_confirmed_facts |
| 12 | 12 | from .context import RuntimeContext |
| 13 | 13 | from .dod import ( |
| 14 | 14 | DefinitionOfDone, |
| 15 | 15 | DefinitionOfDoneStore, |
| 16 | + all_planned_artifacts_exist, | |
| 16 | 17 | begin_new_verification_attempt, |
| 18 | + collect_planned_artifact_targets, | |
| 17 | 19 | derive_verification_commands, |
| 18 | 20 | ensure_active_verification_attempt, |
| 21 | + infer_next_declared_html_output_file, | |
| 19 | 22 | is_state_mutating_tool_call, |
| 23 | + planned_artifact_target_satisfied, | |
| 20 | 24 | record_successful_tool_call, |
| 21 | 25 | synthesize_todo_items, |
| 22 | 26 | ) |
@@ -25,15 +29,20 @@ from .evidence_provenance import EvidenceProvenance, EvidenceProvenanceStatus | ||
| 25 | 29 | from .executor import ToolExecutionState, ToolExecutor |
| 26 | 30 | from .logging import get_runtime_logger |
| 27 | 31 | from .policy_timeline import append_verification_timeline_entry |
| 32 | +from .repair_focus import extract_active_repair_context | |
| 28 | 33 | from .safeguard_services import extract_shell_text_rewrite_target |
| 29 | -from .semantic_rules import html_toc as html_toc_rule | |
| 30 | 34 | from .tool_batch_checks import ToolBatchConfidenceGate, ToolBatchVerificationGate |
| 31 | 35 | from .tool_batch_recovery import ToolBatchRecoveryController |
| 32 | 36 | from .verification_observations import ( |
| 33 | 37 | VerificationObservation, |
| 34 | 38 | VerificationObservationStatus, |
| 35 | 39 | ) |
| 36 | -from .workflow import advance_todos_from_tool_call, sync_todos_to_definition_of_done | |
| 40 | +from .workflow import ( | |
| 41 | + advance_todos_from_tool_call, | |
| 42 | + effective_pending_todo_items, | |
| 43 | + reconcile_aggregate_completion_steps, | |
| 44 | + sync_todos_to_definition_of_done, | |
| 45 | +) | |
| 37 | 46 | |
| 38 | 47 | EventSink = Callable[[AgentEvent], Awaitable[None]] |
| 39 | 48 | ConfirmationHandler = ( |
@@ -48,17 +57,46 @@ _TODO_NUDGE_EXCLUDED_ITEMS = { | ||
| 48 | 57 | } |
| 49 | 58 | _MUTATION_TODO_HINTS = ( |
| 50 | 59 | "create", |
| 60 | + "creating", | |
| 51 | 61 | "update", |
| 62 | + "updating", | |
| 52 | 63 | "edit", |
| 64 | + "editing", | |
| 53 | 65 | "write", |
| 66 | + "writing", | |
| 54 | 67 | "fix", |
| 68 | + "fixing", | |
| 55 | 69 | "modify", |
| 70 | + "modifying", | |
| 56 | 71 | "change", |
| 72 | + "changing", | |
| 57 | 73 | "patch", |
| 74 | + "patching", | |
| 58 | 75 | "replace", |
| 76 | + "replacing", | |
| 59 | 77 | "correct", |
| 78 | + "correcting", | |
| 60 | 79 | "rewrite", |
| 80 | + "rewriting", | |
| 81 | +) | |
| 82 | +_CONSISTENCY_REVIEW_HINTS = ( | |
| 83 | + "consistent", | |
| 84 | + "consistently", | |
| 85 | + "formatted", | |
| 86 | + "link", | |
| 87 | + "linked", | |
| 88 | + "navigation", | |
| 89 | + "work properly", | |
| 90 | + "all files", | |
| 91 | + "every file", | |
| 92 | + "ensure", | |
| 61 | 93 | ) |
| 94 | +_BOOKKEEPING_NOTE_TOOL_NAMES = { | |
| 95 | + "notepad_write_working", | |
| 96 | + "notepad_append", | |
| 97 | + "notepad_write_priority", | |
| 98 | + "notepad_write_manual", | |
| 99 | +} | |
| 62 | 100 | |
| 63 | 101 | |
| 64 | 102 | @dataclass |
@@ -88,7 +126,6 @@ class ToolBatchRunner: | ||
| 88 | 126 | self.confidence_gate = confidence_gate or ToolBatchConfidenceGate(context) |
| 89 | 127 | self.recovery_controller = recovery_controller or ToolBatchRecoveryController(context) |
| 90 | 128 | self.verification_gate = verification_gate or ToolBatchVerificationGate(context) |
| 91 | - self._inventory_hint_targets: set[str] = set() | |
| 92 | 129 | |
| 93 | 130 | async def execute_batch( |
| 94 | 131 | self, |
@@ -205,10 +242,6 @@ class ToolBatchRunner: | ||
| 205 | 242 | if label: |
| 206 | 243 | completed_labels.append(label) |
| 207 | 244 | await _emit_batch_todos() |
| 208 | - self._annotate_verified_html_inventory(executed_tool_call, outcome) | |
| 209 | - self._queue_verified_html_inventory_nudge(executed_tool_call) | |
| 210 | - self._annotate_validated_html_toc_completion(executed_tool_call, outcome) | |
| 211 | - self._queue_validated_html_toc_completion_nudge(executed_tool_call) | |
| 212 | 245 | if loop_response is not None: |
| 213 | 246 | result.halted = True |
| 214 | 247 | result.final_response = loop_response |
@@ -244,6 +277,16 @@ class ToolBatchRunner: | ||
| 244 | 277 | if outcome.state == ToolExecutionState.DUPLICATE: |
| 245 | 278 | self._queue_duplicate_observation_nudge(tool_call, dod=dod) |
| 246 | 279 | elif outcome.state == ToolExecutionState.BLOCKED: |
| 280 | + self._queue_blocked_active_repair_nudge(outcome.event_content) | |
| 281 | + self._queue_blocked_active_repair_mutation_nudge(outcome.event_content) | |
| 282 | + self._queue_blocked_completed_artifact_scope_nudge( | |
| 283 | + outcome.event_content, | |
| 284 | + dod=dod, | |
| 285 | + ) | |
| 286 | + self._queue_blocked_late_reference_drift_nudge( | |
| 287 | + outcome.event_content, | |
| 288 | + dod=dod, | |
| 289 | + ) | |
| 247 | 290 | self._queue_blocked_shell_rewrite_nudge(tool_call) |
| 248 | 291 | self._queue_blocked_html_edit_nudge(tool_call, outcome.event_content) |
| 249 | 292 | |
@@ -290,10 +333,17 @@ class ToolBatchRunner: | ||
| 290 | 333 | return |
| 291 | 334 | |
| 292 | 335 | current_task = getattr(self.context.session, "current_task", None) |
| 336 | + missing_artifact = _next_missing_planned_artifact( | |
| 337 | + dod, | |
| 338 | + project_root=self.context.project_root, | |
| 339 | + ) | |
| 293 | 340 | next_pending = next( |
| 294 | 341 | ( |
| 295 | 342 | item |
| 296 | - for item in dod.pending_items | |
| 343 | + for item in effective_pending_todo_items( | |
| 344 | + dod, | |
| 345 | + project_root=self.context.project_root, | |
| 346 | + ) | |
| 297 | 347 | if item not in _TODO_NUDGE_EXCLUDED_ITEMS |
| 298 | 348 | ), |
| 299 | 349 | None, |
@@ -302,13 +352,35 @@ class ToolBatchRunner: | ||
| 302 | 352 | self.context.session.messages, |
| 303 | 353 | max_items=2, |
| 304 | 354 | ) |
| 305 | - if next_pending and not html_toc_rule.task_targets_html_toc(current_task): | |
| 355 | + if _should_prioritize_missing_artifact( | |
| 356 | + next_pending=next_pending, | |
| 357 | + missing_artifact=missing_artifact, | |
| 358 | + ): | |
| 359 | + prefix = "Reuse the earlier observation instead of repeating it. " | |
| 360 | + if confirmed_facts: | |
| 361 | + prefix += f"Confirmed facts: {confirmed_facts}. " | |
| 362 | + self.context.queue_steering_message( | |
| 363 | + prefix | |
| 364 | + + "An explicitly planned artifact is still missing." | |
| 365 | + + _missing_artifact_resume_suffix( | |
| 366 | + missing_artifact, | |
| 367 | + project_root=self.context.project_root, | |
| 368 | + ) | |
| 369 | + + " Do not switch into review or consistency-check mode until the missing artifact exists." | |
| 370 | + ) | |
| 371 | + return | |
| 372 | + if next_pending: | |
| 306 | 373 | mutation_suffix = "" |
| 307 | 374 | if _todo_is_mutation_step(next_pending): |
| 308 | - mutation_suffix = ( | |
| 309 | - " You already have enough evidence for that step, so stop gathering " | |
| 310 | - "more reference material and perform the change now." | |
| 375 | + mutation_suffix = _missing_artifact_resume_suffix( | |
| 376 | + missing_artifact, | |
| 377 | + project_root=self.context.project_root, | |
| 311 | 378 | ) |
| 379 | + if not mutation_suffix: | |
| 380 | + mutation_suffix = ( | |
| 381 | + " You already have enough evidence for that step, so stop gathering " | |
| 382 | + "more reference material and perform the change now." | |
| 383 | + ) | |
| 312 | 384 | if confirmed_facts: |
| 313 | 385 | self.context.queue_steering_message( |
| 314 | 386 | "Reuse the earlier observation instead of repeating it. " |
@@ -326,6 +398,37 @@ class ToolBatchRunner: | ||
| 326 | 398 | ) |
| 327 | 399 | return |
| 328 | 400 | |
| 401 | + if missing_artifact is not None: | |
| 402 | + self.context.queue_steering_message( | |
| 403 | + "Reuse the earlier observation instead of repeating it. " | |
| 404 | + + _missing_artifact_resume_suffix( | |
| 405 | + missing_artifact, | |
| 406 | + project_root=self.context.project_root, | |
| 407 | + ).strip() | |
| 408 | + ) | |
| 409 | + return | |
| 410 | + | |
| 411 | + if all_planned_artifacts_exist(dod, project_root=self.context.project_root): | |
| 412 | + verification_commands = dod.verification_commands or derive_verification_commands( | |
| 413 | + dod, | |
| 414 | + project_root=self.context.project_root, | |
| 415 | + task_statement=current_task, | |
| 416 | + supplement_existing=True, | |
| 417 | + ) | |
| 418 | + verification_suffix = ( | |
| 419 | + "Move to verification or final confirmation using the files already on disk." | |
| 420 | + if verification_commands | |
| 421 | + else "Finish the current review using the files already on disk." | |
| 422 | + ) | |
| 423 | + self.context.queue_steering_message( | |
| 424 | + "Reuse the earlier observation instead of repeating it. " | |
| 425 | + "All explicitly planned artifacts already exist. " | |
| 426 | + "Use the current task artifacts as the source of truth and do not reopen " | |
| 427 | + "reference materials unless one specific gap is still unknown. " | |
| 428 | + + verification_suffix | |
| 429 | + ) | |
| 430 | + return | |
| 431 | + | |
| 329 | 432 | preferred_next_step = infer_preferred_next_step( |
| 330 | 433 | self.context.session.messages, |
| 331 | 434 | current_task=current_task, |
@@ -401,277 +504,159 @@ class ToolBatchRunner: | ||
| 401 | 504 | f"Apply the change to `{target}` with edit/patch/write." |
| 402 | 505 | ) |
| 403 | 506 | |
| 404 | - def _queue_blocked_html_edit_nudge(self, tool_call: ToolCall, event_content: str) -> None: | |
| 405 | - """Steer blocked TOC edits back to the confirmed chapter inventory.""" | |
| 406 | - | |
| 407 | - if tool_call.name not in {"edit", "patch"}: | |
| 408 | - return | |
| 409 | - if not self._targets_html_toc_task(): | |
| 410 | - return | |
| 411 | - | |
| 412 | - target_path = str(tool_call.arguments.get("file_path", "")).strip() | |
| 413 | - if not html_toc_rule.is_html_toc_index_path(target_path): | |
| 414 | - return | |
| 507 | + def _queue_blocked_active_repair_nudge(self, event_content: str) -> None: | |
| 508 | + """Reinforce active repair focus after an out-of-scope blocked observation.""" | |
| 415 | 509 | |
| 416 | - validation = html_toc_rule.validate_html_toc(target_path) | |
| 417 | - if ( | |
| 418 | - "old_string and new_string are identical" in event_content | |
| 419 | - and validation is not None | |
| 420 | - and validation.valid | |
| 421 | - ): | |
| 422 | - action_tracker = getattr(self.context.safeguards, "action_tracker", None) | |
| 423 | - note_validated = getattr(action_tracker, "note_validated_html_toc", None) | |
| 424 | - if callable(note_validated): | |
| 425 | - note_validated(target_path) | |
| 426 | - target_label = html_toc_rule.describe_html_toc_target(target_path) | |
| 427 | - self.context.queue_steering_message( | |
| 428 | - f"The HTML table-of-contents target {target_label} already matches the " | |
| 429 | - "validated replacement block. " | |
| 430 | - f"Semantic verification preview: validated {validation.link_count} linked " | |
| 431 | - "entries. " | |
| 432 | - "Do not call `edit`, `patch`, or reread the same TOC again. Briefly state " | |
| 433 | - f"that {target_label} is already updated so Loader can continue the " | |
| 434 | - "verification gate or finish the task." | |
| 435 | - ) | |
| 510 | + if "[Blocked - active repair scope:" not in event_content: | |
| 436 | 511 | return |
| 437 | 512 | |
| 438 | - current_task = getattr(self.context.session, "current_task", None) | |
| 439 | - confirmed_facts = summarize_confirmed_facts( | |
| 440 | - self.context.session.messages, | |
| 441 | - max_items=2, | |
| 442 | - focus_path=target_path, | |
| 443 | - ) | |
| 444 | - preferred_next_step = infer_preferred_next_step( | |
| 445 | - self.context.session.messages, | |
| 446 | - current_task=current_task, | |
| 447 | - focus_path=target_path, | |
| 448 | - ) | |
| 449 | - verified_inventory = html_toc_rule.summarize_html_inventory(target_path, limit=12) | |
| 450 | - current_excerpt = html_toc_rule.extract_html_toc_excerpt(target_path) | |
| 451 | - suggested_replacement = html_toc_rule.build_html_toc_replacement_block(target_path) | |
| 452 | - suggested_call = html_toc_rule.build_html_toc_edit_call_template(target_path) | |
| 453 | - target_label = html_toc_rule.describe_html_toc_target(target_path) | |
| 454 | - excerpt_suffix = ( | |
| 455 | - f"\nCurrent TOC block:\n{current_excerpt}" | |
| 456 | - if current_excerpt | |
| 457 | - else "" | |
| 458 | - ) | |
| 459 | - replacement_suffix = ( | |
| 460 | - f"\nSuggested replacement block:\n{suggested_replacement}" | |
| 461 | - if suggested_replacement | |
| 462 | - else "" | |
| 463 | - ) | |
| 464 | - call_suffix = ( | |
| 465 | - f"\nSuggested edit call:\n{suggested_call}" | |
| 466 | - if suggested_call | |
| 467 | - else "" | |
| 468 | - ) | |
| 469 | - | |
| 470 | - if preferred_next_step and confirmed_facts and verified_inventory: | |
| 471 | - self.context.queue_steering_message( | |
| 472 | - f"Use the current TOC target contents plus the verified sibling inventory for " | |
| 473 | - f"{target_label} instead of guessing. " | |
| 474 | - f"Confirmed facts: {confirmed_facts}. " | |
| 475 | - f"Known chapter inventory: {verified_inventory}. " | |
| 476 | - f"{preferred_next_step} " | |
| 477 | - f"Apply those exact href/title pairs in {target_label}. " | |
| 478 | - "Do not rewrite the whole document. For `edit`, set `old_string` to the " | |
| 479 | - "current TOC block above exactly and set `new_string` to the suggested " | |
| 480 | - "replacement block below exactly." | |
| 481 | - f"{excerpt_suffix}" | |
| 482 | - f"{replacement_suffix}" | |
| 483 | - f"{call_suffix}" | |
| 484 | - ) | |
| 513 | + repair = extract_active_repair_context(self.context.session.messages) | |
| 514 | + if repair is None: | |
| 485 | 515 | return |
| 486 | 516 | |
| 487 | - if verified_inventory: | |
| 517 | + if repair.allowed_paths: | |
| 518 | + allowed_preview = ", ".join(f"`{path}`" for path in repair.allowed_paths[:3]) | |
| 519 | + if len(repair.allowed_paths) > 3: | |
| 520 | + allowed_preview += ", ..." | |
| 488 | 521 | self.context.queue_steering_message( |
| 489 | - f"Use the current TOC target contents plus the verified sibling inventory for " | |
| 490 | - f"{target_label} instead of guessing. " | |
| 491 | - f"Known chapter inventory: {verified_inventory}. " | |
| 492 | - f"Apply those exact href/title pairs in {target_label}. " | |
| 493 | - "Do not rewrite the whole document. For `edit`, set `old_string` to the " | |
| 494 | - "current TOC block above exactly and set `new_string` to the suggested " | |
| 495 | - "replacement block below exactly." | |
| 496 | - f"{excerpt_suffix}" | |
| 497 | - f"{replacement_suffix}" | |
| 498 | - f"{call_suffix}" | |
| 522 | + "Verification already identified the active repair target. " | |
| 523 | + f"Stay on the concrete repair files {allowed_preview} " | |
| 524 | + f"and repair `{repair.artifact_path}` directly. " | |
| 525 | + "Do not reopen unrelated reference materials while this repair target is unresolved." | |
| 499 | 526 | ) |
| 500 | 527 | return |
| 501 | 528 | |
| 529 | + roots_preview = ", ".join(f"`{root}`" for root in repair.allowed_roots[:2]) | |
| 530 | + if len(repair.allowed_roots) > 2: | |
| 531 | + roots_preview += ", ..." | |
| 502 | 532 | self.context.queue_steering_message( |
| 503 | - f"Use the current TOC target contents when retrying the edit for {target_label} " | |
| 504 | - "instead of guessing. " | |
| 505 | - f"{excerpt_suffix}".strip() | |
| 533 | + "Verification already identified the active repair target. " | |
| 534 | + f"Stay within the current artifact set under {roots_preview} " | |
| 535 | + f"and repair `{repair.artifact_path}` directly. " | |
| 536 | + "Do not reopen unrelated reference materials while this repair target is unresolved." | |
| 506 | 537 | ) |
| 507 | 538 | |
| 508 | - def _queue_verified_html_inventory_nudge(self, tool_call: ToolCall) -> None: | |
| 509 | - """Proactively hand off verified chapter inventory after sibling discovery.""" | |
| 510 | - | |
| 511 | - if tool_call.name != "glob": | |
| 512 | - return | |
| 513 | - | |
| 514 | - chapters_path = str(tool_call.arguments.get("path", "")).strip() | |
| 515 | - if not chapters_path.endswith("chapters"): | |
| 516 | - return | |
| 517 | - | |
| 518 | - index_path = str(Path(chapters_path).expanduser().parent / "index.html") | |
| 519 | - if index_path in self._inventory_hint_targets: | |
| 520 | - return | |
| 539 | + def _queue_blocked_active_repair_mutation_nudge(self, event_content: str) -> None: | |
| 540 | + """Keep repair-phase mutations pinned to the named repair files.""" | |
| 521 | 541 | |
| 522 | - if not self._targets_html_toc_task(): | |
| 542 | + if "[Blocked - active repair mutation scope:" not in event_content: | |
| 523 | 543 | return |
| 524 | 544 | |
| 525 | - verified_inventory = html_toc_rule.summarize_html_inventory(index_path, limit=12) | |
| 526 | - if not verified_inventory: | |
| 545 | + repair = extract_active_repair_context(self.context.session.messages) | |
| 546 | + if repair is None or not repair.allowed_paths: | |
| 527 | 547 | return |
| 528 | 548 | |
| 529 | - self._inventory_hint_targets.add(index_path) | |
| 530 | - target_label = html_toc_rule.describe_html_toc_target(index_path) | |
| 531 | - chapters_label = html_toc_rule.describe_html_toc_chapters_dir(index_path) | |
| 549 | + allowed_preview = ", ".join(f"`{path}`" for path in repair.allowed_paths[:3]) | |
| 550 | + if len(repair.allowed_paths) > 3: | |
| 551 | + allowed_preview += ", ..." | |
| 532 | 552 | self.context.queue_steering_message( |
| 533 | - f"You already have the verified sibling inventory needed for {target_label}. " | |
| 534 | - f"Known chapter inventory: {verified_inventory}. " | |
| 535 | - f"Update {target_label} using those exact href/title pairs instead of rereading " | |
| 536 | - f"files in {chapters_label} unless one specific title is still unknown." | |
| 553 | + "Verification already identified the concrete repair files. " | |
| 554 | + f"Keep mutations pinned to {allowed_preview} " | |
| 555 | + f"and repair `{repair.artifact_path}` before widening the change set." | |
| 537 | 556 | ) |
| 538 | 557 | |
| 539 | - def _annotate_verified_html_inventory(self, tool_call: ToolCall, outcome) -> None: | |
| 540 | - """Attach verified chapter inventory directly to a successful discovery result.""" | |
| 541 | - | |
| 542 | - if tool_call.name != "glob": | |
| 543 | - return | |
| 544 | - | |
| 545 | - chapters_path = str(tool_call.arguments.get("path", "")).strip() | |
| 546 | - if not chapters_path.endswith("chapters"): | |
| 547 | - return | |
| 548 | - | |
| 549 | - if not self._targets_html_toc_task(): | |
| 550 | - return | |
| 551 | - | |
| 552 | - index_path = str(Path(chapters_path).expanduser().parent / "index.html") | |
| 553 | - verified_inventory = html_toc_rule.summarize_html_inventory(index_path, limit=12) | |
| 554 | - if not verified_inventory: | |
| 555 | - return | |
| 556 | - | |
| 557 | - action_tracker = getattr(self.context.safeguards, "action_tracker", None) | |
| 558 | - note_inventory = getattr(action_tracker, "note_verified_html_inventory", None) | |
| 559 | - if callable(note_inventory): | |
| 560 | - note_inventory(index_path) | |
| 561 | - | |
| 562 | - note = f"Verified chapter inventory: {verified_inventory}" | |
| 563 | - merged_event = outcome.event_content | |
| 564 | - if note not in merged_event: | |
| 565 | - merged_event = f"{note}\n{merged_event}".strip() | |
| 566 | - outcome.event_content = merged_event | |
| 567 | - outcome.result_output = merged_event | |
| 568 | - outcome.message.content = f"{note}\n{outcome.message.content}".strip() | |
| 569 | - if outcome.message.tool_results: | |
| 570 | - outcome.message.tool_results[0].content = merged_event | |
| 571 | - | |
| 572 | - def _annotate_validated_html_toc_completion(self, tool_call: ToolCall, outcome) -> None: | |
| 573 | - """Attach semantic TOC validation evidence to a successful mutating result.""" | |
    def _queue_blocked_late_reference_drift_nudge(
        self,
        event_content: str,
        *,
        dod: DefinitionOfDone,
    ) -> None:
        """Reinforce missing-artifact progress after late-stage reference drift is blocked.

        Only fires when the tool event carries the late-reference-drift block
        marker AND at least one explicitly planned artifact is still missing;
        otherwise the turn is left untouched.
        """

        # Marker is emitted by the safeguard that blocked the reread; its
        # absence means this event is unrelated to reference drift.
        if "[Blocked - late reference drift:" not in event_content:
            return

        missing_artifact = _next_missing_planned_artifact(
            dod,
            project_root=self.context.project_root,
        )
        if missing_artifact is None:
            return

        # Collect the unique parent roots of every planned artifact so the
        # steering message can name where output is allowed to land.
        planned_roots: list[str] = []
        seen_roots: set[str] = set()
        for target, expect_directory in collect_planned_artifact_targets(
            dod,
            project_root=self.context.project_root,
        ):
            # Directory targets are roots themselves; file targets contribute
            # their parent directory.
            root = str(target if expect_directory else target.parent)
            if root in seen_roots:
                continue
            seen_roots.add(root)
            planned_roots.append(root)

        # Preview at most two roots; elide the rest to keep the nudge short.
        roots_preview = ", ".join(f"`{root}`" for root in planned_roots[:2])
        if len(planned_roots) > 2:
            roots_preview += ", ..."
        self.context.queue_steering_message(
            "Late-stage reference rereads are no longer helping. "
            "One explicitly planned artifact is still missing."
            + _missing_artifact_resume_suffix(
                missing_artifact,
                project_root=self.context.project_root,
            )
            + f" Stay within the current output roots under {roots_preview}"
            + " and finish that artifact before reopening older reference materials."
        )
| 594 | - merged_event = outcome.event_content | |
| 595 | - if note not in merged_event: | |
| 596 | - merged_event = f"{merged_event}\n{note}".strip() | |
| 597 | - outcome.event_content = merged_event | |
| 598 | - outcome.result_output = merged_event | |
| 599 | - outcome.message.content = f"{outcome.message.content}\n{note}".strip() | |
| 600 | - if outcome.message.tool_results: | |
| 601 | - outcome.message.tool_results[0].content = merged_event | |
| 602 | 601 | |
| 603 | - def _queue_validated_html_toc_completion_nudge(self, tool_call: ToolCall) -> None: | |
| 604 | - """Push the next model turn toward finishing once the TOC already validates.""" | |
    def _queue_blocked_completed_artifact_scope_nudge(
        self,
        event_content: str,
        *,
        dod: DefinitionOfDone,
    ) -> None:
        """Keep post-build review anchored to the generated artifact set.

        Runs only after a tool call was blocked for leaving the completed
        artifact set's scope; steers the next turn back onto the files that
        already exist instead of older reference materials.
        """

        # Marker emitted by the scope safeguard; bail out on unrelated events.
        if "[Blocked - completed artifact set scope:" not in event_content:
            return

        # Unique parent roots of the planned artifacts, in first-seen order.
        planned_roots: list[str] = []
        seen_roots: set[str] = set()
        for target, expect_directory in collect_planned_artifact_targets(
            dod,
            project_root=self.context.project_root,
        ):
            root = str(target if expect_directory else target.parent)
            if root in seen_roots:
                continue
            seen_roots.add(root)
            planned_roots.append(root)

        # First actionable pending todo, skipping bookkeeping-style items.
        next_pending = next(
            (
                item
                for item in effective_pending_todo_items(
                    dod,
                    project_root=self.context.project_root,
                )
                if item not in _TODO_NUDGE_EXCLUDED_ITEMS
            ),
            None,
        )
        roots_preview = ", ".join(f"`{root}`" for root in planned_roots[:2])
        if len(planned_roots) > 2:
            roots_preview += ", ..."
        if next_pending and _todo_is_consistency_review_step(next_pending):
            # A review step is pending: point the model at it explicitly.
            self.context.queue_steering_message(
                "All explicitly planned artifacts already exist. "
                f"Stay within the current output roots under {roots_preview} and continue "
                f"with `{next_pending}` using the generated files as the source of truth. "
                "Do not reopen earlier reference materials."
            )
            return

        # No review step pending: push straight toward verification/closure.
        self.context.queue_steering_message(
            "All explicitly planned artifacts already exist. "
            f"Stay within the current output roots under {roots_preview} "
            "and move to verification or final confirmation using the generated files. "
            "Do not reopen earlier reference materials."
        )
| 654 | + | |
| 655 | + def _queue_blocked_html_edit_nudge(self, tool_call: ToolCall, event_content: str) -> None: | |
| 656 | + """Keep blocked edit feedback generic; avoid task-class-specific steering.""" | |
| 657 | + | |
| 658 | + _ = tool_call, event_content | |
| 659 | + return | |
| 675 | 660 | |
| 676 | 661 | async def _record_successful_execution( |
| 677 | 662 | self, |
@@ -704,15 +689,36 @@ class ToolBatchRunner: | ||
| 704 | 689 | if tool_call.name == "TodoWrite" and outcome.registry_result is not None: |
| 705 | 690 | new_todos = outcome.registry_result.metadata.get("new_todos", []) |
| 706 | 691 | if isinstance(new_todos, list): |
| 707 | - sync_todos_to_definition_of_done(dod, new_todos) | |
| 692 | + sync_todos_to_definition_of_done( | |
| 693 | + dod, | |
| 694 | + new_todos, | |
| 695 | + project_root=self.context.project_root, | |
| 696 | + ) | |
| 697 | + self._queue_todowrite_resume_nudge(dod=dod) | |
| 708 | 698 | else: |
| 709 | 699 | pending_before = list(dod.pending_items) |
| 710 | 700 | if advance_todos_from_tool_call(dod, tool_call): |
| 701 | + reconcile_aggregate_completion_steps( | |
| 702 | + dod, | |
| 703 | + project_root=self.context.project_root, | |
| 704 | + ) | |
| 711 | 705 | self._queue_next_pending_todo_nudge( |
| 712 | 706 | tool_call=tool_call, |
| 713 | 707 | pending_before=pending_before, |
| 714 | 708 | dod=dod, |
| 715 | 709 | ) |
| 710 | + self._queue_bookkeeping_resume_nudge( | |
| 711 | + tool_call=tool_call, | |
| 712 | + dod=dod, | |
| 713 | + ) | |
| 714 | + self._queue_missing_artifact_progress_nudge( | |
| 715 | + tool_call=tool_call, | |
| 716 | + dod=dod, | |
| 717 | + ) | |
| 718 | + self._queue_planned_artifact_handoff_nudge( | |
| 719 | + tool_call=tool_call, | |
| 720 | + dod=dod, | |
| 721 | + ) | |
| 716 | 722 | self.dod_store.save(dod) |
| 717 | 723 | recovery_context = self.context.recovery_context |
| 718 | 724 | if recovery_context is not None: |
@@ -765,7 +771,10 @@ class ToolBatchRunner: | ||
| 765 | 771 | next_pending = next( |
| 766 | 772 | ( |
| 767 | 773 | item |
| 768 | - for item in dod.pending_items | |
| 774 | + for item in effective_pending_todo_items( | |
| 775 | + dod, | |
| 776 | + project_root=self.context.project_root, | |
| 777 | + ) | |
| 769 | 778 | if item not in _TODO_NUDGE_EXCLUDED_ITEMS |
| 770 | 779 | ), |
| 771 | 780 | None, |
@@ -773,12 +782,36 @@ class ToolBatchRunner: | ||
| 773 | 782 | if not completed_label or not next_pending or next_pending == completed_label: |
| 774 | 783 | return |
| 775 | 784 | |
| 785 | + missing_artifact = _next_missing_planned_artifact( | |
| 786 | + dod, | |
| 787 | + project_root=self.context.project_root, | |
| 788 | + ) | |
| 789 | + if _should_prioritize_missing_artifact( | |
| 790 | + next_pending=next_pending, | |
| 791 | + missing_artifact=missing_artifact, | |
| 792 | + ): | |
| 793 | + self.context.queue_steering_message( | |
| 794 | + f"Confirmed progress: `{completed_label}` is now satisfied by the successful " | |
| 795 | + f"`{tool_call.name}` result. One explicitly planned artifact is still missing." | |
| 796 | + + _missing_artifact_resume_suffix( | |
| 797 | + missing_artifact, | |
| 798 | + project_root=self.context.project_root, | |
| 799 | + ) | |
| 800 | + + " Do not switch into review or consistency-check mode until the missing artifact exists." | |
| 801 | + ) | |
| 802 | + return | |
| 803 | + | |
| 776 | 804 | mutation_suffix = "" |
| 777 | 805 | if _todo_is_mutation_step(next_pending): |
| 778 | - mutation_suffix = ( | |
| 779 | - " You already have enough evidence for that step, so stop gathering " | |
| 780 | - "more reference material and perform the change now." | |
| 806 | + mutation_suffix = _missing_artifact_resume_suffix( | |
| 807 | + missing_artifact, | |
| 808 | + project_root=self.context.project_root, | |
| 781 | 809 | ) |
| 810 | + if not mutation_suffix: | |
| 811 | + mutation_suffix = ( | |
| 812 | + " You already have enough evidence for that step, so stop gathering " | |
| 813 | + "more reference material and perform the change now." | |
| 814 | + ) | |
| 782 | 815 | |
| 783 | 816 | self.context.queue_steering_message( |
| 784 | 817 | f"Confirmed progress: `{completed_label}` is now satisfied by the successful " |
@@ -786,6 +819,375 @@ class ToolBatchRunner: | ||
| 786 | 819 | f"`{next_pending}` instead of rereading the same evidence.{mutation_suffix}" |
| 787 | 820 | ) |
| 788 | 821 | |
    def _queue_planned_artifact_handoff_nudge(
        self,
        *,
        tool_call: ToolCall,
        dod: DefinitionOfDone,
    ) -> None:
        """After a mutation completes the planned artifact set, steer toward review/verification."""

        # Only a state-mutating call can have just completed the artifact set.
        if not is_state_mutating_tool_call(tool_call):
            return
        if not all_planned_artifacts_exist(dod, project_root=self.context.project_root):
            return

        # First actionable pending todo, skipping bookkeeping-style items.
        next_pending = next(
            (
                item
                for item in effective_pending_todo_items(
                    dod,
                    project_root=self.context.project_root,
                )
                if item not in _TODO_NUDGE_EXCLUDED_ITEMS
            ),
            None,
        )
        # Fall back to derived commands when none were declared explicitly.
        verification_commands = dod.verification_commands or derive_verification_commands(
            dod,
            project_root=self.context.project_root,
            task_statement=getattr(self.context.session, "current_task", "") or "",
            supplement_existing=True,
        )

        if next_pending and _todo_is_consistency_review_step(next_pending):
            verification_suffix = (
                " Move to verification once no specific mismatch remains."
                if verification_commands
                else " Avoid another full reread unless one specific inconsistency is still unknown."
            )
            self.context.queue_steering_message(
                "All explicitly planned artifacts now exist. "
                f"Continue with the next pending item: `{next_pending}`. "
                "Use the files already on disk as the source of truth instead of restarting "
                "discovery or inventing alternate filenames."
                + verification_suffix
            )
            return

        # Without a review step, only nudge when a verification path exists;
        # otherwise stay silent rather than push an unverifiable handoff.
        if verification_commands:
            self.context.queue_steering_message(
                "All explicitly planned artifacts now exist. "
                "Do not expand the artifact set or restart discovery unless a specific gap is "
                "still known. Move to verification or final confirmation using the files that "
                "already exist."
            )
| 873 | + | |
    def _queue_missing_artifact_progress_nudge(
        self,
        *,
        tool_call: ToolCall,
        dod: DefinitionOfDone,
    ) -> None:
        """After a mutation, keep the model pointed at the next missing planned artifact."""

        if not is_state_mutating_tool_call(tool_call):
            return
        missing_artifact = _next_missing_planned_artifact(
            dod,
            project_root=self.context.project_root,
        )
        if missing_artifact is None:
            return

        # Label of the mutation just confirmed (file basename or shell command).
        current_label = _current_mutation_label(tool_call)
        todo_refresh = _todo_refresh_guidance(
            dod,
            project_root=self.context.project_root,
        )
        self.context.queue_steering_message(
            f"Confirmed progress: {current_label} is now recorded."
            " One explicitly planned artifact is still missing."
            + _missing_artifact_resume_suffix(
                missing_artifact,
                project_root=self.context.project_root,
            )
            + todo_refresh
            + " Do not move to verification, final confirmation, or TodoWrite-only "
            "bookkeeping until that artifact exists."
            + " Do not spend another turn on working notes or rediscovery alone."
        )
| 906 | + | |
    def _queue_todowrite_resume_nudge(
        self,
        *,
        dod: DefinitionOfDone,
    ) -> None:
        """After a TodoWrite sync, queue the steering message that resumes real work.

        Branches on whether a planned artifact is still missing and on the
        shape of the next pending todo (mutation vs. consistency review),
        producing at most one steering message per call.
        """

        missing_artifact = _next_missing_planned_artifact(
            dod,
            project_root=self.context.project_root,
        )
        # First actionable pending todo, skipping bookkeeping-style items.
        next_pending = next(
            (
                item
                for item in effective_pending_todo_items(
                    dod,
                    project_root=self.context.project_root,
                )
                if item not in _TODO_NUDGE_EXCLUDED_ITEMS
            ),
            None,
        )
        if missing_artifact is None:
            # No artifact gap: route by the kind of the next pending step.
            if next_pending and _todo_is_mutation_step(next_pending):
                self.context.queue_steering_message(
                    "Todo tracking is updated. Continue with the next pending item: "
                    f"`{next_pending}`. Use the current output files as the source of "
                    "truth, and do not reopen reference materials unless one specific "
                    "fact required for that step is still unknown. Perform the mutation "
                    "now instead of spending another turn on planning, rereads, or "
                    "verification."
                )
                return

            if (
                next_pending
                and _todo_is_consistency_review_step(next_pending)
                and not all_planned_artifacts_exist(
                    dod,
                    project_root=self.context.project_root,
                )
            ):
                self.context.queue_steering_message(
                    "Todo tracking is updated. Continue with the next pending item: "
                    f"`{next_pending}`. Use the current output files as the source of "
                    "truth, and do not reopen reference materials unless one specific "
                    "mismatch is still unknown."
                )
                return

            # Stay silent when artifacts are incomplete but nothing above matched.
            if not all_planned_artifacts_exist(dod, project_root=self.context.project_root):
                return

            # Fall back to derived commands when none were declared explicitly.
            verification_commands = dod.verification_commands or derive_verification_commands(
                dod,
                project_root=self.context.project_root,
                task_statement=getattr(self.context.session, "current_task", "") or "",
                supplement_existing=True,
            )
            if next_pending and _todo_is_consistency_review_step(next_pending):
                verification_suffix = (
                    " Move to verification once no specific mismatch remains."
                    if verification_commands
                    else " Finish the targeted consistency pass without reopening reference materials."
                )
                self.context.queue_steering_message(
                    "Todo tracking is updated. All explicitly planned artifacts now exist. "
                    f"Continue with the next pending item: `{next_pending}`. "
                    "Use the current output files as the source of truth, and do not restart "
                    "early discovery or reopen reference materials."
                    + verification_suffix
                )
                return

            verification_suffix = (
                " Move to verification or final confirmation using the files already on disk."
                if verification_commands
                else " Finish the task using the files already on disk."
            )
            self.context.queue_steering_message(
                "Todo tracking is updated. All explicitly planned artifacts now exist. "
                "Do not restart discovery, reopen reference materials, or spend another turn "
                "on TodoWrite alone."
                + verification_suffix
            )
            return

        # A planned artifact is still missing: anchor the next turn on it.
        todo_refresh = _todo_refresh_guidance(
            dod,
            project_root=self.context.project_root,
        )
        next_pending_suffix = (
            f" Continue with the next pending item: `{next_pending}`."
            if next_pending
            else ""
        )
        self.context.queue_steering_message(
            "Todo tracking is updated. An explicitly planned artifact is still missing."
            + next_pending_suffix
            + _missing_artifact_resume_suffix(
                missing_artifact,
                project_root=self.context.project_root,
            )
            + todo_refresh
            + " Do not spend the next turn on TodoWrite alone, bookkeeping notes, "
            "verification, or final confirmation until that artifact exists."
        )
| 1012 | + | |
    def _queue_bookkeeping_resume_nudge(
        self,
        *,
        tool_call: ToolCall,
        dod: DefinitionOfDone,
    ) -> None:
        """After a note-taking tool call, redirect toward the missing planned artifact."""

        # Only react to bookkeeping/note tools; real work is handled elsewhere.
        if tool_call.name not in _BOOKKEEPING_NOTE_TOOL_NAMES:
            return

        missing_artifact = _next_missing_planned_artifact(
            dod,
            project_root=self.context.project_root,
        )
        if missing_artifact is None:
            return

        # First actionable pending todo, skipping bookkeeping-style items.
        next_pending = next(
            (
                item
                for item in effective_pending_todo_items(
                    dod,
                    project_root=self.context.project_root,
                )
                if item not in _TODO_NUDGE_EXCLUDED_ITEMS
            ),
            None,
        )
        todo_refresh = _todo_refresh_guidance(
            dod,
            project_root=self.context.project_root,
        )
        if (
            next_pending
            and not _todo_is_mutation_step(next_pending)
            and not _todo_is_consistency_review_step(next_pending)
        ):
            # The pending step is evidence gathering: ask for one concrete
            # tool call toward it rather than more notes.
            self.context.queue_steering_message(
                "Bookkeeping note is recorded. Continue with the next pending item: "
                f"`{next_pending}`. Make your next response one concrete evidence-gathering "
                "tool call that advances that step, not another bookkeeping-only turn."
                + todo_refresh
                + " Do not jump ahead to later artifact creation, verification, or final "
                "confirmation until that step is satisfied."
            )
            return

        self.context.queue_steering_message(
            "Bookkeeping note is recorded. An explicitly planned artifact is still missing."
            + _missing_artifact_resume_suffix(
                missing_artifact,
                project_root=self.context.project_root,
            )
            + todo_refresh
            + " Do not spend the next turn on additional notes, rediscovery, "
            "verification, or final confirmation until that artifact exists."
        )
| 1069 | + | |
| 1070 | + | |
def _todo_is_consistency_review_step(item: str) -> bool:
    """True when a todo item reads like a consistency/review pass."""
    lowered = item.lower()
    return any(marker in lowered for marker in _CONSISTENCY_REVIEW_HINTS)
| 1074 | + | |
| 1075 | + | |
| 1076 | +def _should_prioritize_missing_artifact( | |
| 1077 | + *, | |
| 1078 | + next_pending: str | None, | |
| 1079 | + missing_artifact: tuple[Path, bool] | None, | |
| 1080 | +) -> bool: | |
| 1081 | + if missing_artifact is None: | |
| 1082 | + return False | |
| 1083 | + if not next_pending: | |
| 1084 | + return True | |
| 1085 | + if _todo_is_consistency_review_step(next_pending): | |
| 1086 | + return True | |
| 1087 | + return not _todo_is_mutation_step(next_pending) | |
| 1088 | + | |
| 1089 | + | |
def _next_missing_planned_artifact(
    dod: DefinitionOfDone,
    *,
    project_root: Path,
) -> tuple[Path, bool] | None:
    """Return the first planned artifact target that is not yet satisfied.

    Scans at most 12 planned targets in declaration order and yields the
    first `(target, expect_directory)` pair whose artifact is absent, or
    None when everything planned already exists.
    """
    planned = collect_planned_artifact_targets(
        dod,
        project_root=project_root,
        max_paths=12,
    )
    for target, expect_directory in planned:
        satisfied = planned_artifact_target_satisfied(
            dod,
            target=target,
            expect_directory=expect_directory,
            project_root=project_root,
        )
        if satisfied:
            continue
        return target, expect_directory
    return None
| 1108 | + | |
| 1109 | + | |
def _missing_artifact_resume_suffix(
    missing_artifact: tuple[Path, bool] | None,
    *,
    project_root: Path,
) -> str:
    """Build the steering-message suffix that tells the model how to resume.

    Returns "" when nothing is missing. Otherwise names the missing file or
    directory and prescribes one concrete mutation (usually a single `write`
    call) as the next action.
    """

    if missing_artifact is None:
        return ""

    target, expect_directory = missing_artifact
    label = target.name or str(target)
    # Directory targets are rendered with a trailing slash for clarity.
    if expect_directory and not label.endswith("/"):
        label += "/"
    if expect_directory:
        next_output_file = infer_next_declared_html_output_file(
            target=target,
            project_root=project_root,
        )
        if next_output_file is not None:
            # A specific declared output is known: name it exactly.
            guidance = (
                f" Resume by creating `{next_output_file.name}` now. It is the next missing "
                f"declared output under `{label}`. Prefer one `write` call for "
                f"`{next_output_file}` instead of more rereads."
            )
            if not next_output_file.parent.exists():
                guidance += (
                    " The `write` tool can create that file's parent directories automatically,"
                    " so do the write in one step instead of stopping for a separate mkdir."
                )
            guidance += (
                " Make your next response the concrete mutation tool call itself, not another"
                " bookkeeping-only turn."
            )
            return guidance
        if target.is_dir():
            # Directory already exists: the gap is a file inside it.
            return (
                f" Resume by creating the next output file under `{label}` now. Prefer one "
                f"concrete `write` call for a file inside `{target}` instead of more rereads."
                " Make your next response the concrete mutation tool call itself, not another"
                " bookkeeping-only turn."
            )
        # The directory itself is missing: ask for its creation first.
        return (
            f" Resume by creating `{label}` now. Prefer one concrete directory-creation "
            f"step for `{target}` instead of more rereads."
        )
    # File target: one write call, with a reminder that parents auto-create.
    guidance = (
        f" Resume by creating `{label}` now. Prefer one `write` call for `{target}` "
        "instead of more rereads."
    )
    if not target.parent.exists():
        guidance += (
            " The `write` tool can create that file's parent directories automatically,"
            " so do the write in one step instead of stopping for a separate mkdir."
        )
    guidance += (
        " Make your next response the concrete mutation tool call itself, not another"
        " bookkeeping-only turn."
    )
    return guidance
| 1168 | + | |
| 1169 | + | |
def _todo_refresh_guidance(
    dod: DefinitionOfDone,
    *,
    project_root: Path | None = None,
) -> str:
    """Suffix asking for an in-step TodoWrite refresh once the task has real footprint.

    Returns "" while the task footprint is still tiny (fewer than two touched
    files and fewer than three tracked non-special steps).
    """
    excluded = _TODO_NUDGE_EXCLUDED_ITEMS
    pending = [
        item
        for item in effective_pending_todo_items(dod, project_root=project_root)
        if item not in excluded
    ]
    completed = [item for item in dod.completed_items if item not in excluded]
    tracked_steps = len(pending) + len(completed)
    # Skip the reminder while there is not yet enough state worth syncing.
    if len(dod.touched_files) < 2 and tracked_steps < 3:
        return ""
    return (
        " If the tracked steps no longer match the confirmed progress, refresh `TodoWrite` "
        "in the same response as the next concrete step instead of spending a full turn on "
        "bookkeeping alone."
    )
| 1190 | + | |
| 789 | 1191 | |
| 790 | 1192 | def _mark_verification_stale( |
| 791 | 1193 | *, |
@@ -953,6 +1355,18 @@ def _stale_verification_detail(tool_call: ToolCall) -> str: | ||
| 953 | 1355 | return f"{tool_call.name} changed the workspace" |
| 954 | 1356 | |
| 955 | 1357 | |
| 1358 | +def _current_mutation_label(tool_call: ToolCall) -> str: | |
| 1359 | + if tool_call.name in {"write", "edit", "patch"}: | |
| 1360 | + file_path = str(tool_call.arguments.get("file_path", "")).strip() | |
| 1361 | + if file_path: | |
| 1362 | + return f"`{Path(file_path).name or file_path}`" | |
| 1363 | + if tool_call.name == "bash": | |
| 1364 | + command = str(tool_call.arguments.get("command", "")).strip() | |
| 1365 | + if command: | |
| 1366 | + return f"`{command}`" | |
| 1367 | + return f"the successful `{tool_call.name}` result" | |
| 1368 | + | |
| 1369 | + | |
| 956 | 1370 | def _tool_call_label(tool_call: ToolCall) -> str: |
| 957 | 1371 | """Human-readable label for one tool call.""" |
| 958 | 1372 | name = tool_call.name |
src/loader/runtime/turn_completion.pymodified@@ -230,10 +230,6 @@ class TurnCompletionController: | ||
| 230 | 230 | actions_taken=actions_taken, |
| 231 | 231 | ) |
| 232 | 232 | |
| 233 | - final_message = Message(role=Role.ASSISTANT, content=response_content) | |
| 234 | - self.context.session.append(final_message) | |
| 235 | - summary.assistant_messages.append(final_message) | |
| 236 | - | |
| 237 | 233 | gate_result = await self.finalizer.run_definition_of_done_gate( |
| 238 | 234 | dod=dod, |
| 239 | 235 | candidate_response=final_response, |
@@ -261,6 +257,9 @@ class TurnCompletionController: | ||
| 261 | 257 | continuation_count=continuation_count, |
| 262 | 258 | ) |
| 263 | 259 | final_response = gate_result.final_response |
| 260 | + final_message = Message(role=Role.ASSISTANT, content=response_content) | |
| 261 | + self.context.session.append(final_message) | |
| 262 | + summary.assistant_messages.append(final_message) | |
| 264 | 263 | self._record_completion_decision( |
| 265 | 264 | summary=summary, |
| 266 | 265 | decision_code=gate_result.reason_code, |
src/loader/runtime/turn_iteration.pymodified@@ -135,9 +135,11 @@ class TurnIterationController: | ||
| 135 | 135 | extracted_iterations=extracted_iterations, |
| 136 | 136 | continuation_count=continuation_count, |
| 137 | 137 | consecutive_errors=consecutive_errors, |
| 138 | + dod=dod, | |
| 138 | 139 | emit=emit, |
| 139 | 140 | summary=summary, |
| 140 | 141 | ) |
| 142 | + reset_empty_retry_count = 0 | |
| 141 | 143 | |
| 142 | 144 | analysis = self.repairer.analyze_response( |
| 143 | 145 | content=assistant_turn.content, |
@@ -196,7 +198,7 @@ class TurnIterationController: | ||
| 196 | 198 | return TurnIterationDecision( |
| 197 | 199 | action=TurnIterationAction.CONTINUE, |
| 198 | 200 | continuation_count=route_decision.continuation_count, |
| 199 | - empty_retry_count=empty_retry_count, | |
| 201 | + empty_retry_count=reset_empty_retry_count, | |
| 200 | 202 | extracted_iterations=extracted_iterations, |
| 201 | 203 | consecutive_errors=route_decision.consecutive_errors, |
| 202 | 204 | new_actions_taken=route_decision.new_actions_taken, |
@@ -205,7 +207,7 @@ class TurnIterationController: | ||
| 205 | 207 | return TurnIterationDecision( |
| 206 | 208 | action=TurnIterationAction.FINALIZE, |
| 207 | 209 | continuation_count=route_decision.continuation_count, |
| 208 | - empty_retry_count=empty_retry_count, | |
| 210 | + empty_retry_count=reset_empty_retry_count, | |
| 209 | 211 | extracted_iterations=extracted_iterations, |
| 210 | 212 | consecutive_errors=route_decision.consecutive_errors, |
| 211 | 213 | new_actions_taken=route_decision.new_actions_taken, |
@@ -215,7 +217,7 @@ class TurnIterationController: | ||
| 215 | 217 | return TurnIterationDecision( |
| 216 | 218 | action=TurnIterationAction.COMPLETE, |
| 217 | 219 | continuation_count=route_decision.continuation_count, |
| 218 | - empty_retry_count=empty_retry_count, | |
| 220 | + empty_retry_count=reset_empty_retry_count, | |
| 219 | 221 | extracted_iterations=extracted_iterations, |
| 220 | 222 | consecutive_errors=route_decision.consecutive_errors, |
| 221 | 223 | new_actions_taken=route_decision.new_actions_taken, |
@@ -231,6 +233,7 @@ class TurnIterationController: | ||
| 231 | 233 | extracted_iterations: int, |
| 232 | 234 | continuation_count: int, |
| 233 | 235 | consecutive_errors: int, |
| 236 | + dod: DefinitionOfDone, | |
| 234 | 237 | emit: EventSink, |
| 235 | 238 | summary: TurnSummary, |
| 236 | 239 | ) -> TurnIterationDecision: |
@@ -247,6 +250,7 @@ class TurnIterationController: | ||
| 247 | 250 | original_task=original_task, |
| 248 | 251 | empty_retry_count=next_empty_retry_count, |
| 249 | 252 | max_empty_retries=max_empty_retries, |
| 253 | + dod=dod, | |
| 250 | 254 | ) |
| 251 | 255 | if empty_decision.should_continue and empty_decision.retry_message: |
| 252 | 256 | if empty_decision.reason_code and empty_decision.reason_summary: |
@@ -289,9 +293,11 @@ class TurnIterationController: | ||
| 289 | 293 | ) |
| 290 | 294 | await emit(AgentEvent(type="response", content=final_response)) |
| 291 | 295 | return TurnIterationDecision( |
| 292 | - action=TurnIterationAction.COMPLETE, | |
| 296 | + action=TurnIterationAction.FINALIZE, | |
| 293 | 297 | continuation_count=continuation_count, |
| 294 | 298 | empty_retry_count=next_empty_retry_count, |
| 295 | 299 | extracted_iterations=extracted_iterations, |
| 296 | 300 | consecutive_errors=consecutive_errors, |
| 301 | + finalize_reason_code=empty_decision.reason_code, | |
| 302 | + finalize_reason_summary=empty_decision.reason_summary, | |
| 297 | 303 | ) |
src/loader/runtime/turn_loop.pymodified@@ -40,7 +40,7 @@ class TurnLoopState: | ||
| 40 | 40 | empty_retry_count: int = 0 |
| 41 | 41 | extracted_iterations: int = 0 |
| 42 | 42 | consecutive_errors: int = 0 |
| 43 | - max_empty_retries: int = 5 | |
| 43 | + max_empty_retries: int = 2 | |
| 44 | 44 | max_extracted_iterations: int = 3 |
| 45 | 45 | |
| 46 | 46 | |
src/loader/runtime/turn_preparation.pymodified@@ -160,6 +160,7 @@ class TurnPreparationController: | ||
| 160 | 160 | registry=self.context.registry, |
| 161 | 161 | rollback_plan=rollback_plan, |
| 162 | 162 | workspace_root=self.context.project_root, |
| 163 | + session=self.context.session, | |
| 163 | 164 | ), |
| 164 | 165 | ) |
| 165 | 166 | return executor, rollback_plan |
src/loader/runtime/workflow.pymodified@@ -10,7 +10,12 @@ from typing import ClassVar | ||
| 10 | 10 | |
| 11 | 11 | from ..llm.base import ToolCall |
| 12 | 12 | from .clarify_grounding import ClarifyGrounding |
| 13 | -from .dod import slugify | |
| 13 | +from .dod import ( | |
| 14 | + all_planned_artifacts_exist, | |
| 15 | + collect_planned_artifact_targets, | |
| 16 | + planned_artifact_target_satisfied, | |
| 17 | + slugify, | |
| 18 | +) | |
| 14 | 19 | from .workflow_policy import ( |
| 15 | 20 | ArtifactEvidence, |
| 16 | 21 | ArtifactEvidenceKind, |
@@ -46,12 +51,14 @@ __all__ = [ | ||
| 46 | 51 | "WorkflowTimelineEntryKind", |
| 47 | 52 | "advance_todos_from_tool_call", |
| 48 | 53 | "build_execute_bridge", |
| 54 | + "effective_pending_todo_items", | |
| 49 | 55 | "enrich_clarify_brief_with_grounding", |
| 50 | 56 | "extract_verification_commands_from_markdown", |
| 51 | 57 | "load_brief", |
| 52 | 58 | "load_planning_artifacts", |
| 53 | 59 | "merge_refreshed_todos_with_existing_scope", |
| 54 | 60 | "preserve_task_grounded_acceptance_criteria", |
| 61 | + "reconcile_aggregate_completion_steps", | |
| 55 | 62 | "sync_todos_to_definition_of_done", |
| 56 | 63 | ] |
| 57 | 64 | |
@@ -106,16 +113,35 @@ _PARSE_STEP_HINTS = ( | ||
| 106 | 113 | ) |
| 107 | 114 | _MUTATION_STEP_HINTS = ( |
| 108 | 115 | "create", |
| 116 | + "creating", | |
| 109 | 117 | "update", |
| 118 | + "updating", | |
| 110 | 119 | "edit", |
| 120 | + "editing", | |
| 111 | 121 | "write", |
| 122 | + "writing", | |
| 112 | 123 | "fix", |
| 124 | + "fixing", | |
| 113 | 125 | "modify", |
| 126 | + "modifying", | |
| 114 | 127 | "change", |
| 128 | + "changing", | |
| 115 | 129 | "patch", |
| 130 | + "patching", | |
| 116 | 131 | "replace", |
| 132 | + "replacing", | |
| 117 | 133 | "correct", |
| 134 | + "correcting", | |
| 118 | 135 | "rewrite", |
| 136 | + "rewriting", | |
| 137 | +) | |
| 138 | +_CREATION_STEP_HINTS = ( | |
| 139 | + "create", | |
| 140 | + "creating", | |
| 141 | + "generate", | |
| 142 | + "generating", | |
| 143 | + "scaffold", | |
| 144 | + "scaffolding", | |
| 119 | 145 | ) |
| 120 | 146 | _VERIFY_STEP_HINTS = ( |
| 121 | 147 | "verify", |
@@ -136,6 +162,20 @@ _AGGREGATE_TODO_HINTS = ( | ||
| 136 | 162 | "properly linked", |
| 137 | 163 | "directory structure", |
| 138 | 164 | ) |
| 165 | +_ARTIFACT_SET_COMPLETION_HINTS = ( | |
| 166 | + "link", | |
| 167 | + "links", | |
| 168 | + "linked", | |
| 169 | + "navigation", | |
| 170 | + "consistency", | |
| 171 | + "consistent", | |
| 172 | + "formatted", | |
| 173 | + "formatting", | |
| 174 | + "review", | |
| 175 | +) | |
| 176 | +_TODO_FILE_CANDIDATE_PATTERN = re.compile( | |
| 177 | + r"(?:[A-Za-z0-9_.-]+/)*[A-Za-z0-9_.-]+\.[A-Za-z0-9]+" | |
| 178 | +) | |
| 139 | 179 | _ACTIONABLE_STEP_VERBS = { |
| 140 | 180 | "add", |
| 141 | 181 | "apply", |
@@ -560,6 +600,25 @@ class PlanningArtifacts: | ||
| 560 | 600 | implementation_steps=list(self.implementation_steps), |
| 561 | 601 | ) |
| 562 | 602 | |
| 603 | + def with_file_changes(self, file_changes: list[str]) -> PlanningArtifacts: | |
| 604 | + """Return one copy with a rewritten file-changes section.""" | |
| 605 | + | |
| 606 | + normalized = [item.strip() for item in file_changes if item.strip()] | |
| 607 | + if not normalized: | |
| 608 | + return self | |
| 609 | + | |
| 610 | + return PlanningArtifacts( | |
| 611 | + implementation_markdown=_replace_markdown_section_items( | |
| 612 | + self.implementation_markdown, | |
| 613 | + "File Changes", | |
| 614 | + normalized, | |
| 615 | + ), | |
| 616 | + verification_markdown=self.verification_markdown, | |
| 617 | + verification_commands=list(self.verification_commands), | |
| 618 | + acceptance_criteria=list(self.acceptance_criteria), | |
| 619 | + implementation_steps=list(self.implementation_steps), | |
| 620 | + ) | |
| 621 | + | |
| 563 | 622 | def with_progress_context( |
| 564 | 623 | self, |
| 565 | 624 | *, |
@@ -650,6 +709,8 @@ def load_planning_artifacts( | ||
| 650 | 709 | def sync_todos_to_definition_of_done( |
| 651 | 710 | dod, |
| 652 | 711 | todos: list[dict[str, str]], |
| 712 | + *, | |
| 713 | + project_root: Path | None = None, | |
| 653 | 714 | ) -> None: |
| 654 | 715 | """Reflect todo state into DoD pending/completed items.""" |
| 655 | 716 | |
@@ -671,24 +732,100 @@ def sync_todos_to_definition_of_done( | ||
| 671 | 732 | "Collect verification evidence", |
| 672 | 733 | } |
| 673 | 734 | ] |
| 735 | + existing_completed = { | |
| 736 | + item.strip() | |
| 737 | + for item in dod.completed_items | |
| 738 | + if item.strip() and item not in _SPECIAL_TODO_ITEMS | |
| 739 | + } | |
| 674 | 740 | |
| 675 | 741 | pending: list[str] = [] |
| 676 | 742 | completed: list[str] = [] |
| 677 | 743 | for item in todos: |
| 678 | 744 | status = str(item.get("status", "")).strip().lower() |
| 679 | - label = str( | |
| 680 | - item.get("active_form") if status == "in_progress" else item.get("content", "") | |
| 681 | - ).strip() | |
| 682 | - if not label: | |
| 745 | + content = str(item.get("content", "")).strip() | |
| 746 | + active_form = str(item.get("active_form", "")).strip() | |
| 747 | + label = active_form if status == "in_progress" else content | |
| 748 | + if not label and not content: | |
| 749 | + continue | |
| 750 | + # Treat exact todo items as monotonic. If a successful tool call already | |
| 751 | + # marked the same todo complete, a stale TodoWrite snapshot should not | |
| 752 | + # regress it back to pending / in progress. | |
| 753 | + if status != "completed" and ( | |
| 754 | + content in existing_completed or active_form in existing_completed | |
| 755 | + ): | |
| 756 | + completed.append(content or active_form or label) | |
| 683 | 757 | continue |
| 684 | 758 | if status == "completed": |
| 685 | - completed.append(str(item.get("content", label)).strip()) | |
| 759 | + completed.append(content or label) | |
| 686 | 760 | else: |
| 687 | 761 | pending.append(label) |
| 688 | 762 | |
| 689 | 763 | dod.pending_items = list(dict.fromkeys(pending + special_pending)) |
| 690 | 764 | dod.completed_items = list(dict.fromkeys(completed + special_completed)) |
| 691 | 765 | |
| 766 | + if project_root is not None: | |
| 767 | + _reopen_aggregate_completion_steps_for_missing_artifacts( | |
| 768 | + dod, | |
| 769 | + project_root=project_root, | |
| 770 | + ) | |
| 771 | + _reopen_directory_content_steps_for_incomplete_artifacts( | |
| 772 | + dod, | |
| 773 | + project_root=project_root, | |
| 774 | + ) | |
| 775 | + dod.pending_items = effective_pending_todo_items( | |
| 776 | + dod, | |
| 777 | + project_root=project_root, | |
| 778 | + ) | |
| 779 | + | |
| 780 | + | |
| 781 | +def effective_pending_todo_items( | |
| 782 | + dod, | |
| 783 | + *, | |
| 784 | + project_root: Path | None = None, | |
| 785 | +) -> list[str]: | |
| 786 | + """Return pending todo items after filtering stale artifact-expansion drift.""" | |
| 787 | + | |
| 788 | + pending_items = [item for item in dod.pending_items if item.strip()] | |
| 789 | + if not pending_items or project_root is None or dod.status == "fixing": | |
| 790 | + return pending_items | |
| 791 | + | |
| 792 | + planned_targets = collect_planned_artifact_targets( | |
| 793 | + dod, | |
| 794 | + project_root=project_root, | |
| 795 | + max_paths=24, | |
| 796 | + ) | |
| 797 | + if not planned_targets: | |
| 798 | + return pending_items | |
| 799 | + if not all_planned_artifacts_exist(dod, project_root=project_root, max_paths=24): | |
| 800 | + return pending_items | |
| 801 | + | |
| 802 | + planned_files = { | |
| 803 | + target.name.lower() | |
| 804 | + for target, expect_directory in planned_targets | |
| 805 | + if not expect_directory | |
| 806 | + } | |
| 807 | + if not planned_files: | |
| 808 | + return pending_items | |
| 809 | + | |
| 810 | + filtered_items = [ | |
| 811 | + item | |
| 812 | + for item in pending_items | |
| 813 | + if not _todo_targets_unplanned_artifact(item, planned_files) | |
| 814 | + ] | |
| 815 | + filtered_items = [ | |
| 816 | + item | |
| 817 | + for item in filtered_items | |
| 818 | + if not _todo_describes_stale_creation_after_artifacts_exist( | |
| 819 | + item, | |
| 820 | + planned_files, | |
| 821 | + ) | |
| 822 | + ] | |
| 823 | + return [ | |
| 824 | + item | |
| 825 | + for item in filtered_items | |
| 826 | + if not _todo_describes_stale_discovery_after_artifacts_exist(item) | |
| 827 | + ] | |
| 828 | + | |
| 692 | 829 | |
| 693 | 830 | def preserve_task_grounded_acceptance_criteria( |
| 694 | 831 | task_statement: str, |
@@ -714,6 +851,7 @@ def merge_refreshed_todos_with_existing_scope( | ||
| 714 | 851 | existing_pending_items: list[str], |
| 715 | 852 | existing_completed_items: list[str], |
| 716 | 853 | refreshed_steps: list[str], |
| 854 | + planned_files: set[str] | None = None, | |
| 717 | 855 | ) -> list[dict[str, str]]: |
| 718 | 856 | """Merge one refreshed plan with task-grounded todo scope already in flight.""" |
| 719 | 857 | |
@@ -740,6 +878,12 @@ def merge_refreshed_todos_with_existing_scope( | ||
| 740 | 878 | or _looks_actionable_refresh_step(item) |
| 741 | 879 | ) |
| 742 | 880 | ] |
| 881 | + if planned_files: | |
| 882 | + refreshed_candidates = [ | |
| 883 | + item | |
| 884 | + for item in refreshed_candidates | |
| 885 | + if not _todo_targets_unplanned_artifact(item, planned_files) | |
| 886 | + ] | |
| 743 | 887 | |
| 744 | 888 | todos: list[dict[str, str]] = [] |
| 745 | 889 | seen: set[str] = set() |
@@ -839,6 +983,12 @@ def _todo_progress_score(item: str, tool_call: ToolCall) -> int: | ||
| 839 | 983 | if _contains_any(text, _PARSE_STEP_HINTS) and ".html" in combined: |
| 840 | 984 | score += 1 |
| 841 | 985 | elif name in {"glob", "grep"}: |
| 986 | + if not ( | |
| 987 | + _contains_any(text, _SEARCH_STEP_HINTS) | |
| 988 | + or _contains_any(text, _READ_STEP_HINTS) | |
| 989 | + or _contains_any(text, _PARSE_STEP_HINTS) | |
| 990 | + ): | |
| 991 | + return 0 | |
| 842 | 992 | if _contains_any(text, _SEARCH_STEP_HINTS): |
| 843 | 993 | score += 2 |
| 844 | 994 | if name == "glob" and _contains_any(text, _READ_STEP_HINTS) and ".html" in combined: |
@@ -874,12 +1024,231 @@ def _contains_any(text: str, candidates: tuple[str, ...]) -> bool: | ||
| 874 | 1024 | |
| 875 | 1025 | |
| 876 | 1026 | def _todo_describes_aggregate_mutation(text: str) -> bool: |
| 877 | - return _contains_any(text, _AGGREGATE_TODO_HINTS) and _contains_any( | |
| 1027 | + return ( | |
| 1028 | + _contains_any(text, _AGGREGATE_TODO_HINTS) | |
| 1029 | + or _todo_mentions_plural_output_set(text) | |
| 1030 | + ) and _contains_any( | |
| 878 | 1031 | text, |
| 879 | 1032 | _MUTATION_STEP_HINTS, |
| 880 | 1033 | ) |
| 881 | 1034 | |
| 882 | 1035 | |
| 1036 | +def _todo_requires_complete_artifact_set(text: str) -> bool: | |
| 1037 | + return ( | |
| 1038 | + _contains_any(text, _AGGREGATE_TODO_HINTS) | |
| 1039 | + or _todo_mentions_plural_output_set(text) | |
| 1040 | + ) and _contains_any( | |
| 1041 | + text, | |
| 1042 | + _ARTIFACT_SET_COMPLETION_HINTS, | |
| 1043 | + ) | |
| 1044 | + | |
| 1045 | + | |
| 1046 | +def _todo_mentions_plural_output_set(text: str) -> bool: | |
| 1047 | + if _TODO_FILE_CANDIDATE_PATTERN.search(text): | |
| 1048 | + return False | |
| 1049 | + return any( | |
| 1050 | + phrase in text | |
| 1051 | + for phrase in ( | |
| 1052 | + "chapter files", | |
| 1053 | + "all chapters", | |
| 1054 | + "chapters", | |
| 1055 | + "files following", | |
| 1056 | + "files with", | |
| 1057 | + "output files", | |
| 1058 | + "artifacts", | |
| 1059 | + "documents", | |
| 1060 | + "sections", | |
| 1061 | + "pages", | |
| 1062 | + ) | |
| 1063 | + ) | |
| 1064 | + | |
| 1065 | + | |
| 1066 | +def _todo_targets_unplanned_artifact(item: str, planned_files: set[str]) -> bool: | |
| 1067 | + if item in _SPECIAL_TODO_ITEMS: | |
| 1068 | + return False | |
| 1069 | + | |
| 1070 | + text = item.strip().lower() | |
| 1071 | + if not text or not _contains_any(text, _MUTATION_STEP_HINTS): | |
| 1072 | + return False | |
| 1073 | + | |
| 1074 | + candidates = { | |
| 1075 | + Path(match).name.lower() | |
| 1076 | + for match in _TODO_FILE_CANDIDATE_PATTERN.findall(text) | |
| 1077 | + } | |
| 1078 | + if not candidates: | |
| 1079 | + return False | |
| 1080 | + | |
| 1081 | + return candidates.isdisjoint(planned_files) | |
| 1082 | + | |
| 1083 | + | |
| 1084 | +def _todo_describes_stale_discovery_after_artifacts_exist(item: str) -> bool: | |
| 1085 | + text = item.strip().lower() | |
| 1086 | + if not text or item in _SPECIAL_TODO_ITEMS: | |
| 1087 | + return False | |
| 1088 | + if _contains_any(text, _VERIFY_STEP_HINTS): | |
| 1089 | + return False | |
| 1090 | + if _contains_any(text, _MUTATION_STEP_HINTS): | |
| 1091 | + return False | |
| 1092 | + if _contains_any(text, _ARTIFACT_SET_COMPLETION_HINTS): | |
| 1093 | + return False | |
| 1094 | + return ( | |
| 1095 | + _contains_any(text, _READ_STEP_HINTS) | |
| 1096 | + or _contains_any(text, _SEARCH_STEP_HINTS) | |
| 1097 | + or _contains_any(text, _PARSE_STEP_HINTS) | |
| 1098 | + ) | |
| 1099 | + | |
| 1100 | + | |
| 1101 | +def _todo_describes_stale_creation_after_artifacts_exist( | |
| 1102 | + item: str, | |
| 1103 | + planned_files: set[str], | |
| 1104 | +) -> bool: | |
| 1105 | + text = item.strip().lower() | |
| 1106 | + if not text or item in _SPECIAL_TODO_ITEMS: | |
| 1107 | + return False | |
| 1108 | + if _contains_any(text, _VERIFY_STEP_HINTS): | |
| 1109 | + return False | |
| 1110 | + if not _contains_any(text, _CREATION_STEP_HINTS): | |
| 1111 | + return False | |
| 1112 | + candidates = { | |
| 1113 | + Path(match).name.lower() | |
| 1114 | + for match in _TODO_FILE_CANDIDATE_PATTERN.findall(text) | |
| 1115 | + } | |
| 1116 | + if not candidates: | |
| 1117 | + return False | |
| 1118 | + return not candidates.isdisjoint(planned_files) | |
| 1119 | + | |
| 1120 | + | |
| 1121 | +def _todo_describes_directory_content_creation( | |
| 1122 | + item: str, | |
| 1123 | + directories: list[Path], | |
| 1124 | +) -> bool: | |
| 1125 | + text = item.strip().lower() | |
| 1126 | + if not text or item in _SPECIAL_TODO_ITEMS: | |
| 1127 | + return False | |
| 1128 | + if not _contains_any(text, _CREATION_STEP_HINTS): | |
| 1129 | + return False | |
| 1130 | + if not any( | |
| 1131 | + token in text | |
| 1132 | + for token in ( | |
| 1133 | + "file", | |
| 1134 | + "files", | |
| 1135 | + "chapter", | |
| 1136 | + "chapters", | |
| 1137 | + "page", | |
| 1138 | + "pages", | |
| 1139 | + "artifact", | |
| 1140 | + "artifacts", | |
| 1141 | + "content", | |
| 1142 | + "test", | |
| 1143 | + "tests", | |
| 1144 | + ) | |
| 1145 | + ): | |
| 1146 | + return False | |
| 1147 | + | |
| 1148 | + for directory in directories: | |
| 1149 | + name = directory.name.lower() | |
| 1150 | + tokens = {name} | |
| 1151 | + if name.endswith("ies") and len(name) > 3: | |
| 1152 | + tokens.add(f"{name[:-3]}y") | |
| 1153 | + elif name.endswith("s") and len(name) > 3: | |
| 1154 | + tokens.add(name[:-1]) | |
| 1155 | + if any(token in text for token in tokens): | |
| 1156 | + return True | |
| 1157 | + return False | |
| 1158 | + | |
| 1159 | + | |
| 1160 | +def _reopen_aggregate_completion_steps_for_missing_artifacts( | |
| 1161 | + dod, | |
| 1162 | + *, | |
| 1163 | + project_root: Path, | |
| 1164 | +) -> None: | |
| 1165 | + planned_targets = collect_planned_artifact_targets( | |
| 1166 | + dod, | |
| 1167 | + project_root=project_root, | |
| 1168 | + max_paths=12, | |
| 1169 | + ) | |
| 1170 | + if not planned_targets: | |
| 1171 | + return | |
| 1172 | + | |
| 1173 | + if all_planned_artifacts_exist(dod, project_root=project_root, max_paths=12): | |
| 1174 | + return | |
| 1175 | + | |
| 1176 | + retained_completed: list[str] = [] | |
| 1177 | + reopened_pending: list[str] = [] | |
| 1178 | + for item in dod.completed_items: | |
| 1179 | + text = item.strip().lower() | |
| 1180 | + if item in _SPECIAL_TODO_ITEMS or not _todo_requires_complete_artifact_set(text): | |
| 1181 | + retained_completed.append(item) | |
| 1182 | + continue | |
| 1183 | + reopened_pending.append(item) | |
| 1184 | + | |
| 1185 | + if not reopened_pending: | |
| 1186 | + return | |
| 1187 | + | |
| 1188 | + dod.completed_items = retained_completed | |
| 1189 | + dod.pending_items = list(dict.fromkeys(dod.pending_items + reopened_pending)) | |
| 1190 | + | |
| 1191 | + | |
| 1192 | +def _reopen_directory_content_steps_for_incomplete_artifacts( | |
| 1193 | + dod, | |
| 1194 | + *, | |
| 1195 | + project_root: Path, | |
| 1196 | +) -> None: | |
| 1197 | + planned_targets = collect_planned_artifact_targets( | |
| 1198 | + dod, | |
| 1199 | + project_root=project_root, | |
| 1200 | + max_paths=12, | |
| 1201 | + ) | |
| 1202 | + if not planned_targets: | |
| 1203 | + return | |
| 1204 | + | |
| 1205 | + incomplete_directories = [ | |
| 1206 | + target | |
| 1207 | + for target, expect_directory in planned_targets | |
| 1208 | + if expect_directory | |
| 1209 | + and not planned_artifact_target_satisfied( | |
| 1210 | + dod, | |
| 1211 | + target=target, | |
| 1212 | + expect_directory=True, | |
| 1213 | + project_root=project_root, | |
| 1214 | + ) | |
| 1215 | + ] | |
| 1216 | + if not incomplete_directories: | |
| 1217 | + return | |
| 1218 | + | |
| 1219 | + retained_completed: list[str] = [] | |
| 1220 | + reopened_pending: list[str] = [] | |
| 1221 | + for item in dod.completed_items: | |
| 1222 | + if item in _SPECIAL_TODO_ITEMS: | |
| 1223 | + retained_completed.append(item) | |
| 1224 | + continue | |
| 1225 | + if _todo_describes_directory_content_creation(item, incomplete_directories): | |
| 1226 | + reopened_pending.append(item) | |
| 1227 | + continue | |
| 1228 | + retained_completed.append(item) | |
| 1229 | + | |
| 1230 | + if not reopened_pending: | |
| 1231 | + return | |
| 1232 | + | |
| 1233 | + dod.completed_items = retained_completed | |
| 1234 | + dod.pending_items = list(dict.fromkeys(dod.pending_items + reopened_pending)) | |
| 1235 | + | |
| 1236 | + | |
| 1237 | +def reconcile_aggregate_completion_steps( | |
| 1238 | + dod, | |
| 1239 | + *, | |
| 1240 | + project_root: Path | None, | |
| 1241 | +) -> None: | |
| 1242 | + """Reopen aggregate completion steps when planned artifacts are still missing.""" | |
| 1243 | + | |
| 1244 | + if project_root is None: | |
| 1245 | + return | |
| 1246 | + _reopen_aggregate_completion_steps_for_missing_artifacts( | |
| 1247 | + dod, | |
| 1248 | + project_root=project_root, | |
| 1249 | + ) | |
| 1250 | + | |
| 1251 | + | |
| 883 | 1252 | def _looks_like_search_command(command: str) -> bool: |
| 884 | 1253 | return any(token in command for token in (" ls", "ls ", "find ", "rg ", "grep ", "glob ")) |
| 885 | 1254 | |
src/loader/runtime/workflow_lanes.pymodified@@ -22,7 +22,7 @@ from .clarify_strategy import ( | ||
| 22 | 22 | describe_clarify_stage, |
| 23 | 23 | ) |
| 24 | 24 | from .context import RuntimeContext |
| 25 | -from .dod import DefinitionOfDone, DefinitionOfDoneStore | |
| 25 | +from .dod import DefinitionOfDone, DefinitionOfDoneStore, collect_planned_artifact_targets | |
| 26 | 26 | from .events import AgentEvent, TurnSummary |
| 27 | 27 | from .executor import ToolExecutor |
| 28 | 28 | from .workflow import ( |
@@ -208,6 +208,12 @@ class WorkflowLaneRunner: | ||
| 208 | 208 | refreshed_acceptance_criteria=list(artifacts.acceptance_criteria), |
| 209 | 209 | ) |
| 210 | 210 | artifacts = artifacts.with_acceptance_criteria(preserved_acceptance) |
| 211 | + preserved_file_changes = _preserved_file_change_items( | |
| 212 | + dod, | |
| 213 | + project_root=self.context.project_root, | |
| 214 | + ) | |
| 215 | + if preserved_file_changes: | |
| 216 | + artifacts = artifacts.with_file_changes(preserved_file_changes) | |
| 211 | 217 | artifacts = artifacts.with_progress_context( |
| 212 | 218 | touched_files=list(dod.touched_files), |
| 213 | 219 | completed_items=list(dod.completed_items), |
@@ -309,11 +315,16 @@ class WorkflowLaneRunner: | ||
| 309 | 315 | assert executor is not None |
| 310 | 316 | |
| 311 | 317 | if preserve_existing_scope: |
| 318 | + planned_files = _planned_file_names_for_refresh( | |
| 319 | + dod, | |
| 320 | + project_root=self.context.project_root, | |
| 321 | + ) | |
| 312 | 322 | todos = merge_refreshed_todos_with_existing_scope( |
| 313 | 323 | task_statement, |
| 314 | 324 | existing_pending_items=list(dod.pending_items), |
| 315 | 325 | existing_completed_items=list(dod.completed_items), |
| 316 | 326 | refreshed_steps=list(artifacts.implementation_steps[:8]), |
| 327 | + planned_files=planned_files, | |
| 317 | 328 | ) |
| 318 | 329 | else: |
| 319 | 330 | todos = [ |
@@ -369,7 +380,11 @@ class WorkflowLaneRunner: | ||
| 369 | 380 | if outcome.registry_result is not None: |
| 370 | 381 | new_todos = outcome.registry_result.metadata.get("new_todos", []) |
| 371 | 382 | if isinstance(new_todos, list): |
| 372 | - sync_todos_to_definition_of_done(dod, new_todos) | |
| 383 | + sync_todos_to_definition_of_done( | |
| 384 | + dod, | |
| 385 | + new_todos, | |
| 386 | + project_root=self.context.project_root, | |
| 387 | + ) | |
| 373 | 388 | self.dod_store.save(dod) |
| 374 | 389 | |
| 375 | 390 | async def _run_clarify_round( |
@@ -720,3 +735,37 @@ class WorkflowLaneRunner: | ||
| 720 | 735 | decision_boundaries=list(brief.decision_boundaries), |
| 721 | 736 | likely_touchpoints=list(brief.likely_touchpoints), |
| 722 | 737 | ) |
| 738 | + | |
| 739 | + | |
| 740 | +def _preserved_file_change_items( | |
| 741 | + dod: DefinitionOfDone, | |
| 742 | + *, | |
| 743 | + project_root: Path, | |
| 744 | +) -> list[str]: | |
| 745 | + items: list[str] = [] | |
| 746 | + for target, expect_directory in collect_planned_artifact_targets( | |
| 747 | + dod, | |
| 748 | + project_root=project_root, | |
| 749 | + max_paths=24, | |
| 750 | + ): | |
| 751 | + path_text = str(target) | |
| 752 | + if expect_directory and not path_text.endswith("/"): | |
| 753 | + path_text += "/" | |
| 754 | + items.append(f"`{path_text}`") | |
| 755 | + return items | |
| 756 | + | |
| 757 | + | |
| 758 | +def _planned_file_names_for_refresh( | |
| 759 | + dod: DefinitionOfDone, | |
| 760 | + *, | |
| 761 | + project_root: Path, | |
| 762 | +) -> set[str]: | |
| 763 | + return { | |
| 764 | + target.name.lower() | |
| 765 | + for target, expect_directory in collect_planned_artifact_targets( | |
| 766 | + dod, | |
| 767 | + project_root=project_root, | |
| 768 | + max_paths=24, | |
| 769 | + ) | |
| 770 | + if not expect_directory | |
| 771 | + } | |
src/loader/runtime/workflow_recovery.pymodified@@ -10,7 +10,7 @@ from .artifact_invalidation import ( | ||
| 10 | 10 | WorkflowRecoveryStrategy, |
| 11 | 11 | ) |
| 12 | 12 | from .context import RuntimeContext |
| 13 | -from .dod import DefinitionOfDone | |
| 13 | +from .dod import DefinitionOfDone, collect_planned_artifact_targets | |
| 14 | 14 | from .events import AgentEvent, TurnSummary |
| 15 | 15 | from .executor import ToolExecutor |
| 16 | 16 | from .workflow import ( |
@@ -128,6 +128,10 @@ class WorkflowRecoveryController: | ||
| 128 | 128 | def plan_freshness(self, dod: DefinitionOfDone) -> ArtifactFreshness: |
| 129 | 129 | """Assess whether the persisted workflow artifacts are stale.""" |
| 130 | 130 | |
| 131 | + planned_artifacts_complete = not _first_missing_planned_artifact( | |
| 132 | + dod, | |
| 133 | + project_root=self.context.project_root, | |
| 134 | + ) | |
| 131 | 135 | return self.artifact_invalidation.assess( |
| 132 | 136 | task_statement=dod.task_statement, |
| 133 | 137 | clarify_text=self._artifact_text(dod.clarify_brief), |
@@ -136,6 +140,8 @@ class WorkflowRecoveryController: | ||
| 136 | 140 | acceptance_criteria=list(dod.acceptance_criteria), |
| 137 | 141 | touched_files=list(dod.touched_files), |
| 138 | 142 | last_verification_result=dod.last_verification_result, |
| 143 | + retry_count=dod.retry_count, | |
| 144 | + planned_artifacts_complete=planned_artifacts_complete, | |
| 139 | 145 | ) |
| 140 | 146 | |
| 141 | 147 | async def _run_plan_refresh_reentry( |
@@ -198,6 +204,25 @@ class WorkflowRecoveryController: | ||
| 198 | 204 | ), |
| 199 | 205 | None, |
| 200 | 206 | ) |
| 207 | + missing_artifact = _first_missing_planned_artifact( | |
| 208 | + dod, | |
| 209 | + project_root=self.context.project_root, | |
| 210 | + ) | |
| 211 | + if _should_prioritize_missing_artifact( | |
| 212 | + next_pending=next_pending, | |
| 213 | + missing_artifact=missing_artifact, | |
| 214 | + ): | |
| 215 | + target, expect_directory = missing_artifact | |
| 216 | + label = target.name or str(target) | |
| 217 | + if expect_directory and not label.endswith("/"): | |
| 218 | + label += "/" | |
| 219 | + self.context.queue_steering_message( | |
| 220 | + "Plan refresh preserved the progress already made. " | |
| 221 | + "Reuse the existing files and confirmed facts, then resume by creating " | |
| 222 | + f"`{label}`. Prefer one concrete mutation step for `{target}` before " | |
| 223 | + "any more review or consistency-check work." | |
| 224 | + ) | |
| 225 | + return True | |
| 201 | 226 | if next_pending: |
| 202 | 227 | self.context.queue_steering_message( |
| 203 | 228 | "Plan refresh preserved the progress already made. " |
@@ -350,3 +375,62 @@ class WorkflowRecoveryController: | ||
| 350 | 375 | @staticmethod |
| 351 | 376 | def _recovery_evidence_summary(freshness: ArtifactFreshness) -> list[str]: |
| 352 | 377 | return list(freshness.evidence_summary) |
| 378 | + | |
| 379 | + | |
| 380 | +def _first_missing_planned_artifact( | |
| 381 | + dod: DefinitionOfDone, | |
| 382 | + *, | |
| 383 | + project_root: Path, | |
| 384 | +) -> tuple[Path, bool] | None: | |
| 385 | + for target, expect_directory in collect_planned_artifact_targets( | |
| 386 | + dod, | |
| 387 | + project_root=project_root, | |
| 388 | + max_paths=12, | |
| 389 | + ): | |
| 390 | + exists = target.is_dir() if expect_directory else target.is_file() | |
| 391 | + if not exists: | |
| 392 | + return target, expect_directory | |
| 393 | + return None | |
| 394 | + | |
| 395 | + | |
| 396 | +def _should_prioritize_missing_artifact( | |
| 397 | + *, | |
| 398 | + next_pending: str | None, | |
| 399 | + missing_artifact: tuple[Path, bool] | None, | |
| 400 | +) -> bool: | |
| 401 | + if missing_artifact is None: | |
| 402 | + return False | |
| 403 | + if not next_pending: | |
| 404 | + return True | |
| 405 | + lowered = next_pending.lower() | |
| 406 | + if any( | |
| 407 | + hint in lowered | |
| 408 | + for hint in ( | |
| 409 | + "verify", | |
| 410 | + "validation", | |
| 411 | + "validate", | |
| 412 | + "review", | |
| 413 | + "consistent", | |
| 414 | + "consistently", | |
| 415 | + "linked", | |
| 416 | + "format", | |
| 417 | + "formatted", | |
| 418 | + ) | |
| 419 | + ): | |
| 420 | + return True | |
| 421 | + return not any( | |
| 422 | + hint in lowered | |
| 423 | + for hint in ( | |
| 424 | + "create", | |
| 425 | + "update", | |
| 426 | + "edit", | |
| 427 | + "write", | |
| 428 | + "fix", | |
| 429 | + "modify", | |
| 430 | + "change", | |
| 431 | + "patch", | |
| 432 | + "replace", | |
| 433 | + "correct", | |
| 434 | + "rewrite", | |
| 435 | + ) | |
| 436 | + ) | |
src/loader/tools/workflow_tools.pymodified@@ -117,6 +117,7 @@ class TodoWriteTool(Tool): | ||
| 117 | 117 | |
| 118 | 118 | store_path = self._store_path() |
| 119 | 119 | old_todos = await asyncio.to_thread(self._read_existing_items, store_path) |
| 120 | + items = self._merge_partial_update(old_todos, items) | |
| 120 | 121 | |
| 121 | 122 | all_done = all(item.status == "completed" for item in items) |
| 122 | 123 | persisted_items = [] if all_done else [item.to_dict() for item in items] |
@@ -144,6 +145,29 @@ class TodoWriteTool(Tool): | ||
| 144 | 145 | metadata=payload, |
| 145 | 146 | ) |
| 146 | 147 | |
| 148 | + def _merge_partial_update( | |
| 149 | + self, | |
| 150 | + old_todos: list[dict[str, Any]], | |
| 151 | + items: list[TodoItem], | |
| 152 | + ) -> list[TodoItem]: | |
| 153 | + """Preserve omitted todos when the model sends a narrow status update.""" | |
| 154 | + | |
| 155 | + old_items = [TodoItem.from_dict(item) for item in old_todos if isinstance(item, dict)] | |
| 156 | + if not old_items or len(items) >= len(old_items): | |
| 157 | + return items | |
| 158 | + | |
| 159 | + old_by_content = {item.content: item for item in old_items if item.content} | |
| 160 | + if not old_by_content: | |
| 161 | + return items | |
| 162 | + if not all(item.content in old_by_content for item in items): | |
| 163 | + return items | |
| 164 | + | |
| 165 | + updates = {item.content: item for item in items} | |
| 166 | + merged: list[TodoItem] = [] | |
| 167 | + for old_item in old_items: | |
| 168 | + merged.append(updates.get(old_item.content, old_item)) | |
| 169 | + return merged | |
| 170 | + | |
| 147 | 171 | def _store_path(self) -> Path: |
| 148 | 172 | return active_todo_store_path(self.workspace_root or Path.cwd()) |
| 149 | 173 | |
tests/test_artifact_invalidation.pymodified@@ -92,3 +92,49 @@ def test_artifact_invalidation_treats_path_separator_variants_as_same_touchpoint | ||
| 92 | 92 | assert freshness.stale_plan is False |
| 93 | 93 | assert freshness.stale_brief is False |
| 94 | 94 | assert "touched_files_outside_plan" not in freshness.reason_codes |
| 95 | + | |
| 96 | + | |
def test_artifact_invalidation_allows_supplemental_repair_files_after_failed_verification() -> None:
    """A repair retry may touch helper files (styles.css) without staling the plan."""
    plan_text = (
        "# Implementation Plan\n"
        "- Create index.html.\n"
        "- Create 01-getting-started.html.\n"
        "- Create 02-installation.html.\n"
    )
    verification_plan = (
        "# Verification Plan\n"
        "## Acceptance Criteria\n"
        "- index.html exists.\n"
        "- 01-getting-started.html exists.\n"
        "- 02-installation.html exists.\n"
    )
    criteria = [
        "index.html exists.",
        "01-getting-started.html exists.",
        "02-installation.html exists.",
    ]
    touched = [
        "/tmp/guides/nginx/index.html",
        "/tmp/guides/nginx/chapters/01-getting-started.html",
        "/tmp/guides/nginx/chapters/02-installation.html",
        "/tmp/guides/nginx/styles.css",
    ]

    result = ArtifactInvalidationAssessor().assess(
        task_statement="Build a multi-file nginx guide.",
        clarify_text=None,
        implementation_text=plan_text,
        verification_text=verification_plan,
        acceptance_criteria=criteria,
        touched_files=touched,
        last_verification_result="planned",
        retry_count=1,
        planned_artifacts_complete=True,
    )

    assert result.stale_plan is False
    assert result.stale_brief is False
    assert result.recovery_strategy == WorkflowRecoveryStrategy.NONE.value
    assert "touched_files_outside_plan" not in result.reason_codes
    # The extra styles.css should be recorded as a confirmed touchpoint instead.
    assert any(
        entry.kind == ArtifactEvidenceKind.CONFIRMED_TOUCHPOINT.value
        and "styles.css" in entry.summary
        for entry in result.evidence
    )
tests/test_compaction.pymodified@@ -149,12 +149,12 @@ def test_build_session_summary_preserves_confirmed_facts_and_next_step() -> None | ||
| 149 | 149 | |
| 150 | 150 | assert "Confirmed facts:" in summary |
| 151 | 151 | assert "02-basic-syntax.html -> 02-setup.html" in summary |
| 152 | - assert "02-setup.html = Chapter 2: Setting Up Fortran" in summary | |
| 152 | + assert "02-setup.html = Chapter 2: Setting Up Fortran" not in summary | |
| 153 | 153 | assert "Preferred next step:" in summary |
| 154 | 154 | assert "`~/Loader/guides/fortran/index.html`" in summary |
| 155 | 155 | |
| 156 | 156 | |
| 157 | -def test_summarize_confirmed_facts_extracts_chapter_titles_from_read_results() -> None: | |
| 157 | +def test_summarize_confirmed_facts_ignores_reference_chapter_title_reads() -> None: | |
| 158 | 158 | messages = [ |
| 159 | 159 | Message( |
| 160 | 160 | role=Role.ASSISTANT, |
@@ -186,10 +186,7 @@ def test_summarize_confirmed_facts_extracts_chapter_titles_from_read_results() - | ||
| 186 | 186 | |
| 187 | 187 | confirmed_facts = summarize_confirmed_facts(messages, max_items=2) |
| 188 | 188 | |
| 189 | - assert confirmed_facts is not None | |
| 190 | - assert "Chapter titles confirmed:" in confirmed_facts | |
| 191 | - assert "01-introduction.html = Chapter 1: Introduction to Fortran" in confirmed_facts | |
| 192 | - assert "02-setup.html = Chapter 2: Setting Up Fortran" in confirmed_facts | |
| 189 | + assert confirmed_facts is None | |
| 193 | 190 | |
| 194 | 191 | |
| 195 | 192 | def test_infer_preferred_next_step_uses_confirmed_chapter_pairs() -> None: |
@@ -222,10 +219,7 @@ def test_infer_preferred_next_step_uses_confirmed_chapter_pairs() -> None: | ||
| 222 | 219 | current_task="Update /tmp/fortran/index.html so the chapter list matches the real files.", |
| 223 | 220 | ) |
| 224 | 221 | |
| 225 | - assert next_step == ( | |
| 226 | - "Update `/tmp/fortran/index.html` using the confirmed chapter file/title pairs " | |
| 227 | - "instead of rereading files." | |
| 228 | - ) | |
| 222 | + assert next_step is None | |
| 229 | 223 | |
| 230 | 224 | |
| 231 | 225 | def test_infer_preferred_next_step_uses_latest_verification_gap() -> None: |
@@ -278,13 +272,8 @@ def test_infer_preferred_next_step_uses_latest_verification_gap() -> None: | ||
| 278 | 272 | current_task="Update /tmp/fortran/index.html so the chapter list matches the real files.", |
| 279 | 273 | ) |
| 280 | 274 | |
| 281 | - assert confirmed_facts is not None | |
| 282 | - assert "Verification gaps: missing TOC links chapters/05-control-structures.html" in confirmed_facts | |
| 283 | - assert next_step == ( | |
| 284 | - "Update `/tmp/fortran/index.html` to fix the specific verification failures " | |
| 285 | - "(missing TOC links chapters/05-control-structures.html, " | |
| 286 | - "chapters/06-input-output.html) instead of restarting discovery." | |
| 287 | - ) | |
| 275 | + assert confirmed_facts is None | |
| 276 | + assert next_step is None | |
| 288 | 277 | |
| 289 | 278 | |
| 290 | 279 | def test_compact_session_messages_uses_single_continuation_instruction_block() -> None: |
tests/test_dod.pymodified@@ -6,8 +6,10 @@ from loader.llm.base import ToolCall | ||
| 6 | 6 | from loader.runtime.dod import ( |
| 7 | 7 | DefinitionOfDoneStore, |
| 8 | 8 | VerificationEvidence, |
| 9 | + all_planned_artifacts_exist, | |
| 9 | 10 | begin_new_verification_attempt, |
| 10 | 11 | build_verification_summary, |
| 12 | + collect_planned_artifact_targets, | |
| 11 | 13 | create_definition_of_done, |
| 12 | 14 | derive_verification_commands, |
| 13 | 15 | determine_task_size, |
@@ -166,6 +168,172 @@ def test_derive_verification_commands_avoids_repo_defaults_for_external_artifact | ||
| 166 | 168 | assert commands == [f"test -f {external_index}"] |
| 167 | 169 | |
| 168 | 170 | |
def test_derive_verification_commands_adds_generic_local_html_link_check(
    tmp_path: Path,
) -> None:
    """Touched HTML files should trigger a generic local-link verification step."""
    docs_dir = tmp_path / "docs"
    docs_dir.mkdir()
    index_page = docs_dir / "index.html"
    index_page.write_text('<a href="chapters/01-intro.html">Intro</a>\n')

    dod = create_definition_of_done("Create a small multi-page HTML guide.")
    dod.touched_files = [str(index_page)]

    commands = derive_verification_commands(
        dod,
        project_root=tmp_path,
        task_statement=dod.task_statement,
        supplement_existing=True,
    )

    assert any("Missing local HTML links:" in entry for entry in commands)
| 190 | + | |
| 191 | + | |
def test_derive_verification_commands_adds_planned_artifact_existence_checks(
    tmp_path: Path,
) -> None:
    """Every file/directory listed in the plan should get an existence check."""
    plan_path = tmp_path / "implementation.md"
    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        "- `docs/index.html`",
        "- `docs/chapters/01-intro.html`",
        "- `docs/chapters/02-installation.html`",
        "- `docs/chapters/`",
    ]
    plan_path.write_text("\n".join(plan_lines))

    dod = create_definition_of_done("Create a multi-page HTML guide.")
    dod.implementation_plan = str(plan_path)

    commands = derive_verification_commands(
        dod,
        project_root=tmp_path,
        task_statement=dod.task_statement,
        supplement_existing=True,
    )

    planned_files = (
        "docs/index.html",
        "docs/chapters/01-intro.html",
        "docs/chapters/02-installation.html",
    )
    for relative in planned_files:
        assert f"test -f {tmp_path / relative}" in commands
    assert f"test -d {tmp_path / 'docs/chapters'}" in commands
| 224 | + | |
| 225 | + | |
def test_collect_planned_artifact_targets_ignores_prose_path_fragments_in_refreshed_plan(
    tmp_path: Path,
) -> None:
    """Narrative bullets that merely mention file names must not become targets."""
    plan_path = tmp_path / "implementation.md"
    confirmed_index = tmp_path / "external" / "guides" / "nginx" / "index.html"
    confirmed_index.parent.mkdir(parents=True)
    confirmed_index.write_text("<html></html>\n")
    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        "- Created main index.html file with proper structure and navigation",
        "- Created the nginx guide directory structure (chapters/)",
        "- Created the first chapter file (01-introduction.html) with appropriate content",
        "",
        "## Confirmed Progress",
        f"- Already touched during execution: `{confirmed_index}`.",
    ]
    plan_path.write_text("\n".join(plan_lines))

    dod = create_definition_of_done("Create an external nginx guide.")
    dod.implementation_plan = str(plan_path)

    targets = collect_planned_artifact_targets(dod, project_root=tmp_path)

    # Prose fragments must not be resolved against the project root.
    assert (tmp_path / "chapters", True) not in targets
    assert (tmp_path / "01-introduction.html", False) not in targets
    assert targets == [(confirmed_index, False)]
| 257 | + | |
| 258 | + | |
def test_all_planned_artifacts_exist_requires_file_contents_for_planned_output_directory(
    tmp_path: Path,
) -> None:
    """A planned output directory only counts as done once it holds real files."""
    plan_path = tmp_path / "implementation.md"
    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{tmp_path / 'guide' / 'index.html'}`",
        f"- `{tmp_path / 'guide' / 'chapters'}/` (directory for chapter files)",
        "",
        "## Execution Order",
        "- Create chapter files with appropriate content",
    ]
    plan_path.write_text("\n".join(plan_lines))

    guide_dir = tmp_path / "guide"
    chapters_dir = guide_dir / "chapters"
    guide_dir.mkdir()
    chapters_dir.mkdir()
    (guide_dir / "index.html").write_text("<html></html>\n")

    dod = create_definition_of_done("Create a multi-file guide with chapters.")
    dod.implementation_plan = str(plan_path)
    dod.completed_items = ["Create chapter files with appropriate content"]

    # The chapters/ directory exists but is empty, so the plan is unmet.
    assert all_planned_artifacts_exist(dod, project_root=tmp_path) is False

    (chapters_dir / "01-getting-started.html").write_text("<h1>Intro</h1>\n")

    assert all_planned_artifacts_exist(dod, project_root=tmp_path) is True
| 293 | + | |
| 294 | + | |
def test_all_planned_artifacts_exist_stays_false_while_touched_html_links_missing(
    tmp_path: Path,
) -> None:
    """Dangling local links in touched HTML keep the artifact set incomplete."""
    plan_path = tmp_path / "implementation.md"
    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{tmp_path / 'guide' / 'index.html'}`",
        f"- `{tmp_path / 'guide' / 'chapters'}/` (directory for chapter files)",
        "",
        "## Execution Order",
        "- Create chapter files with appropriate content",
    ]
    plan_path.write_text("\n".join(plan_lines))

    guide_dir = tmp_path / "guide"
    chapters_dir = guide_dir / "chapters"
    guide_dir.mkdir()
    chapters_dir.mkdir()
    index_page = guide_dir / "index.html"
    index_page.write_text(
        '<a href="chapters/01-introduction.html">Intro</a>\n'
        '<a href="chapters/02-setup.html">Setup</a>\n'
    )
    (chapters_dir / "01-introduction.html").write_text("<h1>Intro</h1>\n")

    dod = create_definition_of_done("Create a multi-file guide with chapters.")
    dod.implementation_plan = str(plan_path)
    dod.touched_files = [str(index_page), str(chapters_dir / "01-introduction.html")]
    dod.completed_items = ["Create chapter files with appropriate content"]

    # 02-setup.html is linked from the index but missing on disk.
    assert all_planned_artifacts_exist(dod, project_root=tmp_path) is False

    (chapters_dir / "02-setup.html").write_text("<h1>Setup</h1>\n")

    assert all_planned_artifacts_exist(dod, project_root=tmp_path) is True
| 335 | + | |
| 336 | + | |
| 169 | 337 | def test_build_verification_summary_keeps_concrete_missing_link_details() -> None: |
| 170 | 338 | summary = build_verification_summary( |
| 171 | 339 | [ |
tests/test_finalization.pymodified@@ -10,10 +10,17 @@ import pytest | ||
| 10 | 10 | from loader.llm.base import Message, Role, ToolCall |
| 11 | 11 | from loader.runtime.completion_trace import CompletionTraceEntry |
| 12 | 12 | from loader.runtime.context import RuntimeContext |
| 13 | -from loader.runtime.dod import DefinitionOfDoneStore, create_definition_of_done | |
| 13 | +from loader.runtime.dod import ( | |
| 14 | + DefinitionOfDoneStore, | |
| 15 | + VerificationEvidence, | |
| 16 | + create_definition_of_done, | |
| 17 | +) | |
| 14 | 18 | from loader.runtime.events import TurnSummary |
| 15 | 19 | from loader.runtime.executor import ToolExecutionOutcome, ToolExecutionState |
| 16 | -from loader.runtime.finalization import TurnFinalizer | |
| 20 | +from loader.runtime.finalization import ( | |
| 21 | + TurnFinalizer, | |
| 22 | + _build_verification_repair_guidance, | |
| 23 | +) | |
| 17 | 24 | from loader.runtime.permissions import ( |
| 18 | 25 | PermissionMode, |
| 19 | 26 | build_permission_policy, |
@@ -129,6 +136,25 @@ class RecordingExecutor: | ||
| 129 | 136 | ) |
| 130 | 137 | |
| 131 | 138 | |
class SelectiveRecordingExecutor:
    """Test double that records commands and fails only those matching a substring."""

    def __init__(self, failing_match: str) -> None:
        self.commands: list[str] = []
        self.failing_match = failing_match

    async def execute_tool_call(self, tool_call: ToolCall, **_: object) -> ToolExecutionOutcome:
        command = str(tool_call.arguments.get("command", ""))
        self.commands.append(command)
        if self.failing_match in command:
            # Simulate a verification command that fails.
            return tool_outcome(
                tool_call=tool_call,
                output="failed",
                is_error=True,
                exit_code=1,
                stdout="",
                stderr="failed",
            )
        return tool_outcome(
            tool_call=tool_call,
            output="ok",
            is_error=False,
            exit_code=0,
            stdout="ok",
            stderr="",
        )
| 157 | + | |
| 132 | 158 | def build_context(temp_dir: Path, session: FakeSession) -> RuntimeContext: |
| 133 | 159 | registry = create_default_registry(temp_dir) |
| 134 | 160 | registry.configure_workspace_root(temp_dir) |
@@ -260,6 +286,65 @@ def test_turn_finalizer_finalize_summary_uses_runtime_context( | ||
| 260 | 286 | ] |
| 261 | 287 | |
| 262 | 288 | |
def test_verification_repair_guidance_uses_existing_artifacts_as_source_of_truth(
    temp_dir: Path,
) -> None:
    """Repair guidance should point at the real files, not the stale link names."""
    guide_dir = temp_dir / "guides" / "nginx"
    chapters_dir = guide_dir / "chapters"
    chapters_dir.mkdir(parents=True)
    index_page = guide_dir / "index.html"
    chapter_pages = [
        chapters_dir / "01-getting-started.html",
        chapters_dir / "02-installation.html",
        chapters_dir / "03-first-website.html",
        chapters_dir / "04-configuration-basics.html",
    ]
    for page in [index_page, *chapter_pages]:
        page.write_text("<html></html>\n")

    plan_path = temp_dir / "implementation.md"
    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{guide_dir}/`",
        f"- `{chapters_dir}/`",
        f"- `{index_page}`",
    ]
    plan_lines.extend(f"- `{page}`" for page in chapter_pages)
    plan_lines.append("")
    plan_path.write_text("\n".join(plan_lines))

    dod = create_definition_of_done("Repair the nginx guide index.")
    dod.implementation_plan = str(plan_path)
    # The failed check references link targets that do not exist on disk.
    dod.evidence = [
        VerificationEvidence(
            command="verify-links",
            passed=False,
            output=(
                "Missing local HTML links:\n"
                f"{index_page}:chapters/01-introduction.html -> {chapters_dir / '01-introduction.html'}\n"
                f"{index_page}:chapters/04-server-blocks.html -> {chapters_dir / '04-server-blocks.html'}\n"
            ),
        )
    ]

    guidance = _build_verification_repair_guidance(
        dod,
        project_root=temp_dir,
    )

    assert "Use the existing artifact files as the source of truth" in guidance
    assert str(chapter_pages[0]) in guidance
    assert str(chapter_pages[1]) in guidance
    assert str(chapter_pages[3]) in guidance
| 346 | + | |
| 347 | + | |
| 263 | 348 | @pytest.mark.asyncio |
| 264 | 349 | async def test_turn_finalizer_records_skipped_verification_observation( |
| 265 | 350 | temp_dir: Path, |
@@ -296,6 +381,8 @@ async def test_turn_finalizer_records_skipped_verification_observation( | ||
| 296 | 381 | "verification was skipped because no mutating work required checks" |
| 297 | 382 | ] |
| 298 | 383 | assert summary.verification_status == "skipped" |
| 384 | + assert "Complete the requested work" not in dod.pending_items | |
| 385 | + assert "Complete the requested work" in dod.completed_items | |
| 299 | 386 | assert session.workflow_timeline[-1].kind == "verify_skip" |
| 300 | 387 | assert [item.status for item in session.workflow_timeline[-1].verification_observations] == [ |
| 301 | 388 | VerificationObservationStatus.SKIPPED.value |
@@ -481,6 +568,76 @@ async def test_turn_finalizer_does_not_append_repo_defaults_to_external_verifica | ||
| 481 | 568 | ] |
| 482 | 569 | |
| 483 | 570 | |
@pytest.mark.asyncio
async def test_turn_finalizer_blocks_completion_when_planned_artifacts_are_missing(
    temp_dir: Path,
) -> None:
    """The gate must re-enter work (without verifying) while planned files are absent."""
    docs_dir = temp_dir / "docs"
    chapters_dir = docs_dir / "chapters"
    chapters_dir.mkdir(parents=True)
    index_page = docs_dir / "index.html"
    intro_page = chapters_dir / "01-intro.html"
    install_page = chapters_dir / "02-installation.html"
    index_page.write_text(
        "\n".join(
            [
                '<a href="chapters/01-intro.html">Intro</a>',
                '<a href="chapters/02-installation.html">Installation</a>',
            ]
        )
    )
    # Only the first chapter exists; 02-installation.html stays missing.
    intro_page.write_text("<h1>Intro</h1>\n")
    plan_path = temp_dir / "implementation.md"
    plan_path.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{index_page}`",
                f"- `{intro_page}`",
                f"- `{install_page}`",
            ]
        )
    )

    session = FakeSession()
    finalizer = TurnFinalizer(
        build_context(temp_dir, session),
        RuntimeTracer(),
        DefinitionOfDoneStore(temp_dir),
        set_workflow_mode=_noop_set_workflow_mode,
    )
    dod = create_definition_of_done("Create a small multi-page HTML guide.")
    dod.mutating_actions.append("write")
    dod.touched_files.extend([str(index_page), str(intro_page)])
    dod.implementation_plan = str(plan_path)
    dod.verification_commands = [f"ls -la {docs_dir}"]
    executor = RecordingExecutor()

    async def capture(event) -> None:
        return None

    result = await finalizer.run_definition_of_done_gate(
        dod=dod,
        candidate_response="Finished the guide.",
        emit=capture,
        summary=TurnSummary(final_response=""),
        executor=executor,  # type: ignore[arg-type]
    )

    assert result.should_continue is True
    assert result.reason_code == "planned_artifacts_missing_continue"
    # No verification command should have run while artifacts were missing.
    assert executor.commands == []
    assert dod.status == "draft"
    assert "Complete the requested work" in dod.pending_items
    assert "Complete the requested work" not in dod.completed_items
    last_message = session.messages[-1].content
    assert last_message.startswith("[PLANNED ARTIFACTS STILL MISSING]")
    assert "`02-installation.html`" in last_message
| 639 | + | |
| 640 | + | |
| 484 | 641 | @pytest.mark.asyncio |
| 485 | 642 | async def test_turn_finalizer_records_missing_verification_observation( |
| 486 | 643 | temp_dir: Path, |
@@ -532,6 +689,146 @@ async def test_turn_finalizer_records_missing_verification_observation( | ||
| 532 | 689 | assert session.messages[-1].content.startswith("[DEFINITION OF DONE CHECK FAILED]") |
| 533 | 690 | |
| 534 | 691 | |
@pytest.mark.asyncio
async def test_turn_finalizer_ignores_unplanned_expansion_pending_items_once_plan_exists(
    temp_dir: Path,
) -> None:
    """Leftover expansion todos must not block acceptance once the plan is satisfied."""
    session = FakeSession()
    finalizer = TurnFinalizer(
        build_context(temp_dir, session),
        RuntimeTracer(),
        DefinitionOfDoneStore(temp_dir),
        set_workflow_mode=_noop_set_workflow_mode,
    )

    guide_dir = temp_dir / "guides" / "nginx"
    chapters_dir = guide_dir / "chapters"
    guide_dir.mkdir(parents=True)
    chapters_dir.mkdir()
    index_page = guide_dir / "index.html"
    chapter_one = chapters_dir / "01-getting-started.html"
    chapter_two = chapters_dir / "02-installation.html"
    index_page.write_text("<html></html>\n")
    chapter_one.write_text("<h1>One</h1>\n")
    chapter_two.write_text("<h1>Two</h1>\n")

    plan_path = temp_dir / "implementation.md"
    plan_path.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_dir}/`",
                f"- `{chapters_dir}/`",
                f"- `{index_page}`",
                f"- `{chapter_one}`",
                f"- `{chapter_two}`",
                "",
            ]
        )
    )

    dod = create_definition_of_done("Create a small multi-page HTML guide.")
    dod.implementation_plan = str(plan_path)
    # The first pending item is an unplanned expansion beyond the plan.
    dod.pending_items = [
        "Create 07-performance-tuning.html",
        "Complete the requested work",
    ]

    async def capture(event) -> None:
        return None

    result = await finalizer.run_definition_of_done_gate(
        dod=dod,
        candidate_response="Finished the guide.",
        emit=capture,
        summary=TurnSummary(final_response=""),
        executor=FakeExecutor([]),  # type: ignore[arg-type]
    )

    assert result.should_continue is False
    assert result.reason_code == "non_mutating_response_accepted"
| 754 | + | |
| 755 | + | |
@pytest.mark.asyncio
async def test_turn_finalizer_verification_failure_reentry_points_at_concrete_repair(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A failed link check should steer the model straight to the concrete fix."""
    session = FakeSession()
    context = build_context(temp_dir, session)
    queued_messages: list[str] = []
    context.queue_steering_message_callback = queued_messages.append
    finalizer = TurnFinalizer(
        context,
        RuntimeTracer(),
        DefinitionOfDoneStore(temp_dir),
        set_workflow_mode=_noop_set_workflow_mode,
    )
    broken_page = temp_dir / "guides" / "nginx" / "chapters" / "05-advanced-configurations.html"
    broken_page.parent.mkdir(parents=True, exist_ok=True)
    broken_page.write_text('<link rel="stylesheet" href="../styles.css">\n')
    missing_stylesheet = temp_dir / "guides" / "nginx" / "styles.css"
    dod = create_definition_of_done("Create the nginx guide.")
    dod.mutating_actions.append("write")
    dod.touched_files.append(str(broken_page))
    dod.verification_commands = ["python3 verify_links.py"]
    verify_call = ToolCall(
        id="verify-1-1",
        name="bash",
        arguments={"command": dod.verification_commands[0], "cwd": str(temp_dir)},
    )
    failure_output = (
        "Missing local HTML links:\n"
        f"{broken_page}:../styles.css -> {missing_stylesheet}\n"
    )
    failing_executor = FakeExecutor(
        [
            tool_outcome(
                tool_call=verify_call,
                output=failure_output,
                is_error=True,
                exit_code=1,
                stdout=failure_output,
            )
        ]
    )

    async def capture(event) -> None:
        return None

    # Suppress supplemental command derivation so only the failing check runs.
    monkeypatch.setattr(
        "loader.runtime.finalization.derive_verification_commands",
        lambda *args, **kwargs: [],
    )

    result = await finalizer.run_definition_of_done_gate(
        dod=dod,
        candidate_response="The guide is complete.",
        emit=capture,
        summary=TurnSummary(final_response=""),
        executor=failing_executor,  # type: ignore[arg-type]
    )

    assert result.should_continue is True
    assert result.reason_code == "verification_failed_reentry"
    assert queued_messages
    steering = queued_messages[-1]
    assert str(broken_page) in steering
    assert "../styles.css" in steering
    assert str(missing_stylesheet) in steering
    assert "Do not restart discovery or reread unrelated references." in steering
    failure_note = session.messages[-1].content
    assert failure_note.startswith("[DEFINITION OF DONE CHECK FAILED]")
    assert f"Immediate next step: edit `{broken_page}`." in failure_note
    assert f"create `{missing_stylesheet}`" in failure_note
    assert (
        "Do not reread unrelated reference materials or restart discovery"
        in failure_note
    )
| 830 | + | |
| 831 | + | |
| 535 | 832 | @pytest.mark.asyncio |
| 536 | 833 | async def test_turn_finalizer_does_not_reverify_without_new_changes( |
| 537 | 834 | temp_dir: Path, |
tests/test_permissions.pymodified@@ -6,15 +6,20 @@ from pathlib import Path | ||
| 6 | 6 | |
| 7 | 7 | import pytest |
| 8 | 8 | |
| 9 | -from loader.llm.base import ToolCall | |
| 9 | +from loader.llm.base import Message, Role, ToolCall | |
| 10 | +from loader.runtime.dod import DefinitionOfDoneStore, create_definition_of_done | |
| 10 | 11 | from loader.runtime.executor import ToolExecutionState, ToolExecutor |
| 11 | 12 | from loader.runtime.hooks import ( |
| 13 | + ActiveRepairMutationScopeHook, | |
| 14 | + ActiveRepairScopeHook, | |
| 12 | 15 | BaseToolHook, |
| 13 | 16 | FilePathAliasHook, |
| 14 | - HookDecision, | |
| 15 | 17 | HookContext, |
| 18 | + HookDecision, | |
| 16 | 19 | HookManager, |
| 17 | 20 | HookResult, |
| 21 | + LateReferenceDriftHook, | |
| 22 | + RelativePathContextHook, | |
| 18 | 23 | SearchPathAliasHook, |
| 19 | 24 | ) |
| 20 | 25 | from loader.runtime.permissions import ( |
@@ -24,6 +29,7 @@ from loader.runtime.permissions import ( | ||
| 24 | 29 | PermissionRuleSet, |
| 25 | 30 | build_permission_policy, |
| 26 | 31 | ) |
| 32 | +from loader.runtime.safeguard_services import ActionTracker | |
| 27 | 33 | from loader.runtime.tracing import RuntimeTracer |
| 28 | 34 | from loader.tools.base import create_default_registry |
| 29 | 35 | |
@@ -413,3 +419,1007 @@ async def test_search_path_alias_hook_splits_full_glob_pattern( | ||
| 413 | 419 | assert result.updated_arguments is not None |
| 414 | 420 | assert result.updated_arguments["path"] == str(chapters) |
| 415 | 421 | assert result.updated_arguments["pattern"] == "*.html" |
| 422 | + | |
| 423 | + | |
| 424 | +@pytest.mark.asyncio | |
| 425 | +async def test_relative_path_context_hook_remaps_workspace_mirror_of_external_root( | |
| 426 | + temp_dir: Path, | |
| 427 | +) -> None: | |
| 428 | + workspace_root = temp_dir / "workspace" | |
| 429 | + workspace_root.mkdir() | |
| 430 | + external_root = temp_dir / "external-home" | |
| 431 | + external_fortran = external_root / "Loader" / "guides" / "fortran" | |
| 432 | + external_fortran.mkdir(parents=True) | |
| 433 | + (external_fortran / "index.html").write_text("<html></html>\n") | |
| 434 | + (external_root / "Loader" / "guides").mkdir(exist_ok=True) | |
| 435 | + | |
| 436 | + registry = create_default_registry(workspace_root) | |
| 437 | + policy = build_permission_policy( | |
| 438 | + active_mode=PermissionMode.WORKSPACE_WRITE, | |
| 439 | + workspace_root=workspace_root, | |
| 440 | + tool_requirements=registry.get_tool_requirements(), | |
| 441 | + ) | |
| 442 | + action_tracker = ActionTracker() | |
| 443 | + action_tracker.record_tool_call( | |
| 444 | + "read", | |
| 445 | + {"file_path": str(external_fortran / "index.html")}, | |
| 446 | + ) | |
| 447 | + hook = RelativePathContextHook(action_tracker, workspace_root) | |
| 448 | + | |
| 449 | + mirrored_workspace_path = workspace_root / "Loader" / "guides" / "nginx" / "index.html" | |
| 450 | + expected_external_path = external_root / "Loader" / "guides" / "nginx" / "index.html" | |
| 451 | + | |
| 452 | + result = await hook.pre_tool_use( | |
| 453 | + HookContext( | |
| 454 | + tool_call=ToolCall( | |
| 455 | + id="write-1", | |
| 456 | + name="write", | |
| 457 | + arguments={ | |
| 458 | + "file_path": str(mirrored_workspace_path), | |
| 459 | + "content": "<html></html>\n", | |
| 460 | + }, | |
| 461 | + ), | |
| 462 | + tool=registry.get("write"), | |
| 463 | + registry=registry, | |
| 464 | + permission_policy=policy, | |
| 465 | + source="native", | |
| 466 | + ) | |
| 467 | + ) | |
| 468 | + | |
| 469 | + assert result.updated_arguments is not None | |
| 470 | + assert Path(result.updated_arguments["file_path"]).resolve() == expected_external_path.resolve() | |
| 471 | + | |
| 472 | + | |
| 473 | +class FakeSession: | |
| 474 | + def __init__(self, *, active_dod_path: str, messages: list[Message]) -> None: | |
| 475 | + self.active_dod_path = active_dod_path | |
| 476 | + self.messages = messages | |
| 477 | + | |
| 478 | + | |
| 479 | +@pytest.mark.asyncio | |
| 480 | +async def test_active_repair_scope_hook_blocks_reference_reads_while_fixing( | |
| 481 | + temp_dir: Path, | |
| 482 | +) -> None: | |
| 483 | + registry = create_default_registry(temp_dir) | |
| 484 | + policy = build_permission_policy( | |
| 485 | + active_mode=PermissionMode.WORKSPACE_WRITE, | |
| 486 | + workspace_root=temp_dir, | |
| 487 | + tool_requirements=registry.get_tool_requirements(), | |
| 488 | + ) | |
| 489 | + dod_store = DefinitionOfDoneStore(temp_dir) | |
| 490 | + dod = create_definition_of_done("Repair the active artifact set") | |
| 491 | + dod.status = "fixing" | |
| 492 | + dod_path = dod_store.save(dod) | |
| 493 | + repair_target = temp_dir / "guide" / "index.html" | |
| 494 | + session = FakeSession( | |
| 495 | + active_dod_path=str(dod_path), | |
| 496 | + messages=[ | |
| 497 | + Message( | |
| 498 | + role=Role.ASSISTANT, | |
| 499 | + content=( | |
| 500 | + "Repair focus:\n" | |
| 501 | + f"- Fix the broken local reference `chapters/01-introduction.html` in `{repair_target}`.\n" | |
| 502 | + f"- Immediate next step: edit `{repair_target}`.\n" | |
| 503 | + f"- If the broken reference should remain, create `{temp_dir / 'guide' / 'chapters' / '01-introduction.html'}`; otherwise remove or replace `chapters/01-introduction.html`.\n" | |
| 504 | + ), | |
| 505 | + ) | |
| 506 | + ], | |
| 507 | + ) | |
| 508 | + hook = ActiveRepairScopeHook( | |
| 509 | + dod_store=dod_store, | |
| 510 | + project_root=temp_dir, | |
| 511 | + session=session, | |
| 512 | + ) | |
| 513 | + | |
| 514 | + result = await hook.pre_tool_use( | |
| 515 | + HookContext( | |
| 516 | + tool_call=ToolCall( | |
| 517 | + id="read-1", | |
| 518 | + name="read", | |
| 519 | + arguments={"file_path": str(temp_dir / "reference" / "index.html")}, | |
| 520 | + ), | |
| 521 | + tool=registry.get("read"), | |
| 522 | + registry=registry, | |
| 523 | + permission_policy=policy, | |
| 524 | + source="native", | |
| 525 | + ) | |
| 526 | + ) | |
| 527 | + | |
| 528 | + assert result.decision == HookDecision.DENY | |
| 529 | + assert result.terminal_state == "blocked" | |
| 530 | + assert result.message is not None | |
| 531 | + assert "active repair scope" in result.message | |
| 532 | + assert str(repair_target) in result.message | |
| 533 | + | |
| 534 | + | |
| 535 | +@pytest.mark.asyncio | |
| 536 | +async def test_active_repair_scope_hook_allows_reads_inside_active_artifact_set( | |
| 537 | + temp_dir: Path, | |
| 538 | +) -> None: | |
| 539 | + registry = create_default_registry(temp_dir) | |
| 540 | + policy = build_permission_policy( | |
| 541 | + active_mode=PermissionMode.WORKSPACE_WRITE, | |
| 542 | + workspace_root=temp_dir, | |
| 543 | + tool_requirements=registry.get_tool_requirements(), | |
| 544 | + ) | |
| 545 | + dod_store = DefinitionOfDoneStore(temp_dir) | |
| 546 | + dod = create_definition_of_done("Repair the active artifact set") | |
| 547 | + dod.status = "fixing" | |
| 548 | + dod_path = dod_store.save(dod) | |
| 549 | + repair_target = temp_dir / "guide" / "index.html" | |
| 550 | + chapter_path = temp_dir / "guide" / "chapters" / "01-getting-started.html" | |
| 551 | + session = FakeSession( | |
| 552 | + active_dod_path=str(dod_path), | |
| 553 | + messages=[ | |
| 554 | + Message( | |
| 555 | + role=Role.ASSISTANT, | |
| 556 | + content=( | |
| 557 | + "Repair focus:\n" | |
| 558 | + f"- Fix the broken local reference `chapters/01-getting-started.html` in `{repair_target}`.\n" | |
| 559 | + f"- Fix the broken local reference `../styles.css` in `{chapter_path}`.\n" | |
| 560 | + f"- Immediate next step: edit `{repair_target}`.\n" | |
| 561 | + f"- If the broken reference should remain, create `{chapter_path}`; otherwise remove or replace `chapters/01-getting-started.html`.\n" | |
| 562 | + ), | |
| 563 | + ) | |
| 564 | + ], | |
| 565 | + ) | |
| 566 | + hook = ActiveRepairScopeHook( | |
| 567 | + dod_store=dod_store, | |
| 568 | + project_root=temp_dir, | |
| 569 | + session=session, | |
| 570 | + ) | |
| 571 | + | |
| 572 | + result = await hook.pre_tool_use( | |
| 573 | + HookContext( | |
| 574 | + tool_call=ToolCall( | |
| 575 | + id="read-1", | |
| 576 | + name="read", | |
| 577 | + arguments={"file_path": str(chapter_path)}, | |
| 578 | + ), | |
| 579 | + tool=registry.get("read"), | |
| 580 | + registry=registry, | |
| 581 | + permission_policy=policy, | |
| 582 | + source="native", | |
| 583 | + ) | |
| 584 | + ) | |
| 585 | + | |
| 586 | + assert result.decision == HookDecision.CONTINUE | |
| 587 | + | |
| 588 | + | |
| 589 | +@pytest.mark.asyncio | |
| 590 | +async def test_active_repair_scope_hook_allows_verification_source_outside_repair_target( | |
| 591 | + temp_dir: Path, | |
| 592 | +) -> None: | |
| 593 | + registry = create_default_registry(temp_dir) | |
| 594 | + policy = build_permission_policy( | |
| 595 | + active_mode=PermissionMode.WORKSPACE_WRITE, | |
| 596 | + workspace_root=temp_dir, | |
| 597 | + tool_requirements=registry.get_tool_requirements(), | |
| 598 | + ) | |
| 599 | + dod_store = DefinitionOfDoneStore(temp_dir) | |
| 600 | + dod = create_definition_of_done("Repair the active artifact set") | |
| 601 | + dod.status = "in_progress" | |
| 602 | + dod_path = dod_store.save(dod) | |
| 603 | + repair_target = temp_dir / "guide" / "chapters" / "06-troubleshooting.html" | |
| 604 | + session = FakeSession( | |
| 605 | + active_dod_path=str(dod_path), | |
| 606 | + messages=[ | |
| 607 | + Message( | |
| 608 | + role=Role.ASSISTANT, | |
| 609 | + content=( | |
| 610 | + "Repair focus:\n" | |
| 611 | + f"- Fix the broken local reference `01-introduction.html` in `{repair_target}`.\n" | |
| 612 | + f"- Immediate next step: edit `{repair_target}`.\n" | |
| 613 | + "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n" | |
| 614 | + ), | |
| 615 | + ) | |
| 616 | + ], | |
| 617 | + ) | |
| 618 | + hook = ActiveRepairScopeHook( | |
| 619 | + dod_store=dod_store, | |
| 620 | + project_root=temp_dir, | |
| 621 | + session=session, | |
| 622 | + ) | |
| 623 | + | |
| 624 | + result = await hook.pre_tool_use( | |
| 625 | + HookContext( | |
| 626 | + tool_call=ToolCall( | |
| 627 | + id="verify-1", | |
| 628 | + name="read", | |
| 629 | + arguments={"file_path": str(temp_dir / "guide" / "index.html")}, | |
| 630 | + ), | |
| 631 | + tool=registry.get("read"), | |
| 632 | + registry=registry, | |
| 633 | + permission_policy=policy, | |
| 634 | + source="verification", | |
| 635 | + ) | |
| 636 | + ) | |
| 637 | + | |
| 638 | + assert result.decision == HookDecision.CONTINUE | |
| 639 | + | |
| 640 | + | |
| 641 | +@pytest.mark.asyncio | |
| 642 | +async def test_active_repair_scope_hook_blocks_local_rereads_outside_concrete_repair_files( | |
| 643 | + temp_dir: Path, | |
| 644 | +) -> None: | |
| 645 | + registry = create_default_registry(temp_dir) | |
| 646 | + policy = build_permission_policy( | |
| 647 | + active_mode=PermissionMode.WORKSPACE_WRITE, | |
| 648 | + workspace_root=temp_dir, | |
| 649 | + tool_requirements=registry.get_tool_requirements(), | |
| 650 | + ) | |
| 651 | + dod_store = DefinitionOfDoneStore(temp_dir) | |
| 652 | + dod = create_definition_of_done("Repair the active artifact set") | |
| 653 | + dod.status = "in_progress" | |
| 654 | + dod_path = dod_store.save(dod) | |
| 655 | + repair_target = temp_dir / "guide" / "chapters" / "05-advanced-configurations.html" | |
| 656 | + stylesheet = temp_dir / "guide" / "styles.css" | |
| 657 | + other_chapter = temp_dir / "guide" / "chapters" / "01-getting-started.html" | |
| 658 | + session = FakeSession( | |
| 659 | + active_dod_path=str(dod_path), | |
| 660 | + messages=[ | |
| 661 | + Message( | |
| 662 | + role=Role.ASSISTANT, | |
| 663 | + content=( | |
| 664 | + "Repair focus:\n" | |
| 665 | + f"- Fix the broken local reference `../styles.css` in `{repair_target}`.\n" | |
| 666 | + f"- Fix the broken local reference `../styles.css` in `{temp_dir / 'guide' / 'chapters' / '06-troubleshooting.html'}`.\n" | |
| 667 | + f"- Immediate next step: edit `{repair_target}`.\n" | |
| 668 | + f"- If the broken reference should remain, create `{stylesheet}`; otherwise remove or replace `../styles.css`.\n" | |
| 669 | + "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n" | |
| 670 | + ), | |
| 671 | + ) | |
| 672 | + ], | |
| 673 | + ) | |
| 674 | + hook = ActiveRepairScopeHook( | |
| 675 | + dod_store=dod_store, | |
| 676 | + project_root=temp_dir, | |
| 677 | + session=session, | |
| 678 | + ) | |
| 679 | + | |
| 680 | + result = await hook.pre_tool_use( | |
| 681 | + HookContext( | |
| 682 | + tool_call=ToolCall( | |
| 683 | + id="read-1", | |
| 684 | + name="read", | |
| 685 | + arguments={"file_path": str(other_chapter)}, | |
| 686 | + ), | |
| 687 | + tool=registry.get("read"), | |
| 688 | + registry=registry, | |
| 689 | + permission_policy=policy, | |
| 690 | + source="native", | |
| 691 | + ) | |
| 692 | + ) | |
| 693 | + | |
| 694 | + assert result.decision == HookDecision.DENY | |
| 695 | + assert result.terminal_state == "blocked" | |
| 696 | + assert result.message is not None | |
| 697 | + assert "active repair scope" in result.message | |
| 698 | + assert str(repair_target) in result.message | |
| 699 | + assert str(stylesheet) in result.message | |
| 700 | + | |
| 701 | + | |
| 702 | +@pytest.mark.asyncio | |
| 703 | +async def test_active_repair_scope_hook_allows_scoped_glob_within_active_artifact_roots( | |
| 704 | + temp_dir: Path, | |
| 705 | +) -> None: | |
| 706 | + registry = create_default_registry(temp_dir) | |
| 707 | + policy = build_permission_policy( | |
| 708 | + active_mode=PermissionMode.WORKSPACE_WRITE, | |
| 709 | + workspace_root=temp_dir, | |
| 710 | + tool_requirements=registry.get_tool_requirements(), | |
| 711 | + ) | |
| 712 | + dod_store = DefinitionOfDoneStore(temp_dir) | |
| 713 | + dod = create_definition_of_done("Repair the active artifact set") | |
| 714 | + dod.status = "in_progress" | |
| 715 | + dod_path = dod_store.save(dod) | |
| 716 | + repair_target = temp_dir / "guide" / "index.html" | |
| 717 | + guide_root = temp_dir / "guide" | |
| 718 | + session = FakeSession( | |
| 719 | + active_dod_path=str(dod_path), | |
| 720 | + messages=[ | |
| 721 | + Message( | |
| 722 | + role=Role.ASSISTANT, | |
| 723 | + content=( | |
| 724 | + "Repair focus:\n" | |
| 725 | + f"- Fix the broken local reference `chapters/troubleshooting.html` in `{repair_target}`.\n" | |
| 726 | + f"- Immediate next step: edit `{repair_target}`.\n" | |
| 727 | + f"- If the broken reference should remain, create `{guide_root / 'chapters' / 'troubleshooting.html'}`; otherwise remove or replace `chapters/troubleshooting.html`.\n" | |
| 728 | + "- Use the existing artifact files as the source of truth while repairing this file: " | |
| 729 | + f"`{guide_root / 'chapters' / 'introduction.html'}`, `{guide_root / 'chapters' / 'installation.html'}`, `{guide_root / 'chapters' / 'configuration.html'}`.\n" | |
| 730 | + "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n" | |
| 731 | + ), | |
| 732 | + ) | |
| 733 | + ], | |
| 734 | + ) | |
| 735 | + hook = ActiveRepairScopeHook( | |
| 736 | + dod_store=dod_store, | |
| 737 | + project_root=temp_dir, | |
| 738 | + session=session, | |
| 739 | + ) | |
| 740 | + | |
| 741 | + result = await hook.pre_tool_use( | |
| 742 | + HookContext( | |
| 743 | + tool_call=ToolCall( | |
| 744 | + id="glob-1", | |
| 745 | + name="glob", | |
| 746 | + arguments={ | |
| 747 | + "path": str(temp_dir), | |
| 748 | + "pattern": "**/guide/chapters/*.html", | |
| 749 | + }, | |
| 750 | + ), | |
| 751 | + tool=registry.get("glob"), | |
| 752 | + registry=registry, | |
| 753 | + permission_policy=policy, | |
| 754 | + source="native", | |
| 755 | + ) | |
| 756 | + ) | |
| 757 | + | |
| 758 | + assert result.decision == HookDecision.CONTINUE | |
| 759 | + | |
| 760 | + | |
| 761 | +@pytest.mark.asyncio | |
| 762 | +async def test_active_repair_scope_hook_allows_declared_missing_sibling_reads( | |
| 763 | + temp_dir: Path, | |
| 764 | +) -> None: | |
| 765 | + registry = create_default_registry(temp_dir) | |
| 766 | + policy = build_permission_policy( | |
| 767 | + active_mode=PermissionMode.WORKSPACE_WRITE, | |
| 768 | + workspace_root=temp_dir, | |
| 769 | + tool_requirements=registry.get_tool_requirements(), | |
| 770 | + ) | |
| 771 | + dod_store = DefinitionOfDoneStore(temp_dir) | |
| 772 | + dod = create_definition_of_done("Repair the active artifact set") | |
| 773 | + dod.status = "in_progress" | |
| 774 | + dod_path = dod_store.save(dod) | |
| 775 | + guide_root = temp_dir / "guide" | |
| 776 | + chapters = guide_root / "chapters" | |
| 777 | + chapters.mkdir(parents=True) | |
| 778 | + repair_target = guide_root / "index.html" | |
| 779 | + existing_chapter = chapters / "overview.html" | |
| 780 | + next_chapter = chapters / "installation.html" | |
| 781 | + repair_target.write_text( | |
| 782 | + "\n".join( | |
| 783 | + [ | |
| 784 | + "<html>", | |
| 785 | + '<a href="chapters/overview.html">Overview</a>', | |
| 786 | + '<a href="chapters/installation.html">Installation</a>', | |
| 787 | + "</html>", | |
| 788 | + ] | |
| 789 | + ) | |
| 790 | + + "\n" | |
| 791 | + ) | |
| 792 | + existing_chapter.write_text("<h1>Overview</h1>\n") | |
| 793 | + | |
| 794 | + session = FakeSession( | |
| 795 | + active_dod_path=str(dod_path), | |
| 796 | + messages=[ | |
| 797 | + Message( | |
| 798 | + role=Role.ASSISTANT, | |
| 799 | + content=( | |
| 800 | + "Repair focus:\n" | |
| 801 | + f"- Fix the broken local reference `chapters/overview.html` in `{repair_target}`.\n" | |
| 802 | + f"- Immediate next step: edit `{repair_target}`.\n" | |
| 803 | + f"- If the broken reference should remain, create `{existing_chapter}`; otherwise remove or replace `chapters/overview.html`.\n" | |
| 804 | + "- Use the existing artifact files as the source of truth while repairing this file: " | |
| 805 | + f"`{existing_chapter}`.\n" | |
| 806 | + "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n" | |
| 807 | + ), | |
| 808 | + ) | |
| 809 | + ], | |
| 810 | + ) | |
| 811 | + hook = ActiveRepairScopeHook( | |
| 812 | + dod_store=dod_store, | |
| 813 | + project_root=temp_dir, | |
| 814 | + session=session, | |
| 815 | + ) | |
| 816 | + | |
| 817 | + result = await hook.pre_tool_use( | |
| 818 | + HookContext( | |
| 819 | + tool_call=ToolCall( | |
| 820 | + id="read-allowed-sibling", | |
| 821 | + name="read", | |
| 822 | + arguments={"file_path": str(next_chapter)}, | |
| 823 | + ), | |
| 824 | + tool=registry.get("read"), | |
| 825 | + registry=registry, | |
| 826 | + permission_policy=policy, | |
| 827 | + source="native", | |
| 828 | + ) | |
| 829 | + ) | |
| 830 | + | |
| 831 | + assert result.decision == HookDecision.CONTINUE | |
| 832 | + | |
| 833 | + | |
| 834 | +@pytest.mark.asyncio | |
| 835 | +async def test_active_repair_scope_hook_blocks_reference_reads_during_in_progress_repair( | |
| 836 | + temp_dir: Path, | |
| 837 | +) -> None: | |
| 838 | + registry = create_default_registry(temp_dir) | |
| 839 | + policy = build_permission_policy( | |
| 840 | + active_mode=PermissionMode.WORKSPACE_WRITE, | |
| 841 | + workspace_root=temp_dir, | |
| 842 | + tool_requirements=registry.get_tool_requirements(), | |
| 843 | + ) | |
| 844 | + dod_store = DefinitionOfDoneStore(temp_dir) | |
| 845 | + dod = create_definition_of_done("Repair the active artifact set") | |
| 846 | + dod.status = "in_progress" | |
| 847 | + dod_path = dod_store.save(dod) | |
| 848 | + repair_target = temp_dir / "guide" / "chapters" / "05-advanced-configurations.html" | |
| 849 | + session = FakeSession( | |
| 850 | + active_dod_path=str(dod_path), | |
| 851 | + messages=[ | |
| 852 | + Message( | |
| 853 | + role=Role.ASSISTANT, | |
| 854 | + content=( | |
| 855 | + "Repair focus:\n" | |
| 856 | + f"- Fix the broken local reference `../styles.css` in `{repair_target}`.\n" | |
| 857 | + f"- Immediate next step: edit `{repair_target}`.\n" | |
| 858 | + f"- If the broken reference should remain, create `{temp_dir / 'guide' / 'styles.css'}`; otherwise remove or replace `../styles.css`.\n" | |
| 859 | + "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n" | |
| 860 | + ), | |
| 861 | + ) | |
| 862 | + ], | |
| 863 | + ) | |
| 864 | + hook = ActiveRepairScopeHook( | |
| 865 | + dod_store=dod_store, | |
| 866 | + project_root=temp_dir, | |
| 867 | + session=session, | |
| 868 | + ) | |
| 869 | + | |
| 870 | + result = await hook.pre_tool_use( | |
| 871 | + HookContext( | |
| 872 | + tool_call=ToolCall( | |
| 873 | + id="read-1", | |
| 874 | + name="read", | |
| 875 | + arguments={"file_path": str(temp_dir / "reference" / "index.html")}, | |
| 876 | + ), | |
| 877 | + tool=registry.get("read"), | |
| 878 | + registry=registry, | |
| 879 | + permission_policy=policy, | |
| 880 | + source="native", | |
| 881 | + ) | |
| 882 | + ) | |
| 883 | + | |
| 884 | + assert result.decision == HookDecision.DENY | |
| 885 | + assert result.terminal_state == "blocked" | |
| 886 | + assert result.message is not None | |
| 887 | + assert "active repair scope" in result.message | |
| 888 | + | |
| 889 | + | |
| 890 | +@pytest.mark.asyncio | |
| 891 | +async def test_active_repair_mutation_scope_hook_blocks_writes_outside_named_repair_files( | |
| 892 | + temp_dir: Path, | |
| 893 | +) -> None: | |
| 894 | + registry = create_default_registry(temp_dir) | |
| 895 | + policy = build_permission_policy( | |
| 896 | + active_mode=PermissionMode.WORKSPACE_WRITE, | |
| 897 | + workspace_root=temp_dir, | |
| 898 | + tool_requirements=registry.get_tool_requirements(), | |
| 899 | + ) | |
| 900 | + dod_store = DefinitionOfDoneStore(temp_dir) | |
| 901 | + dod = create_definition_of_done("Repair the active artifact set") | |
| 902 | + dod.status = "in_progress" | |
| 903 | + dod_path = dod_store.save(dod) | |
| 904 | + repair_target = temp_dir / "guide" / "chapters" / "05-advanced-configurations.html" | |
| 905 | + chapter_path = temp_dir / "guide" / "chapters" / "01-getting-started.html" | |
| 906 | + session = FakeSession( | |
| 907 | + active_dod_path=str(dod_path), | |
| 908 | + messages=[ | |
| 909 | + Message( | |
| 910 | + role=Role.ASSISTANT, | |
| 911 | + content=( | |
| 912 | + "Repair focus:\n" | |
| 913 | + f"- Fix the broken local reference `../styles.css` in `{repair_target}`.\n" | |
| 914 | + f"- Immediate next step: edit `{repair_target}`.\n" | |
| 915 | + f"- If the broken reference should remain, create `{temp_dir / 'guide' / 'styles.css'}`; otherwise remove or replace `../styles.css`.\n" | |
| 916 | + "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n" | |
| 917 | + ), | |
| 918 | + ) | |
| 919 | + ], | |
| 920 | + ) | |
| 921 | + hook = ActiveRepairMutationScopeHook( | |
| 922 | + dod_store=dod_store, | |
| 923 | + project_root=temp_dir, | |
| 924 | + session=session, | |
| 925 | + ) | |
| 926 | + | |
| 927 | + result = await hook.pre_tool_use( | |
| 928 | + HookContext( | |
| 929 | + tool_call=ToolCall( | |
| 930 | + id="edit-1", | |
| 931 | + name="edit", | |
| 932 | + arguments={"file_path": str(chapter_path), "old_string": "old", "new_string": "new"}, | |
| 933 | + ), | |
| 934 | + tool=registry.get("edit"), | |
| 935 | + registry=registry, | |
| 936 | + permission_policy=policy, | |
| 937 | + source="native", | |
| 938 | + ) | |
| 939 | + ) | |
| 940 | + | |
| 941 | + assert result.decision == HookDecision.DENY | |
| 942 | + assert result.terminal_state == "blocked" | |
| 943 | + assert result.message is not None | |
| 944 | + assert "active repair mutation scope" in result.message | |
| 945 | + assert str(repair_target) in result.message | |
| 946 | + | |
| 947 | + | |
| 948 | +@pytest.mark.asyncio | |
| 949 | +async def test_active_repair_mutation_scope_hook_allows_expected_repair_file_writes( | |
| 950 | + temp_dir: Path, | |
| 951 | +) -> None: | |
| 952 | + registry = create_default_registry(temp_dir) | |
| 953 | + policy = build_permission_policy( | |
| 954 | + active_mode=PermissionMode.WORKSPACE_WRITE, | |
| 955 | + workspace_root=temp_dir, | |
| 956 | + tool_requirements=registry.get_tool_requirements(), | |
| 957 | + ) | |
| 958 | + dod_store = DefinitionOfDoneStore(temp_dir) | |
| 959 | + dod = create_definition_of_done("Repair the active artifact set") | |
| 960 | + dod.status = "in_progress" | |
| 961 | + dod_path = dod_store.save(dod) | |
| 962 | + repair_target = temp_dir / "guide" / "chapters" / "05-advanced-configurations.html" | |
| 963 | + stylesheet = temp_dir / "guide" / "styles.css" | |
| 964 | + session = FakeSession( | |
| 965 | + active_dod_path=str(dod_path), | |
| 966 | + messages=[ | |
| 967 | + Message( | |
| 968 | + role=Role.ASSISTANT, | |
| 969 | + content=( | |
| 970 | + "Repair focus:\n" | |
| 971 | + f"- Fix the broken local reference `../styles.css` in `{repair_target}`.\n" | |
| 972 | + f"- Immediate next step: edit `{repair_target}`.\n" | |
| 973 | + f"- If the broken reference should remain, create `{stylesheet}`; otherwise remove or replace `../styles.css`.\n" | |
| 974 | + "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n" | |
| 975 | + ), | |
| 976 | + ) | |
| 977 | + ], | |
| 978 | + ) | |
| 979 | + hook = ActiveRepairMutationScopeHook( | |
| 980 | + dod_store=dod_store, | |
| 981 | + project_root=temp_dir, | |
| 982 | + session=session, | |
| 983 | + ) | |
| 984 | + | |
| 985 | + result = await hook.pre_tool_use( | |
| 986 | + HookContext( | |
| 987 | + tool_call=ToolCall( | |
| 988 | + id="write-1", | |
| 989 | + name="write", | |
| 990 | + arguments={"file_path": str(stylesheet), "content": "body { color: #222; }\n"}, | |
| 991 | + ), | |
| 992 | + tool=registry.get("write"), | |
| 993 | + registry=registry, | |
| 994 | + permission_policy=policy, | |
| 995 | + source="native", | |
| 996 | + ) | |
| 997 | + ) | |
| 998 | + | |
| 999 | + assert result.decision == HookDecision.CONTINUE | |
| 1000 | + | |
| 1001 | + | |
| 1002 | +@pytest.mark.asyncio | |
| 1003 | +async def test_active_repair_mutation_scope_hook_allows_declared_missing_sibling_outputs( | |
| 1004 | + temp_dir: Path, | |
| 1005 | +) -> None: | |
| 1006 | + registry = create_default_registry(temp_dir) | |
| 1007 | + policy = build_permission_policy( | |
| 1008 | + active_mode=PermissionMode.WORKSPACE_WRITE, | |
| 1009 | + workspace_root=temp_dir, | |
| 1010 | + tool_requirements=registry.get_tool_requirements(), | |
| 1011 | + ) | |
| 1012 | + dod_store = DefinitionOfDoneStore(temp_dir) | |
| 1013 | + dod = create_definition_of_done("Repair the active artifact set") | |
| 1014 | + dod.status = "in_progress" | |
| 1015 | + dod_path = dod_store.save(dod) | |
| 1016 | + guide_root = temp_dir / "guide" | |
| 1017 | + chapters = guide_root / "chapters" | |
| 1018 | + chapters.mkdir(parents=True) | |
| 1019 | + repair_target = guide_root / "index.html" | |
| 1020 | + existing_chapter = chapters / "01-introduction.html" | |
| 1021 | + next_chapter = chapters / "02-installation.html" | |
| 1022 | + repair_target.write_text( | |
| 1023 | + "\n".join( | |
| 1024 | + [ | |
| 1025 | + "<html>", | |
| 1026 | + '<a href="chapters/01-introduction.html">Introduction</a>', | |
| 1027 | + '<a href="chapters/02-installation.html">Installation</a>', | |
| 1028 | + "</html>", | |
| 1029 | + ] | |
| 1030 | + ) | |
| 1031 | + + "\n" | |
| 1032 | + ) | |
| 1033 | + existing_chapter.write_text("<h1>Introduction</h1>\n") | |
| 1034 | + | |
| 1035 | + session = FakeSession( | |
| 1036 | + active_dod_path=str(dod_path), | |
| 1037 | + messages=[ | |
| 1038 | + Message( | |
| 1039 | + role=Role.ASSISTANT, | |
| 1040 | + content=( | |
| 1041 | + "Repair focus:\n" | |
| 1042 | + f"- Fix the broken local reference `chapters/01-introduction.html` in `{repair_target}`.\n" | |
| 1043 | + f"- Immediate next step: edit `{repair_target}`.\n" | |
| 1044 | + f"- If the broken reference should remain, create `{existing_chapter}`; otherwise remove or replace `chapters/01-introduction.html`.\n" | |
| 1045 | + "- Use the existing artifact files as the source of truth while repairing this file: " | |
| 1046 | + f"`{existing_chapter}`.\n" | |
| 1047 | + "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n" | |
| 1048 | + ), | |
| 1049 | + ) | |
| 1050 | + ], | |
| 1051 | + ) | |
| 1052 | + hook = ActiveRepairMutationScopeHook( | |
| 1053 | + dod_store=dod_store, | |
| 1054 | + project_root=temp_dir, | |
| 1055 | + session=session, | |
| 1056 | + ) | |
| 1057 | + | |
| 1058 | + result = await hook.pre_tool_use( | |
| 1059 | + HookContext( | |
| 1060 | + tool_call=ToolCall( | |
| 1061 | + id="write-2", | |
| 1062 | + name="write", | |
| 1063 | + arguments={"file_path": str(next_chapter), "content": "<h1>Installation</h1>\n"}, | |
| 1064 | + ), | |
| 1065 | + tool=registry.get("write"), | |
| 1066 | + registry=registry, | |
| 1067 | + permission_policy=policy, | |
| 1068 | + source="native", | |
| 1069 | + ) | |
| 1070 | + ) | |
| 1071 | + | |
| 1072 | + assert result.decision == HookDecision.CONTINUE | |
| 1073 | + | |
| 1074 | + | |
| 1075 | +@pytest.mark.asyncio | |
| 1076 | +async def test_active_repair_mutation_scope_hook_blocks_broad_mutating_bash( | |
| 1077 | + temp_dir: Path, | |
| 1078 | +) -> None: | |
| 1079 | + registry = create_default_registry(temp_dir) | |
| 1080 | + policy = build_permission_policy( | |
| 1081 | + active_mode=PermissionMode.WORKSPACE_WRITE, | |
| 1082 | + workspace_root=temp_dir, | |
| 1083 | + tool_requirements=registry.get_tool_requirements(), | |
| 1084 | + ) | |
| 1085 | + dod_store = DefinitionOfDoneStore(temp_dir) | |
| 1086 | + dod = create_definition_of_done("Repair the active artifact set") | |
| 1087 | + dod.status = "in_progress" | |
| 1088 | + dod_path = dod_store.save(dod) | |
| 1089 | + repair_target = temp_dir / "guide" / "chapters" / "05-advanced-configurations.html" | |
| 1090 | + session = FakeSession( | |
| 1091 | + active_dod_path=str(dod_path), | |
| 1092 | + messages=[ | |
| 1093 | + Message( | |
| 1094 | + role=Role.ASSISTANT, | |
| 1095 | + content=( | |
| 1096 | + "Repair focus:\n" | |
| 1097 | + f"- Fix the broken local reference `../styles.css` in `{repair_target}`.\n" | |
| 1098 | + f"- Immediate next step: edit `{repair_target}`.\n" | |
| 1099 | + f"- If the broken reference should remain, create `{temp_dir / 'guide' / 'styles.css'}`; otherwise remove or replace `../styles.css`.\n" | |
| 1100 | + "- Do not reread unrelated reference materials or restart discovery while this concrete repair target is unresolved.\n" | |
| 1101 | + ), | |
| 1102 | + ) | |
| 1103 | + ], | |
| 1104 | + ) | |
| 1105 | + hook = ActiveRepairMutationScopeHook( | |
| 1106 | + dod_store=dod_store, | |
| 1107 | + project_root=temp_dir, | |
| 1108 | + session=session, | |
| 1109 | + ) | |
| 1110 | + | |
| 1111 | + result = await hook.pre_tool_use( | |
| 1112 | + HookContext( | |
| 1113 | + tool_call=ToolCall( | |
| 1114 | + id="bash-1", | |
| 1115 | + name="bash", | |
| 1116 | + arguments={"command": f"mkdir -p {temp_dir / 'guide' / 'assets'}"}, | |
| 1117 | + ), | |
| 1118 | + tool=registry.get("bash"), | |
| 1119 | + registry=registry, | |
| 1120 | + permission_policy=policy, | |
| 1121 | + source="native", | |
| 1122 | + ) | |
| 1123 | + ) | |
| 1124 | + | |
| 1125 | + assert result.decision == HookDecision.DENY | |
| 1126 | + assert result.terminal_state == "blocked" | |
| 1127 | + assert result.message is not None | |
| 1128 | + assert "active repair mutation scope" in result.message | |
| 1129 | + assert str(repair_target) in result.message | |
| 1130 | + | |
| 1131 | + | |
| 1132 | +@pytest.mark.asyncio | |
| 1133 | +async def test_late_reference_drift_hook_blocks_out_of_scope_reference_reads( | |
| 1134 | + temp_dir: Path, | |
| 1135 | +) -> None: | |
| 1136 | + registry = create_default_registry(temp_dir) | |
| 1137 | + policy = build_permission_policy( | |
| 1138 | + active_mode=PermissionMode.WORKSPACE_WRITE, | |
| 1139 | + workspace_root=temp_dir, | |
| 1140 | + tool_requirements=registry.get_tool_requirements(), | |
| 1141 | + ) | |
| 1142 | + dod_store = DefinitionOfDoneStore(temp_dir) | |
| 1143 | + dod = create_definition_of_done("Create a multi-file guide from a reference") | |
| 1144 | + dod.status = "in_progress" | |
| 1145 | + plan_path = temp_dir / "implementation.md" | |
| 1146 | + plan_path.write_text( | |
| 1147 | + "# File Changes\n" | |
| 1148 | + "- `guide/index.html`\n" | |
| 1149 | + "- `guide/chapters/01-getting-started.html`\n" | |
| 1150 | + "- `guide/chapters/02-installation.html`\n" | |
| 1151 | + "- `guide/chapters/03-first-website.html`\n" | |
| 1152 | + ) | |
| 1153 | + dod.implementation_plan = str(plan_path) | |
| 1154 | + dod_path = dod_store.save(dod) | |
| 1155 | + guide_dir = temp_dir / "guide" / "chapters" | |
| 1156 | + guide_dir.mkdir(parents=True, exist_ok=True) | |
| 1157 | + (temp_dir / "guide" / "index.html").write_text("index") | |
| 1158 | + (guide_dir / "01-getting-started.html").write_text("one") | |
| 1159 | + (guide_dir / "02-installation.html").write_text("two") | |
| 1160 | + session = FakeSession(active_dod_path=str(dod_path), messages=[]) | |
| 1161 | + hook = LateReferenceDriftHook( | |
| 1162 | + dod_store=dod_store, | |
| 1163 | + project_root=temp_dir, | |
| 1164 | + session=session, | |
| 1165 | + ) | |
| 1166 | + | |
| 1167 | + result = await hook.pre_tool_use( | |
| 1168 | + HookContext( | |
| 1169 | + tool_call=ToolCall( | |
| 1170 | + id="read-1", | |
| 1171 | + name="read", | |
| 1172 | + arguments={"file_path": str(temp_dir / "reference" / "index.html")}, | |
| 1173 | + ), | |
| 1174 | + tool=registry.get("read"), | |
| 1175 | + registry=registry, | |
| 1176 | + permission_policy=policy, | |
| 1177 | + source="native", | |
| 1178 | + ) | |
| 1179 | + ) | |
| 1180 | + | |
| 1181 | + assert result.decision == HookDecision.DENY | |
| 1182 | + assert result.terminal_state == "blocked" | |
| 1183 | + assert result.message is not None | |
| 1184 | + assert "late reference drift" in result.message | |
| 1185 | + assert "03-first-website.html" in result.message | |
| 1186 | + | |
| 1187 | + | |
| 1188 | +@pytest.mark.asyncio | |
| 1189 | +async def test_late_reference_drift_hook_allows_reads_inside_planned_artifact_set( | |
| 1190 | + temp_dir: Path, | |
| 1191 | +) -> None: | |
| 1192 | + registry = create_default_registry(temp_dir) | |
| 1193 | + policy = build_permission_policy( | |
| 1194 | + active_mode=PermissionMode.WORKSPACE_WRITE, | |
| 1195 | + workspace_root=temp_dir, | |
| 1196 | + tool_requirements=registry.get_tool_requirements(), | |
| 1197 | + ) | |
| 1198 | + dod_store = DefinitionOfDoneStore(temp_dir) | |
| 1199 | + dod = create_definition_of_done("Create a multi-file guide from a reference") | |
| 1200 | + dod.status = "in_progress" | |
| 1201 | + plan_path = temp_dir / "implementation.md" | |
| 1202 | + plan_path.write_text( | |
| 1203 | + "# File Changes\n" | |
| 1204 | + "- `guide/index.html`\n" | |
| 1205 | + "- `guide/chapters/01-getting-started.html`\n" | |
| 1206 | + "- `guide/chapters/02-installation.html`\n" | |
| 1207 | + "- `guide/chapters/03-first-website.html`\n" | |
| 1208 | + ) | |
| 1209 | + dod.implementation_plan = str(plan_path) | |
| 1210 | + dod_path = dod_store.save(dod) | |
| 1211 | + guide_dir = temp_dir / "guide" / "chapters" | |
| 1212 | + guide_dir.mkdir(parents=True, exist_ok=True) | |
| 1213 | + target = guide_dir / "02-installation.html" | |
| 1214 | + (temp_dir / "guide" / "index.html").write_text("index") | |
| 1215 | + (guide_dir / "01-getting-started.html").write_text("one") | |
| 1216 | + target.write_text("two") | |
| 1217 | + session = FakeSession(active_dod_path=str(dod_path), messages=[]) | |
| 1218 | + hook = LateReferenceDriftHook( | |
| 1219 | + dod_store=dod_store, | |
| 1220 | + project_root=temp_dir, | |
| 1221 | + session=session, | |
| 1222 | + ) | |
| 1223 | + | |
| 1224 | + result = await hook.pre_tool_use( | |
| 1225 | + HookContext( | |
| 1226 | + tool_call=ToolCall( | |
| 1227 | + id="read-1", | |
| 1228 | + name="read", | |
| 1229 | + arguments={"file_path": str(target)}, | |
| 1230 | + ), | |
| 1231 | + tool=registry.get("read"), | |
| 1232 | + registry=registry, | |
| 1233 | + permission_policy=policy, | |
| 1234 | + source="native", | |
| 1235 | + ) | |
| 1236 | + ) | |
| 1237 | + | |
| 1238 | + assert result.decision == HookDecision.CONTINUE | |
| 1239 | + | |
| 1240 | + | |
| 1241 | +@pytest.mark.asyncio | |
| 1242 | +async def test_late_reference_drift_hook_blocks_reference_reads_after_artifacts_exist( | |
| 1243 | + temp_dir: Path, | |
| 1244 | +) -> None: | |
| 1245 | + registry = create_default_registry(temp_dir) | |
| 1246 | + policy = build_permission_policy( | |
| 1247 | + active_mode=PermissionMode.WORKSPACE_WRITE, | |
| 1248 | + workspace_root=temp_dir, | |
| 1249 | + tool_requirements=registry.get_tool_requirements(), | |
| 1250 | + ) | |
| 1251 | + dod_store = DefinitionOfDoneStore(temp_dir) | |
| 1252 | + dod = create_definition_of_done("Create a multi-file guide from a reference") | |
| 1253 | + dod.status = "in_progress" | |
| 1254 | + plan_path = temp_dir / "implementation.md" | |
| 1255 | + plan_path.write_text( | |
| 1256 | + "\n".join( | |
| 1257 | + [ | |
| 1258 | + "# Implementation Plan", | |
| 1259 | + "", | |
| 1260 | + "## File Changes", | |
| 1261 | + f"- `{temp_dir / 'guide'}`", | |
| 1262 | + f"- `{temp_dir / 'guide' / 'chapters'}`", | |
| 1263 | + f"- `{temp_dir / 'guide' / 'index.html'}`", | |
| 1264 | + f"- `{temp_dir / 'guide' / 'chapters' / '01-getting-started.html'}`", | |
| 1265 | + f"- `{temp_dir / 'guide' / 'chapters' / '02-installation.html'}`", | |
| 1266 | + "", | |
| 1267 | + ] | |
| 1268 | + ) | |
| 1269 | + ) | |
| 1270 | + dod.implementation_plan = str(plan_path) | |
| 1271 | + guide_dir = temp_dir / "guide" / "chapters" | |
| 1272 | + guide_dir.mkdir(parents=True, exist_ok=True) | |
| 1273 | + (temp_dir / "guide" / "index.html").write_text("index") | |
| 1274 | + (guide_dir / "01-getting-started.html").write_text("one") | |
| 1275 | + (guide_dir / "02-installation.html").write_text("two") | |
| 1276 | + dod_path = dod_store.save(dod) | |
| 1277 | + session = FakeSession(active_dod_path=str(dod_path), messages=[]) | |
| 1278 | + hook = LateReferenceDriftHook( | |
| 1279 | + dod_store=dod_store, | |
| 1280 | + project_root=temp_dir, | |
| 1281 | + session=session, | |
| 1282 | + ) | |
| 1283 | + | |
| 1284 | + result = await hook.pre_tool_use( | |
| 1285 | + HookContext( | |
| 1286 | + tool_call=ToolCall( | |
| 1287 | + id="read-1", | |
| 1288 | + name="read", | |
| 1289 | + arguments={"file_path": str(temp_dir / "reference" / "index.html")}, | |
| 1290 | + ), | |
| 1291 | + tool=registry.get("read"), | |
| 1292 | + registry=registry, | |
| 1293 | + permission_policy=policy, | |
| 1294 | + source="native", | |
| 1295 | + ) | |
| 1296 | + ) | |
| 1297 | + | |
| 1298 | + assert result.decision == HookDecision.DENY | |
| 1299 | + assert result.terminal_state == "blocked" | |
| 1300 | + assert result.message is not None | |
| 1301 | + assert "completed artifact set scope" in result.message | |
| 1302 | + assert str(temp_dir / "guide") in result.message | |
| 1303 | + | |
| 1304 | + | |
| 1305 | +@pytest.mark.asyncio | |
| 1306 | +async def test_late_reference_drift_hook_does_not_treat_empty_output_dir_as_complete_artifact_set( | |
| 1307 | + temp_dir: Path, | |
| 1308 | +) -> None: | |
| 1309 | + registry = create_default_registry(temp_dir) | |
| 1310 | + policy = build_permission_policy( | |
| 1311 | + active_mode=PermissionMode.WORKSPACE_WRITE, | |
| 1312 | + workspace_root=temp_dir, | |
| 1313 | + tool_requirements=registry.get_tool_requirements(), | |
| 1314 | + ) | |
| 1315 | + dod_store = DefinitionOfDoneStore(temp_dir) | |
| 1316 | + dod = create_definition_of_done("Create a multi-file guide from a reference") | |
| 1317 | + dod.status = "in_progress" | |
| 1318 | + dod.completed_items = ["Create chapter files with appropriate content"] | |
| 1319 | + plan_path = temp_dir / "implementation.md" | |
| 1320 | + plan_path.write_text( | |
| 1321 | + "\n".join( | |
| 1322 | + [ | |
| 1323 | + "# Implementation Plan", | |
| 1324 | + "", | |
| 1325 | + "## File Changes", | |
| 1326 | + f"- `{temp_dir / 'guide' / 'index.html'}`", | |
| 1327 | + f"- `{temp_dir / 'guide' / 'chapters'}/` (directory for chapter files)", | |
| 1328 | + "", | |
| 1329 | + "## Execution Order", | |
| 1330 | + "- Create chapter files with appropriate content", | |
| 1331 | + ] | |
| 1332 | + ) | |
| 1333 | + ) | |
| 1334 | + dod.implementation_plan = str(plan_path) | |
| 1335 | + guide_dir = temp_dir / "guide" / "chapters" | |
| 1336 | + guide_dir.mkdir(parents=True, exist_ok=True) | |
| 1337 | + (temp_dir / "guide" / "index.html").write_text("index") | |
| 1338 | + dod_path = dod_store.save(dod) | |
| 1339 | + session = FakeSession(active_dod_path=str(dod_path), messages=[]) | |
| 1340 | + hook = LateReferenceDriftHook( | |
| 1341 | + dod_store=dod_store, | |
| 1342 | + project_root=temp_dir, | |
| 1343 | + session=session, | |
| 1344 | + ) | |
| 1345 | + | |
| 1346 | + result = await hook.pre_tool_use( | |
| 1347 | + HookContext( | |
| 1348 | + tool_call=ToolCall( | |
| 1349 | + id="read-1", | |
| 1350 | + name="read", | |
| 1351 | + arguments={"file_path": str(temp_dir / "reference" / "index.html")}, | |
| 1352 | + ), | |
| 1353 | + tool=registry.get("read"), | |
| 1354 | + registry=registry, | |
| 1355 | + permission_policy=policy, | |
| 1356 | + source="native", | |
| 1357 | + ) | |
| 1358 | + ) | |
| 1359 | + | |
| 1360 | + assert result.decision == HookDecision.CONTINUE | |
| 1361 | + | |
| 1362 | + | |
| 1363 | +@pytest.mark.asyncio | |
| 1364 | +async def test_late_reference_drift_hook_does_not_block_when_html_outputs_still_link_to_missing_files( | |
| 1365 | + temp_dir: Path, | |
| 1366 | +) -> None: | |
| 1367 | + registry = create_default_registry(temp_dir) | |
| 1368 | + policy = build_permission_policy( | |
| 1369 | + active_mode=PermissionMode.WORKSPACE_WRITE, | |
| 1370 | + workspace_root=temp_dir, | |
| 1371 | + tool_requirements=registry.get_tool_requirements(), | |
| 1372 | + ) | |
| 1373 | + dod_store = DefinitionOfDoneStore(temp_dir) | |
| 1374 | + dod = create_definition_of_done("Create a multi-file guide from a reference") | |
| 1375 | + dod.status = "in_progress" | |
| 1376 | + dod.completed_items = ["Create chapter files with appropriate content"] | |
| 1377 | + plan_path = temp_dir / "implementation.md" | |
| 1378 | + plan_path.write_text( | |
| 1379 | + "\n".join( | |
| 1380 | + [ | |
| 1381 | + "# Implementation Plan", | |
| 1382 | + "", | |
| 1383 | + "## File Changes", | |
| 1384 | + f"- `{temp_dir / 'guide' / 'index.html'}`", | |
| 1385 | + f"- `{temp_dir / 'guide' / 'chapters'}/` (directory for chapter files)", | |
| 1386 | + "", | |
| 1387 | + "## Execution Order", | |
| 1388 | + "- Create chapter files with appropriate content", | |
| 1389 | + ] | |
| 1390 | + ) | |
| 1391 | + ) | |
| 1392 | + dod.implementation_plan = str(plan_path) | |
| 1393 | + guide_dir = temp_dir / "guide" | |
| 1394 | + chapters = guide_dir / "chapters" | |
| 1395 | + chapters.mkdir(parents=True, exist_ok=True) | |
| 1396 | + index = guide_dir / "index.html" | |
| 1397 | + index.write_text( | |
| 1398 | + '<a href="chapters/01-getting-started.html">One</a>\n' | |
| 1399 | + '<a href="chapters/02-installation.html">Two</a>\n' | |
| 1400 | + ) | |
| 1401 | + (chapters / "01-getting-started.html").write_text("one") | |
| 1402 | + dod.touched_files = [str(index), str(chapters / "01-getting-started.html")] | |
| 1403 | + dod_path = dod_store.save(dod) | |
| 1404 | + session = FakeSession(active_dod_path=str(dod_path), messages=[]) | |
| 1405 | + hook = LateReferenceDriftHook( | |
| 1406 | + dod_store=dod_store, | |
| 1407 | + project_root=temp_dir, | |
| 1408 | + session=session, | |
| 1409 | + ) | |
| 1410 | + | |
| 1411 | + result = await hook.pre_tool_use( | |
| 1412 | + HookContext( | |
| 1413 | + tool_call=ToolCall( | |
| 1414 | + id="read-1", | |
| 1415 | + name="read", | |
| 1416 | + arguments={"file_path": str(temp_dir / "reference" / "index.html")}, | |
| 1417 | + ), | |
| 1418 | + tool=registry.get("read"), | |
| 1419 | + registry=registry, | |
| 1420 | + permission_policy=policy, | |
| 1421 | + source="native", | |
| 1422 | + ) | |
| 1423 | + ) | |
| 1424 | + | |
| 1425 | + assert result.decision == HookDecision.CONTINUE | |
tests/test_repair.pymodified@@ -8,6 +8,7 @@ from types import SimpleNamespace | ||
| 8 | 8 | |
| 9 | 9 | from loader.llm.base import ToolCall |
| 10 | 10 | from loader.runtime.context import RuntimeContext |
| 11 | +from loader.runtime.dod import create_definition_of_done | |
| 11 | 12 | from loader.runtime.permissions import ( |
| 12 | 13 | PermissionMode, |
| 13 | 14 | build_permission_policy, |
@@ -201,3 +202,569 @@ def test_response_repairer_fails_honestly_when_raw_tool_budget_is_exhausted( | ||
| 201 | 202 | ) |
| 202 | 203 | assert analysis.failure == "raw-text tool recovery budget exhausted" |
| 203 | 204 | assert "Let me know if you'd like me to continue" not in analysis.final_response |
| 205 | + | |
| 206 | + | |
| 207 | +def test_empty_response_retry_message_surfaces_missing_planned_artifacts_and_working_note( | |
| 208 | + temp_dir: Path, | |
| 209 | +) -> None: | |
| 210 | + context = build_context( | |
| 211 | + temp_dir=temp_dir, | |
| 212 | + use_react=False, | |
| 213 | + ) | |
| 214 | + repairer = ResponseRepairer(context) | |
| 215 | + implementation_plan = temp_dir / "implementation.md" | |
| 216 | + implementation_plan.write_text( | |
| 217 | + "\n".join( | |
| 218 | + [ | |
| 219 | + "# Implementation Plan", | |
| 220 | + "", | |
| 221 | + "## File Changes", | |
| 222 | + f"- `{temp_dir / 'guides' / 'nginx' / 'index.html'}`", | |
| 223 | + f"- `{temp_dir / 'guides' / 'nginx' / 'chapters' / '01-getting-started.html'}`", | |
| 224 | + f"- `{temp_dir / 'guides' / 'nginx' / 'chapters' / '02-installation.html'}`", | |
| 225 | + "", | |
| 226 | + ] | |
| 227 | + ) | |
| 228 | + ) | |
| 229 | + first_artifact = temp_dir / "guides" / "nginx" / "index.html" | |
| 230 | + first_artifact.parent.mkdir(parents=True) | |
| 231 | + first_artifact.write_text("<html></html>\n") | |
| 232 | + | |
| 233 | + dod = create_definition_of_done("Create a multi-file nginx guide.") | |
| 234 | + dod.implementation_plan = str(implementation_plan) | |
| 235 | + dod.touched_files.append(str(first_artifact)) | |
| 236 | + dod.completed_items.append("Create the main index.html file") | |
| 237 | + dod.pending_items.append("Create each chapter file in sequence") | |
| 238 | + | |
| 239 | + context.session.append( | |
| 240 | + SimpleNamespace( | |
| 241 | + role="tool", | |
| 242 | + content=( | |
| 243 | + "Observation [notepad_write_working]: Result: " | |
| 244 | + "- [2026-04-21T19:17:34Z] Creating fifth chapter file: Advanced configurations" | |
| 245 | + ), | |
| 246 | + ) | |
| 247 | + ) | |
| 248 | + | |
| 249 | + decision = repairer.handle_empty_response( | |
| 250 | + task="Create a multi-file nginx guide.", | |
| 251 | + original_task=None, | |
| 252 | + empty_retry_count=1, | |
| 253 | + max_empty_retries=2, | |
| 254 | + dod=dod, | |
| 255 | + ) | |
| 256 | + | |
| 257 | + assert decision.should_continue is True | |
| 258 | + assert decision.retry_message is not None | |
| 259 | + assert "Latest working note: Creating fifth chapter file: Advanced configurations" in decision.retry_message | |
| 260 | + assert "Next missing planned artifact: `01-getting-started.html`" in decision.retry_message | |
| 261 | + assert "Remaining planned artifacts: `01-getting-started.html`, `02-installation.html`" in decision.retry_message | |
| 262 | + assert "Resume with this exact next step: create `01-getting-started.html`." in decision.retry_message | |
| 263 | + assert f"Prefer one `write` call for `{temp_dir / 'guides' / 'nginx' / 'chapters' / '01-getting-started.html'}` before any more reference reads." in decision.retry_message | |
| 264 | + assert ( | |
| 265 | + "Shape the next response as one concrete `write(file_path=..., content=...)` " | |
| 266 | + "tool call for that exact path." | |
| 267 | + in decision.retry_message | |
| 268 | + ) | |
| 269 | + assert ( | |
| 270 | + "Your next response should be the concrete mutation tool call itself, " | |
| 271 | + "not TodoWrite alone, verification, or a completion summary." | |
| 272 | + in decision.retry_message | |
| 273 | + ) | |
| 274 | + assert "Do not restart discovery unless one specific missing fact blocks this step." in decision.retry_message | |
| 275 | + | |
| 276 | + | |
| 277 | +def test_empty_response_retry_mentions_write_can_create_missing_parent_directories( | |
| 278 | + temp_dir: Path, | |
| 279 | +) -> None: | |
| 280 | + context = build_context( | |
| 281 | + temp_dir=temp_dir, | |
| 282 | + use_react=False, | |
| 283 | + ) | |
| 284 | + repairer = ResponseRepairer(context) | |
| 285 | + | |
| 286 | + guide_root = temp_dir / "guides" / "nginx" | |
| 287 | + index_path = guide_root / "index.html" | |
| 288 | + | |
| 289 | + implementation_plan = temp_dir / "implementation.md" | |
| 290 | + implementation_plan.write_text( | |
| 291 | + "\n".join( | |
| 292 | + [ | |
| 293 | + "# Implementation Plan", | |
| 294 | + "", | |
| 295 | + "## File Changes", | |
| 296 | + f"- `{index_path}`", | |
| 297 | + "", | |
| 298 | + ] | |
| 299 | + ) | |
| 300 | + ) | |
| 301 | + | |
| 302 | + dod = create_definition_of_done("Create a multi-file nginx guide.") | |
| 303 | + dod.implementation_plan = str(implementation_plan) | |
| 304 | + dod.pending_items.extend( | |
| 305 | + [ | |
| 306 | + "Create nginx guide directory structure", | |
| 307 | + "Write main index.html for nginx guide", | |
| 308 | + ] | |
| 309 | + ) | |
| 310 | + | |
| 311 | + decision = repairer.handle_empty_response( | |
| 312 | + task="Create a multi-file nginx guide.", | |
| 313 | + original_task=None, | |
| 314 | + empty_retry_count=1, | |
| 315 | + max_empty_retries=2, | |
| 316 | + dod=dod, | |
| 317 | + ) | |
| 318 | + | |
| 319 | + assert decision.should_continue is True | |
| 320 | + assert decision.retry_message is not None | |
| 321 | + assert "Resume with this exact next step: create `index.html`." in decision.retry_message | |
| 322 | + assert ( | |
| 323 | + "The `write` tool can create that file's parent directories automatically" | |
| 324 | + in decision.retry_message | |
| 325 | + ) | |
| 326 | + assert ( | |
| 327 | + "Shape the next response as one concrete `write(file_path=..., content=...)` " | |
| 328 | + "tool call for that exact path." | |
| 329 | + in decision.retry_message | |
| 330 | + ) | |
| 331 | + | |
| 332 | + | |
| 333 | +def test_empty_response_retry_respects_discovery_first_pending_step( | |
| 334 | + temp_dir: Path, | |
| 335 | +) -> None: | |
| 336 | + context = build_context( | |
| 337 | + temp_dir=temp_dir, | |
| 338 | + use_react=False, | |
| 339 | + ) | |
| 340 | + repairer = ResponseRepairer(context) | |
| 341 | + | |
| 342 | + implementation_plan = temp_dir / "implementation.md" | |
| 343 | + implementation_plan.write_text( | |
| 344 | + "\n".join( | |
| 345 | + [ | |
| 346 | + "# Implementation Plan", | |
| 347 | + "", | |
| 348 | + "## File Changes", | |
| 349 | + f"- `{temp_dir / 'guides' / 'nginx' / 'index.html'}`", | |
| 350 | + f"- `{temp_dir / 'guides' / 'nginx' / 'chapters'}`", | |
| 351 | + "", | |
| 352 | + ] | |
| 353 | + ) | |
| 354 | + ) | |
| 355 | + | |
| 356 | + dod = create_definition_of_done("Create a multi-file nginx guide.") | |
| 357 | + dod.implementation_plan = str(implementation_plan) | |
| 358 | + dod.pending_items.extend( | |
| 359 | + [ | |
| 360 | + "First, examine the existing fortran guide structure and content to understand the format", | |
| 361 | + "Create the nginx directory structure", | |
| 362 | + "Develop the main index.html file for the nginx guide", | |
| 363 | + ] | |
| 364 | + ) | |
| 365 | + | |
| 366 | + context.session.append( | |
| 367 | + SimpleNamespace( | |
| 368 | + role="tool", | |
| 369 | + content=( | |
| 370 | + "Observation [notepad_write_working]: Result: " | |
| 371 | + "- [2026-04-22T22:42:18Z] Analyzing the fortran guide structure before creating nginx guide" | |
| 372 | + ), | |
| 373 | + ) | |
| 374 | + ) | |
| 375 | + | |
| 376 | + decision = repairer.handle_empty_response( | |
| 377 | + task="Create a multi-file nginx guide.", | |
| 378 | + original_task=None, | |
| 379 | + empty_retry_count=1, | |
| 380 | + max_empty_retries=2, | |
| 381 | + dod=dod, | |
| 382 | + ) | |
| 383 | + | |
| 384 | + assert decision.should_continue is True | |
| 385 | + assert decision.retry_message is not None | |
| 386 | + assert ( | |
| 387 | + "Resume with this exact next step: advance `First, examine the existing fortran guide structure and content to understand the format`." | |
| 388 | + in decision.retry_message | |
| 389 | + ) | |
| 390 | + assert "one concrete evidence-gathering tool call" in decision.retry_message | |
| 391 | + assert "Resume with this exact next step: create `index.html`." not in decision.retry_message | |
| 392 | + | |
| 393 | + | |
| 394 | +def test_empty_response_retry_budget_extends_for_late_stage_multi_artifact_progress( | |
| 395 | + temp_dir: Path, | |
| 396 | +) -> None: | |
| 397 | + context = build_context( | |
| 398 | + temp_dir=temp_dir, | |
| 399 | + use_react=False, | |
| 400 | + ) | |
| 401 | + repairer = ResponseRepairer(context) | |
| 402 | + | |
| 403 | + guide_root = temp_dir / "guides" / "nginx" | |
| 404 | + chapters = guide_root / "chapters" | |
| 405 | + chapters.mkdir(parents=True) | |
| 406 | + index_path = guide_root / "index.html" | |
| 407 | + chapter_one = chapters / "01-getting-started.html" | |
| 408 | + chapter_two = chapters / "02-installation.html" | |
| 409 | + chapter_three = chapters / "03-first-website.html" | |
| 410 | + chapter_four = chapters / "04-configuration-basics.html" | |
| 411 | + index_path.write_text("<html></html>\n") | |
| 412 | + chapter_one.write_text("<h1>One</h1>\n") | |
| 413 | + chapter_two.write_text("<h1>Two</h1>\n") | |
| 414 | + chapter_three.write_text("<h1>Three</h1>\n") | |
| 415 | + | |
| 416 | + implementation_plan = temp_dir / "implementation.md" | |
| 417 | + implementation_plan.write_text( | |
| 418 | + "\n".join( | |
| 419 | + [ | |
| 420 | + "# Implementation Plan", | |
| 421 | + "", | |
| 422 | + "## File Changes", | |
| 423 | + f"- `{guide_root}/`", | |
| 424 | + f"- `{chapters}/`", | |
| 425 | + f"- `{index_path}`", | |
| 426 | + f"- `{chapter_one}`", | |
| 427 | + f"- `{chapter_two}`", | |
| 428 | + f"- `{chapter_three}`", | |
| 429 | + f"- `{chapter_four}`", | |
| 430 | + "", | |
| 431 | + ] | |
| 432 | + ) | |
| 433 | + ) | |
| 434 | + | |
| 435 | + dod = create_definition_of_done("Create a multi-file nginx guide.") | |
| 436 | + dod.implementation_plan = str(implementation_plan) | |
| 437 | + dod.touched_files.extend( | |
| 438 | + [str(index_path), str(chapter_one), str(chapter_two), str(chapter_three)] | |
| 439 | + ) | |
| 440 | + dod.completed_items.extend( | |
| 441 | + [ | |
| 442 | + "Create the directory structure for the new nginx guide", | |
| 443 | + "Create the main index.html file with proper structure", | |
| 444 | + ] | |
| 445 | + ) | |
| 446 | + dod.pending_items.append("Create each chapter file in sequence") | |
| 447 | + | |
| 448 | + decision = repairer.handle_empty_response( | |
| 449 | + task="Create a multi-file nginx guide.", | |
| 450 | + original_task=None, | |
| 451 | + empty_retry_count=3, | |
| 452 | + max_empty_retries=2, | |
| 453 | + dod=dod, | |
| 454 | + ) | |
| 455 | + | |
| 456 | + assert decision.should_continue is True | |
| 457 | + assert decision.retry_message is not None | |
| 458 | + assert "retry 3/4" in decision.retry_message | |
| 459 | + assert "Follow the same one-file-at-a-time mutation pattern" in decision.retry_message | |
| 460 | + | |
| 461 | + | |
| 462 | +def test_empty_response_retry_points_at_next_output_file_when_planned_directory_is_empty( | |
| 463 | + temp_dir: Path, | |
| 464 | +) -> None: | |
| 465 | + context = build_context( | |
| 466 | + temp_dir=temp_dir, | |
| 467 | + use_react=False, | |
| 468 | + ) | |
| 469 | + repairer = ResponseRepairer(context) | |
| 470 | + | |
| 471 | + guide_root = temp_dir / "guides" / "nginx" | |
| 472 | + chapters = guide_root / "chapters" | |
| 473 | + chapters.mkdir(parents=True) | |
| 474 | + index_path = guide_root / "index.html" | |
| 475 | + index_path.write_text("<html></html>\n") | |
| 476 | + | |
| 477 | + implementation_plan = temp_dir / "implementation.md" | |
| 478 | + implementation_plan.write_text( | |
| 479 | + "\n".join( | |
| 480 | + [ | |
| 481 | + "# Implementation Plan", | |
| 482 | + "", | |
| 483 | + "## File Changes", | |
| 484 | + f"- `{guide_root}/`", | |
| 485 | + f"- `{chapters}/`", | |
| 486 | + f"- `{index_path}`", | |
| 487 | + "", | |
| 488 | + ] | |
| 489 | + ) | |
| 490 | + ) | |
| 491 | + | |
| 492 | + dod = create_definition_of_done("Create a multi-file nginx guide.") | |
| 493 | + dod.implementation_plan = str(implementation_plan) | |
| 494 | + dod.touched_files.append(str(index_path)) | |
| 495 | + dod.pending_items.append("Write the introduction chapter") | |
| 496 | + | |
| 497 | + decision = repairer.handle_empty_response( | |
| 498 | + task="Create a multi-file nginx guide.", | |
| 499 | + original_task=None, | |
| 500 | + empty_retry_count=1, | |
| 501 | + max_empty_retries=2, | |
| 502 | + dod=dod, | |
| 503 | + ) | |
| 504 | + | |
| 505 | + assert decision.should_continue is True | |
| 506 | + assert decision.retry_message is not None | |
| 507 | + assert "Next missing planned artifact: `chapters/`" in decision.retry_message | |
| 508 | + assert ( | |
| 509 | + "Resume with this exact next step: continue `Write the introduction chapter` " | |
| 510 | + "by creating the next output file under `chapters/`." | |
| 511 | + in decision.retry_message | |
| 512 | + ) | |
| 513 | + assert ( | |
| 514 | + f"Prefer one concrete `write` call for a file inside `{chapters}` before more research." | |
| 515 | + in decision.retry_message | |
| 516 | + ) | |
| 517 | + | |
| 518 | + | |
| 519 | +def test_empty_response_retry_points_at_declared_child_file_within_incomplete_output_directory( | |
| 520 | + temp_dir: Path, | |
| 521 | +) -> None: | |
| 522 | + context = build_context( | |
| 523 | + temp_dir=temp_dir, | |
| 524 | + use_react=False, | |
| 525 | + ) | |
| 526 | + repairer = ResponseRepairer(context) | |
| 527 | + | |
| 528 | + guide_root = temp_dir / "guides" / "nginx" | |
| 529 | + chapters = guide_root / "chapters" | |
| 530 | + chapters.mkdir(parents=True) | |
| 531 | + index_path = guide_root / "index.html" | |
| 532 | + index_path.write_text( | |
| 533 | + "\n".join( | |
| 534 | + [ | |
| 535 | + "<html>", | |
| 536 | + '<a href="chapters/introduction.html">Introduction</a>', | |
| 537 | + '<a href="chapters/installation.html">Installation</a>', | |
| 538 | + "</html>", | |
| 539 | + ] | |
| 540 | + ) | |
| 541 | + + "\n" | |
| 542 | + ) | |
| 543 | + | |
| 544 | + implementation_plan = temp_dir / "implementation.md" | |
| 545 | + implementation_plan.write_text( | |
| 546 | + "\n".join( | |
| 547 | + [ | |
| 548 | + "# Implementation Plan", | |
| 549 | + "", | |
| 550 | + "## File Changes", | |
| 551 | + f"- `{guide_root}/`", | |
| 552 | + f"- `{chapters}/`", | |
| 553 | + f"- `{index_path}`", | |
| 554 | + "", | |
| 555 | + ] | |
| 556 | + ) | |
| 557 | + ) | |
| 558 | + | |
| 559 | + dod = create_definition_of_done("Create a multi-file nginx guide.") | |
| 560 | + dod.implementation_plan = str(implementation_plan) | |
| 561 | + dod.touched_files.append(str(index_path)) | |
| 562 | + dod.pending_items.append("Write the introduction chapter") | |
| 563 | + | |
| 564 | + decision = repairer.handle_empty_response( | |
| 565 | + task="Create a multi-file nginx guide.", | |
| 566 | + original_task=None, | |
| 567 | + empty_retry_count=1, | |
| 568 | + max_empty_retries=2, | |
| 569 | + dod=dod, | |
| 570 | + ) | |
| 571 | + | |
| 572 | + assert decision.should_continue is True | |
| 573 | + assert decision.retry_message is not None | |
| 574 | + assert "Next missing planned artifact: `chapters/`" in decision.retry_message | |
| 575 | + assert "Next declared output under `chapters/`: `introduction.html`" in decision.retry_message | |
| 576 | + assert ( | |
| 577 | + "Resume with this exact next step: continue `Write the introduction chapter` " | |
| 578 | + "by creating `introduction.html`." | |
| 579 | + in decision.retry_message | |
| 580 | + ) | |
| 581 | + assert "It is the next missing declared output under `chapters/`." in decision.retry_message | |
| 582 | + assert "Prefer one `write` call for `" in decision.retry_message | |
| 583 | + assert "introduction.html` before more research." in decision.retry_message | |
| 584 | + | |
| 585 | + | |
| 586 | +def test_empty_response_retry_fails_after_extended_late_stage_budget_is_exhausted( | |
| 587 | + temp_dir: Path, | |
| 588 | +) -> None: | |
| 589 | + context = build_context( | |
| 590 | + temp_dir=temp_dir, | |
| 591 | + use_react=False, | |
| 592 | + ) | |
| 593 | + repairer = ResponseRepairer(context) | |
| 594 | + | |
| 595 | + guide_root = temp_dir / "guides" / "nginx" | |
| 596 | + chapters = guide_root / "chapters" | |
| 597 | + chapters.mkdir(parents=True) | |
| 598 | + index_path = guide_root / "index.html" | |
| 599 | + chapter_one = chapters / "01-getting-started.html" | |
| 600 | + chapter_two = chapters / "02-installation.html" | |
| 601 | + chapter_three = chapters / "03-first-website.html" | |
| 602 | + chapter_four = chapters / "04-configuration-basics.html" | |
| 603 | + index_path.write_text("<html></html>\n") | |
| 604 | + chapter_one.write_text("<h1>One</h1>\n") | |
| 605 | + chapter_two.write_text("<h1>Two</h1>\n") | |
| 606 | + chapter_three.write_text("<h1>Three</h1>\n") | |
| 607 | + | |
| 608 | + implementation_plan = temp_dir / "implementation.md" | |
| 609 | + implementation_plan.write_text( | |
| 610 | + "\n".join( | |
| 611 | + [ | |
| 612 | + "# Implementation Plan", | |
| 613 | + "", | |
| 614 | + "## File Changes", | |
| 615 | + f"- `{guide_root}/`", | |
| 616 | + f"- `{chapters}/`", | |
| 617 | + f"- `{index_path}`", | |
| 618 | + f"- `{chapter_one}`", | |
| 619 | + f"- `{chapter_two}`", | |
| 620 | + f"- `{chapter_three}`", | |
| 621 | + f"- `{chapter_four}`", | |
| 622 | + "", | |
| 623 | + ] | |
| 624 | + ) | |
| 625 | + ) | |
| 626 | + | |
| 627 | + dod = create_definition_of_done("Create a multi-file nginx guide.") | |
| 628 | + dod.implementation_plan = str(implementation_plan) | |
| 629 | + dod.touched_files.extend( | |
| 630 | + [str(index_path), str(chapter_one), str(chapter_two), str(chapter_three)] | |
| 631 | + ) | |
| 632 | + dod.completed_items.extend( | |
| 633 | + [ | |
| 634 | + "Create the directory structure for the new nginx guide", | |
| 635 | + "Create the main index.html file with proper structure", | |
| 636 | + ] | |
| 637 | + ) | |
| 638 | + dod.pending_items.append("Create each chapter file in sequence") | |
| 639 | + | |
| 640 | + decision = repairer.handle_empty_response( | |
| 641 | + task="Create a multi-file nginx guide.", | |
| 642 | + original_task=None, | |
| 643 | + empty_retry_count=5, | |
| 644 | + max_empty_retries=2, | |
| 645 | + dod=dod, | |
| 646 | + ) | |
| 647 | + | |
| 648 | + assert decision.should_continue is False | |
| 649 | + assert decision.final_response is not None | |
| 650 | + assert "retrying 4 times" in decision.final_response | |
| 651 | + | |
| 652 | + | |
| 653 | +def test_empty_response_retry_mentions_todowrite_when_progress_has_outpaced_tracking( | |
| 654 | + temp_dir: Path, | |
| 655 | +) -> None: | |
| 656 | + context = build_context( | |
| 657 | + temp_dir=temp_dir, | |
| 658 | + use_react=False, | |
| 659 | + ) | |
| 660 | + repairer = ResponseRepairer(context) | |
| 661 | + | |
| 662 | + guide_root = temp_dir / "guides" / "nginx" | |
| 663 | + chapters = guide_root / "chapters" | |
| 664 | + chapters.mkdir(parents=True) | |
| 665 | + implementation_plan = temp_dir / "implementation.md" | |
| 666 | + implementation_plan.write_text( | |
| 667 | + "\n".join( | |
| 668 | + [ | |
| 669 | + "# Implementation Plan", | |
| 670 | + "", | |
| 671 | + "## File Changes", | |
| 672 | + f"- `{guide_root / 'index.html'}`", | |
| 673 | + f"- `{chapters / '01-getting-started.html'}`", | |
| 674 | + f"- `{chapters / '02-installation.html'}`", | |
| 675 | + "", | |
| 676 | + ] | |
| 677 | + ) | |
| 678 | + ) | |
| 679 | + | |
| 680 | + dod = create_definition_of_done("Create a multi-file nginx guide.") | |
| 681 | + dod.implementation_plan = str(implementation_plan) | |
| 682 | + dod.touched_files.extend( | |
| 683 | + [ | |
| 684 | + str(guide_root / "index.html"), | |
| 685 | + str(chapters / "01-getting-started.html"), | |
| 686 | + ] | |
| 687 | + ) | |
| 688 | + dod.completed_items.extend( | |
| 689 | + [ | |
| 690 | + "Create the directory structure for the new nginx guide", | |
| 691 | + "Create the main index.html file with proper structure", | |
| 692 | + ] | |
| 693 | + ) | |
| 694 | + dod.pending_items.append("Create each chapter file in sequence") | |
| 695 | + | |
| 696 | + decision = repairer.handle_empty_response( | |
| 697 | + task="Create a multi-file nginx guide.", | |
| 698 | + original_task=None, | |
| 699 | + empty_retry_count=1, | |
| 700 | + max_empty_retries=2, | |
| 701 | + dod=dod, | |
| 702 | + ) | |
| 703 | + | |
| 704 | + assert decision.retry_message is not None | |
| 705 | + assert ( | |
| 706 | + "refresh `TodoWrite` alongside the next concrete mutation" | |
| 707 | + in decision.retry_message | |
| 708 | + ) | |
| 709 | + | |
| 710 | + | |
| 711 | +def test_empty_response_retry_omits_stale_aggregate_completed_work_when_artifacts_missing( | |
| 712 | + temp_dir: Path, | |
| 713 | +) -> None: | |
| 714 | + context = build_context( | |
| 715 | + temp_dir=temp_dir, | |
| 716 | + use_react=False, | |
| 717 | + ) | |
| 718 | + repairer = ResponseRepairer(context) | |
| 719 | + | |
| 720 | + guide_root = temp_dir / "guides" / "nginx" | |
| 721 | + chapters = guide_root / "chapters" | |
| 722 | + chapters.mkdir(parents=True) | |
| 723 | + index_path = guide_root / "index.html" | |
| 724 | + chapter_one = chapters / "01-getting-started.html" | |
| 725 | + chapter_two = chapters / "02-installation.html" | |
| 726 | + chapter_three = chapters / "03-first-website.html" | |
| 727 | + index_path.write_text("<html></html>\n") | |
| 728 | + chapter_one.write_text("<h1>One</h1>\n") | |
| 729 | + chapter_two.write_text("<h1>Two</h1>\n") | |
| 730 | + | |
| 731 | + implementation_plan = temp_dir / "implementation.md" | |
| 732 | + implementation_plan.write_text( | |
| 733 | + "\n".join( | |
| 734 | + [ | |
| 735 | + "# Implementation Plan", | |
| 736 | + "", | |
| 737 | + "## File Changes", | |
| 738 | + f"- `{guide_root}/`", | |
| 739 | + f"- `{chapters}/`", | |
| 740 | + f"- `{index_path}`", | |
| 741 | + f"- `{chapter_one}`", | |
| 742 | + f"- `{chapter_two}`", | |
| 743 | + f"- `{chapter_three}`", | |
| 744 | + "", | |
| 745 | + ] | |
| 746 | + ) | |
| 747 | + ) | |
| 748 | + | |
| 749 | + dod = create_definition_of_done("Create a multi-file nginx guide.") | |
| 750 | + dod.implementation_plan = str(implementation_plan) | |
| 751 | + dod.touched_files.extend([str(index_path), str(chapter_one), str(chapter_two)]) | |
| 752 | + dod.completed_items.extend( | |
| 753 | + [ | |
| 754 | + "Create the main index.html file with proper structure", | |
| 755 | + "Link all chapters together properly", | |
| 756 | + ] | |
| 757 | + ) | |
| 758 | + dod.pending_items.append("Create each chapter file in sequence") | |
| 759 | + | |
| 760 | + decision = repairer.handle_empty_response( | |
| 761 | + task="Create a multi-file nginx guide.", | |
| 762 | + original_task=None, | |
| 763 | + empty_retry_count=1, | |
| 764 | + max_empty_retries=2, | |
| 765 | + dod=dod, | |
| 766 | + ) | |
| 767 | + | |
| 768 | + assert decision.retry_message is not None | |
| 769 | + assert "Link all chapters together properly" not in decision.retry_message | |
| 770 | + assert "Create the main index.html file with proper structure" in decision.retry_message | |
tests/test_runtime_harness.pymodified@@ -2020,26 +2020,8 @@ async def test_blocked_html_index_edit_queues_inventory_reuse_steering( | ||
| 2020 | 2020 | if event.type == "steering" and event.content |
| 2021 | 2021 | ] |
| 2022 | 2022 | |
| 2023 | - assert any("TOC references chapter files that do not exist" in message for message in messages) | |
| 2024 | - assert any( | |
| 2025 | - "Use the current TOC target contents plus the verified sibling inventory" in message | |
| 2026 | - for message in steering_messages | |
| 2027 | - ) | |
| 2028 | - assert any(str(index_file) in message for message in steering_messages) | |
| 2029 | - assert any( | |
| 2030 | - "chapters/05-input-output.html = Chapter 5: Input and Output" in message | |
| 2031 | - for message in steering_messages | |
| 2032 | - ) | |
| 2033 | - assert any("<ul class=\"chapter-list\">" in message for message in steering_messages) | |
| 2034 | - assert any("Suggested replacement block:" in message for message in steering_messages) | |
| 2035 | - assert any("Do not rewrite the whole document." in message for message in steering_messages) | |
| 2036 | - assert any("set `old_string` to the current TOC block above exactly" in message for message in steering_messages) | |
| 2037 | - assert any("Suggested edit call:" in message for message in steering_messages) | |
| 2038 | - assert any('old_string="""' in message for message in steering_messages) | |
| 2039 | - assert any( | |
| 2040 | - '<li><a href="chapters/05-input-output.html">Chapter 5: Input and Output</a></li>' in message | |
| 2041 | - for message in steering_messages | |
| 2042 | - ) | |
| 2023 | + assert any("Edited HTML links point to files that do not exist" in message for message in messages) | |
| 2024 | + assert steering_messages == [] | |
| 2043 | 2025 | |
| 2044 | 2026 | |
| 2045 | 2027 | @pytest.mark.asyncio |
@@ -2080,15 +2062,7 @@ async def test_full_path_glob_pattern_still_injects_verified_html_inventory( | ||
| 2080 | 2062 | |
| 2081 | 2063 | assert tool_event_names(run) == ["glob"] |
| 2082 | 2064 | messages = tool_result_messages(run) |
| 2083 | - assert any( | |
| 2084 | - "Verified chapter inventory: chapters/01-introduction.html = Chapter 1: Introduction to Fortran" | |
| 2085 | - in message | |
| 2086 | - for message in messages | |
| 2087 | - ) | |
| 2088 | - assert any( | |
| 2089 | - "chapters/02-setup.html = Chapter 2: Setting Up Fortran" in message | |
| 2090 | - for message in messages | |
| 2091 | - ) | |
| 2065 | + assert all("Verified chapter inventory:" not in message for message in messages) | |
| 2092 | 2066 | |
| 2093 | 2067 | |
| 2094 | 2068 | @pytest.mark.asyncio |
@@ -2136,16 +2110,8 @@ async def test_verified_html_inventory_blocks_redundant_chapter_reread( | ||
| 2136 | 2110 | ) |
| 2137 | 2111 | |
| 2138 | 2112 | messages = tool_result_messages(run) |
| 2139 | - assert any( | |
| 2140 | - "Verified chapter inventory: chapters/01-introduction.html = Chapter 1: Introduction to Fortran" | |
| 2141 | - in message | |
| 2142 | - for message in messages | |
| 2143 | - ) | |
| 2144 | - assert any( | |
| 2145 | - "verified sibling chapter inventory" | |
| 2146 | - in message | |
| 2147 | - for message in messages | |
| 2148 | - ) | |
| 2113 | + assert all("Verified chapter inventory:" not in message for message in messages) | |
| 2114 | + assert all("verified sibling chapter inventory" not in message for message in messages) | |
| 2149 | 2115 | |
| 2150 | 2116 | |
| 2151 | 2117 | @pytest.mark.asyncio |
@@ -2235,24 +2201,12 @@ async def test_successful_html_toc_edit_blocks_post_success_reread_and_steers_to | ||
| 2235 | 2201 | if event.type == "steering" and event.content |
| 2236 | 2202 | ] |
| 2237 | 2203 | |
| 2238 | - assert any( | |
| 2239 | - "Semantic verification preview: validated 2 toc links in index.html" | |
| 2240 | - in message | |
| 2241 | - for message in messages | |
| 2242 | - ) | |
| 2243 | - assert any( | |
| 2244 | - "already passed semantic link validation" in message | |
| 2204 | + assert all( | |
| 2205 | + "Semantic verification preview:" not in message | |
| 2245 | 2206 | for message in messages |
| 2246 | 2207 | ) |
| 2247 | - assert any( | |
| 2248 | - "already satisfies the verified link/title constraints" in message | |
| 2249 | - for message in steering_messages | |
| 2250 | - ) | |
| 2251 | - assert any( | |
| 2252 | - "Do not reread" in message and "chapters" in message | |
| 2253 | - for message in steering_messages | |
| 2254 | - ) | |
| 2255 | - assert "validated 2 toc links in index.html" in run.response | |
| 2208 | + assert steering_messages == [] | |
| 2209 | + assert "updated index.html" in run.response.lower() | |
| 2256 | 2210 | |
| 2257 | 2211 | |
| 2258 | 2212 | @pytest.mark.asyncio |
@@ -2325,17 +2279,11 @@ async def test_exact_prompt_finishes_when_index_toc_is_already_correct( | ||
| 2325 | 2279 | if event.type == "steering" and event.content |
| 2326 | 2280 | ] |
| 2327 | 2281 | |
| 2328 | - assert any( | |
| 2329 | - "Semantic verification preview: validated 2 toc links in index.html" | |
| 2330 | - in message | |
| 2282 | + assert all( | |
| 2283 | + "Semantic verification preview:" not in message | |
| 2331 | 2284 | for message in messages |
| 2332 | 2285 | ) |
| 2333 | - assert any( | |
| 2334 | - "No TOC edit is required unless you can point to one specific incorrect href or title" | |
| 2335 | - in message | |
| 2336 | - for message in steering_messages | |
| 2337 | - ) | |
| 2338 | - assert any(str(index_file) in message for message in steering_messages) | |
| 2286 | + assert steering_messages == [] | |
| 2339 | 2287 | assert ( |
| 2340 | 2288 | sum( |
| 2341 | 2289 | 1 |
tests/test_runtime_repair_flows.pymodified@@ -99,6 +99,111 @@ async def test_empty_response_retry_injects_honest_user_reminder_and_recovers( | ||
| 99 | 99 | ) |
| 100 | 100 | |
| 101 | 101 | |
| 102 | +@pytest.mark.asyncio | |
| 103 | +async def test_empty_response_retry_carries_forward_confirmed_progress( | |
| 104 | + temp_dir: Path, | |
| 105 | +) -> None: | |
| 106 | + target = temp_dir / "hello.py" | |
| 107 | + backend = ScriptedBackend( | |
| 108 | + completions=[ | |
| 109 | + CompletionResponse( | |
| 110 | + content="I'll create the file now.", | |
| 111 | + tool_calls=[ | |
| 112 | + ToolCall( | |
| 113 | + id="write-1", | |
| 114 | + name="write", | |
| 115 | + arguments={ | |
| 116 | + "file_path": str(target), | |
| 117 | + "content": "print('hello')\n", | |
| 118 | + }, | |
| 119 | + ) | |
| 120 | + ], | |
| 121 | + ), | |
| 122 | + CompletionResponse(content=""), | |
| 123 | + CompletionResponse(content="Recovered after the empty response."), | |
| 124 | + ] | |
| 125 | + ) | |
| 126 | + | |
| 127 | + run = await run_scenario( | |
| 128 | + "Create hello.py with a greeting.", | |
| 129 | + backend, | |
| 130 | + config=non_streaming_config(), | |
| 131 | + project_root=temp_dir, | |
| 132 | + ) | |
| 133 | + | |
| 134 | + assert "Recovered after the empty response." in run.response | |
| 135 | + retry_messages = [ | |
| 136 | + message.content | |
| 137 | + for message in backend.invocations[2].messages | |
| 138 | + if message.role == Role.USER and "[EMPTY ASSISTANT RESPONSE]" in message.content | |
| 139 | + ] | |
| 140 | + assert retry_messages | |
| 141 | + assert "retry 1/2" in retry_messages[0] | |
| 142 | + assert "Continue from the confirmed progress below instead of restarting." in retry_messages[0] | |
| 143 | + assert "hello.py" in retry_messages[0] | |
| 144 | + | |
| 145 | + | |
| 146 | +@pytest.mark.asyncio | |
| 147 | +async def test_empty_response_retry_budget_resets_after_successful_turn( | |
| 148 | + temp_dir: Path, | |
| 149 | +) -> None: | |
| 150 | + first = temp_dir / "one.txt" | |
| 151 | + second = temp_dir / "two.txt" | |
| 152 | + backend = ScriptedBackend( | |
| 153 | + completions=[ | |
| 154 | + CompletionResponse(content=""), | |
| 155 | + CompletionResponse( | |
| 156 | + content="I'll create the first file now.", | |
| 157 | + tool_calls=[ | |
| 158 | + ToolCall( | |
| 159 | + id="write-1", | |
| 160 | + name="write", | |
| 161 | + arguments={ | |
| 162 | + "file_path": str(first), | |
| 163 | + "content": "one\n", | |
| 164 | + }, | |
| 165 | + ) | |
| 166 | + ], | |
| 167 | + ), | |
| 168 | + CompletionResponse(content=""), | |
| 169 | + CompletionResponse( | |
| 170 | + content="I'll create the second file now.", | |
| 171 | + tool_calls=[ | |
| 172 | + ToolCall( | |
| 173 | + id="write-2", | |
| 174 | + name="write", | |
| 175 | + arguments={ | |
| 176 | + "file_path": str(second), | |
| 177 | + "content": "two\n", | |
| 178 | + }, | |
| 179 | + ) | |
| 180 | + ], | |
| 181 | + ), | |
| 182 | + CompletionResponse(content="Both files are created."), | |
| 183 | + ] | |
| 184 | + ) | |
| 185 | + | |
| 186 | + run = await run_scenario( | |
| 187 | + "Create one.txt and two.txt.", | |
| 188 | + backend, | |
| 189 | + config=non_streaming_config(), | |
| 190 | + project_root=temp_dir, | |
| 191 | + ) | |
| 192 | + | |
| 193 | + assert run.response.startswith("Both files are created.") | |
| 194 | + retry_messages: list[str] = [] | |
| 195 | + for invocation in backend.invocations: | |
| 196 | + for message in invocation.messages: | |
| 197 | + if message.role != Role.USER or "[EMPTY ASSISTANT RESPONSE]" not in message.content: | |
| 198 | + continue | |
| 199 | + if retry_messages and retry_messages[-1] == message.content: | |
| 200 | + continue | |
| 201 | + retry_messages.append(message.content) | |
| 202 | + assert len(retry_messages) >= 2 | |
| 203 | + assert all("retry 2/2" not in message for message in retry_messages) | |
| 204 | + assert sum("retry 1/2" in message for message in retry_messages) >= 2 | |
| 205 | + | |
| 206 | + | |
| 102 | 207 | @pytest.mark.asyncio |
| 103 | 208 | async def test_repeated_empty_responses_fail_honestly_after_one_retry( |
| 104 | 209 | temp_dir: Path, |
@@ -107,6 +212,7 @@ async def test_repeated_empty_responses_fail_honestly_after_one_retry( | ||
| 107 | 212 | completions=[ |
| 108 | 213 | CompletionResponse(content=""), |
| 109 | 214 | CompletionResponse(content=""), |
| 215 | + CompletionResponse(content=""), | |
| 110 | 216 | ] |
| 111 | 217 | ) |
| 112 | 218 | |
@@ -119,17 +225,22 @@ async def test_repeated_empty_responses_fail_honestly_after_one_retry( | ||
| 119 | 225 | |
| 120 | 226 | assert tool_event_names(run) == [] |
| 121 | 227 | assert run.response == ( |
| 122 | - "I didn't get a usable response from the model after retrying once. " | |
| 228 | + "I didn't get a usable response from the model after retrying 2 times. " | |
| 123 | 229 | "Please try again or switch to a different backend/model." |
| 124 | 230 | ) |
| 125 | - assert len(backend.invocations) == 2 | |
| 126 | - assert [entry.kind for entry in run.agent.last_turn_summary.workflow_timeline[-2:]] == [ | |
| 231 | + assert len(backend.invocations) == 3 | |
| 232 | + assert [entry.kind for entry in run.agent.last_turn_summary.workflow_timeline[-3:]] == [ | |
| 233 | + "repair_retry", | |
| 127 | 234 | "repair_retry", |
| 128 | 235 | "repair_fail", |
| 129 | 236 | ] |
| 130 | 237 | assert run.agent.last_turn_summary.workflow_timeline[-1].reason_code == ( |
| 131 | 238 | "empty_response_retry_exhausted" |
| 132 | 239 | ) |
| 240 | + assert run.agent.session.last_turn_transition_kind == "terminal" | |
| 241 | + assert run.agent.session.last_turn_transition_reason_code == ( | |
| 242 | + "empty_response_retry_exhausted" | |
| 243 | + ) | |
| 133 | 244 | |
| 134 | 245 | |
| 135 | 246 | @pytest.mark.asyncio |
tests/test_safeguard_services.pymodified@@ -16,8 +16,6 @@ from loader.runtime.safeguards import RuntimeSafeguards | ||
| 16 | 16 | from loader.runtime.semantic_rules.html_toc import ( |
| 17 | 17 | build_html_toc_edit_call_template, |
| 18 | 18 | build_html_toc_replacement_block, |
| 19 | - build_validated_html_toc_observation_reason, | |
| 20 | - build_verified_html_inventory_observation_reason, | |
| 21 | 19 | format_html_inventory_entry, |
| 22 | 20 | task_targets_html_toc, |
| 23 | 21 | validate_html_toc, |
@@ -214,88 +212,6 @@ def test_action_tracker_blocks_repeated_read_without_changes(tmp_path) -> None: | ||
| 214 | 212 | assert str(file_path) in reason |
| 215 | 213 | |
| 216 | 214 | |
| 217 | -def test_action_tracker_blocks_post_validation_html_rereads_until_new_mutation(tmp_path) -> None: | |
| 218 | - tracker = ActionTracker() | |
| 219 | - chapters = tmp_path / "chapters" | |
| 220 | - chapters.mkdir() | |
| 221 | - chapter_path = chapters / "01-introduction.html" | |
| 222 | - chapter_path.write_text("<h1>Chapter 1: Introduction to Fortran</h1>\n") | |
| 223 | - index_path = tmp_path / "index.html" | |
| 224 | - index_path.write_text( | |
| 225 | - '<ul class="chapter-list">\n' | |
| 226 | - ' <li><a href="chapters/01-introduction.html">Chapter 1: Introduction to Fortran</a></li>\n' | |
| 227 | - "</ul>\n" | |
| 228 | - ) | |
| 229 | - | |
| 230 | - tracker.note_validated_html_toc(str(index_path)) | |
| 231 | - | |
| 232 | - assert tracker.check_tool_call("read", {"file_path": str(index_path)}) == ( | |
| 233 | - True, | |
| 234 | - build_validated_html_toc_observation_reason(index_path), | |
| 235 | - ) | |
| 236 | - assert tracker.check_tool_call("read", {"file_path": str(chapter_path)}) == ( | |
| 237 | - True, | |
| 238 | - build_validated_html_toc_observation_reason(chapter_path), | |
| 239 | - ) | |
| 240 | - assert tracker.check_tool_call( | |
| 241 | - "glob", | |
| 242 | - {"path": str(chapters), "pattern": "*.html"}, | |
| 243 | - ) == ( | |
| 244 | - True, | |
| 245 | - build_validated_html_toc_observation_reason(chapters), | |
| 246 | - ) | |
| 247 | - assert tracker.check_tool_call( | |
| 248 | - "bash", | |
| 249 | - {"command": f"cat {index_path}"}, | |
| 250 | - ) == ( | |
| 251 | - True, | |
| 252 | - build_validated_html_toc_observation_reason(index_path), | |
| 253 | - ) | |
| 254 | - | |
| 255 | - tracker.record_tool_call( | |
| 256 | - "edit", | |
| 257 | - { | |
| 258 | - "file_path": str(index_path), | |
| 259 | - "old_string": "Chapter 1", | |
| 260 | - "new_string": "Chapter One", | |
| 261 | - }, | |
| 262 | - ) | |
| 263 | - | |
| 264 | - assert tracker.check_tool_call("read", {"file_path": str(index_path)}) == (False, "") | |
| 265 | - | |
| 266 | - | |
| 267 | -def test_action_tracker_blocks_chapter_rereads_after_verified_inventory(tmp_path) -> None: | |
| 268 | - tracker = ActionTracker() | |
| 269 | - chapters = tmp_path / "chapters" | |
| 270 | - chapters.mkdir() | |
| 271 | - chapter_path = chapters / "01-introduction.html" | |
| 272 | - chapter_path.write_text("<h1>Chapter 1: Introduction to Fortran</h1>\n") | |
| 273 | - index_path = tmp_path / "index.html" | |
| 274 | - index_path.write_text("<ul></ul>\n") | |
| 275 | - | |
| 276 | - tracker.note_verified_html_inventory(str(index_path)) | |
| 277 | - | |
| 278 | - assert tracker.check_tool_call("read", {"file_path": str(index_path)}) == (False, "") | |
| 279 | - assert tracker.check_tool_call("read", {"file_path": str(chapter_path)}) == ( | |
| 280 | - True, | |
| 281 | - build_verified_html_inventory_observation_reason(chapter_path), | |
| 282 | - ) | |
| 283 | - assert tracker.check_tool_call( | |
| 284 | - "glob", | |
| 285 | - {"path": str(chapters), "pattern": "*.html"}, | |
| 286 | - ) == ( | |
| 287 | - True, | |
| 288 | - build_verified_html_inventory_observation_reason(chapters), | |
| 289 | - ) | |
| 290 | - assert tracker.check_tool_call( | |
| 291 | - "bash", | |
| 292 | - {"command": f"head -20 {chapter_path}"}, | |
| 293 | - ) == ( | |
| 294 | - True, | |
| 295 | - build_verified_html_inventory_observation_reason(chapter_path), | |
| 296 | - ) | |
| 297 | - | |
| 298 | - | |
| 299 | 215 | def test_action_tracker_allows_one_interleaved_reread_without_changes(tmp_path) -> None: |
| 300 | 216 | tracker = ActionTracker() |
| 301 | 217 | index_path = tmp_path / "index.html" |
@@ -372,7 +288,7 @@ def test_action_tracker_blocks_second_target_index_reread_after_chapter_discover | ||
| 372 | 288 | is_duplicate, reason = tracker.check_tool_call("read", {"file_path": str(index_path)}) |
| 373 | 289 | |
| 374 | 290 | assert is_duplicate is True |
| 375 | - assert "reuse that file/title evidence" in reason | |
| 291 | + assert "reuse the earlier read result instead of rereading" in reason | |
| 376 | 292 | |
| 377 | 293 | |
| 378 | 294 | def test_action_tracker_blocks_repeated_chapter_directory_search_once_titles_are_known( |
@@ -383,14 +299,12 @@ def test_action_tracker_blocks_repeated_chapter_directory_search_once_titles_are | ||
| 383 | 299 | search_args = {"pattern": "*.html", "path": str(chapters)} |
| 384 | 300 | |
| 385 | 301 | tracker.record_tool_call("glob", search_args) |
| 386 | - tracker.record_tool_call("read", {"file_path": str(chapters / "01-introduction.html")}) | |
| 387 | - tracker.record_tool_call("read", {"file_path": str(chapters / "02-setup.html")}) | |
| 388 | - tracker.record_tool_call("read", {"file_path": str(chapters / "03-basics.html")}) | |
| 302 | + tracker.record_tool_call("glob", search_args) | |
| 389 | 303 | |
| 390 | 304 | is_duplicate, reason = tracker.check_tool_call("glob", search_args) |
| 391 | 305 | |
| 392 | 306 | assert is_duplicate is True |
| 393 | - assert "reuse that filename/title evidence" in reason | |
| 307 | + assert "reuse the earlier search result instead of rerunning it" in reason | |
| 394 | 308 | |
| 395 | 309 | |
| 396 | 310 | def test_action_tracker_allows_repeated_read_after_mutation(tmp_path) -> None: |
@@ -490,8 +404,8 @@ def test_pre_action_validator_blocks_index_edit_with_missing_chapter_href(tmp_pa | ||
| 490 | 404 | ) |
| 491 | 405 | |
| 492 | 406 | assert result.valid is False |
| 493 | - assert result.reason == "Edited TOC references chapter files that do not exist" | |
| 494 | - assert "chapters/05-input-output.html = Chapter 5: Input and Output" in result.suggestion | |
| 407 | + assert result.reason == "Edited HTML links point to files that do not exist" | |
| 408 | + assert "chapters/05-control-structures.html" in result.suggestion | |
| 495 | 409 | |
| 496 | 410 | |
| 497 | 411 | def test_pre_action_validator_blocks_index_edit_with_title_mismatch(tmp_path) -> None: |
@@ -512,12 +426,111 @@ def test_pre_action_validator_blocks_index_edit_with_title_mismatch(tmp_path) -> | ||
| 512 | 426 | }, |
| 513 | 427 | ) |
| 514 | 428 | |
| 429 | + assert result.valid is True | |
| 430 | + | |
| 431 | + | |
| 432 | +def test_pre_action_validator_allows_chapter_write_with_future_target_declared_by_index( | |
| 433 | + tmp_path: Path, | |
| 434 | +) -> None: | |
| 435 | + validator = PreActionValidator() | |
| 436 | + guide = tmp_path / "guide" | |
| 437 | + chapters = guide / "chapters" | |
| 438 | + chapters.mkdir(parents=True) | |
| 439 | + (guide / "index.html").write_text( | |
| 440 | + "\n".join( | |
| 441 | + [ | |
| 442 | + '<a href="chapters/introduction.html">Introduction</a>', | |
| 443 | + '<a href="chapters/installation.html">Installation</a>', | |
| 444 | + "", | |
| 445 | + ] | |
| 446 | + ) | |
| 447 | + ) | |
| 448 | + | |
| 449 | + result = validator.validate( | |
| 450 | + "write", | |
| 451 | + { | |
| 452 | + "file_path": str(chapters / "introduction.html"), | |
| 453 | + "content": '<a href="installation.html">Next</a>\n', | |
| 454 | + }, | |
| 455 | + ) | |
| 456 | + | |
| 457 | + assert result.valid is True | |
| 458 | + | |
| 459 | + | |
| 460 | +def test_pre_action_validator_blocks_chapter_write_with_undeclared_missing_sibling( | |
| 461 | + tmp_path: Path, | |
| 462 | +) -> None: | |
| 463 | + validator = PreActionValidator() | |
| 464 | + guide = tmp_path / "guide" | |
| 465 | + chapters = guide / "chapters" | |
| 466 | + chapters.mkdir(parents=True) | |
| 467 | + (guide / "index.html").write_text( | |
| 468 | + "\n".join( | |
| 469 | + [ | |
| 470 | + '<a href="chapters/introduction.html">Introduction</a>', | |
| 471 | + '<a href="chapters/installation.html">Installation</a>', | |
| 472 | + '<a href="chapters/configuration.html">Configuration</a>', | |
| 473 | + '<a href="chapters/usage.html">Usage</a>', | |
| 474 | + '<a href="chapters/troubleshooting.html">Troubleshooting</a>', | |
| 475 | + "", | |
| 476 | + ] | |
| 477 | + ) | |
| 478 | + ) | |
| 479 | + (chapters / "introduction.html").write_text('<a href="installation.html">Next</a>\n') | |
| 480 | + (chapters / "installation.html").write_text('<a href="configuration.html">Next</a>\n') | |
| 481 | + (chapters / "configuration.html").write_text('<a href="usage.html">Next</a>\n') | |
| 482 | + | |
| 483 | + result = validator.validate( | |
| 484 | + "write", | |
| 485 | + { | |
| 486 | + "file_path": str(chapters / "usage.html"), | |
| 487 | + "content": '<a href="advanced.html">Next</a>\n', | |
| 488 | + }, | |
| 489 | + ) | |
| 490 | + | |
| 515 | 491 | assert result.valid is False |
| 516 | - assert result.reason == "Edited TOC labels do not match the linked chapter titles" | |
| 517 | 492 | assert ( |
| 518 | - "chapters/12-troubleshooting-tips.html = Chapter 12: Troubleshooting and Tips" | |
| 519 | - in result.suggestion | |
| 493 | + result.reason | |
| 494 | + == "HTML page introduces new local targets outside the current declared artifact set" | |
| 520 | 495 | ) |
| 496 | + assert "advanced.html" in result.suggestion | |
| 497 | + | |
| 498 | + | |
| 499 | +def test_pre_action_validator_blocks_missing_numbered_read_with_existing_sibling( | |
| 500 | + tmp_path: Path, | |
| 501 | +) -> None: | |
| 502 | + validator = PreActionValidator() | |
| 503 | + chapters = tmp_path / "chapters" | |
| 504 | + chapters.mkdir() | |
| 505 | + (chapters / "01-getting-started.html").write_text("<h1>Getting Started</h1>\n") | |
| 506 | + | |
| 507 | + result = validator.validate( | |
| 508 | + "read", | |
| 509 | + {"file_path": str(chapters / "01-introduction.html")}, | |
| 510 | + ) | |
| 511 | + | |
| 512 | + assert result.valid is False | |
| 513 | + assert result.reason == "Read target conflicts with an existing numbered sibling" | |
| 514 | + assert "01-getting-started.html" in result.suggestion | |
| 515 | + | |
| 516 | + | |
| 517 | +def test_pre_action_validator_blocks_new_numbered_sibling_drift(tmp_path) -> None: | |
| 518 | + validator = PreActionValidator() | |
| 519 | + chapters = tmp_path / "chapters" | |
| 520 | + chapters.mkdir() | |
| 521 | + (chapters / "01-getting-started.html").write_text("<h1>Getting Started</h1>\n") | |
| 522 | + | |
| 523 | + result = validator.validate( | |
| 524 | + "write", | |
| 525 | + { | |
| 526 | + "file_path": str(chapters / "01-intro.html"), | |
| 527 | + "content": "<h1>Intro</h1>\n", | |
| 528 | + }, | |
| 529 | + ) | |
| 530 | + | |
| 531 | + assert result.valid is False | |
| 532 | + assert result.reason == "New file conflicts with an existing numbered sibling" | |
| 533 | + assert "01-getting-started.html" in result.suggestion | |
| 521 | 534 | |
| 522 | 535 | |
| 523 | 536 | def test_format_html_inventory_entry_handles_tmp_alias_paths() -> None: |
tests/test_tool_batch_policies.pymodified@@ -373,8 +373,6 @@ async def test_tool_batch_recovery_controller_includes_known_state_for_missing_f | ||
| 373 | 373 | assert "Prefer edit/write/patch on the target file" in follow_up.content |
| 374 | 374 | assert "04-variables.html" in follow_up.content |
| 375 | 375 | assert "02-basic-syntax.html -> 02-setup.html" in follow_up.content |
| 376 | - assert "02-setup.html = Chapter 2: Setting Up Fortran" in follow_up.content | |
| 377 | - assert "/Users/mfwolffe/Loader/guides/fortran/index.html" in follow_up.content | |
| 378 | 376 | assert any(event.type == "recovery" for event in events) |
| 379 | 377 | |
| 380 | 378 | |
@@ -430,7 +428,6 @@ async def test_tool_batch_recovery_controller_suggests_known_sibling_files( | ||
| 430 | 428 | assert follow_up is not None |
| 431 | 429 | assert "## LIKELY FILE CANDIDATES" in follow_up.content |
| 432 | 430 | assert "`04-variables.html`" in follow_up.content |
| 433 | - assert "Chapter 4: Variables and Data Types" in follow_up.content | |
| 434 | 431 | assert "instead of retrying the missing path" in follow_up.content |
| 435 | 432 | |
| 436 | 433 | |
@@ -506,17 +503,79 @@ async def test_tool_batch_recovery_controller_includes_current_html_target_excer | ||
| 506 | 503 | |
| 507 | 504 | assert follow_up is not None |
| 508 | 505 | assert "## CURRENT TARGET EXCERPT" in follow_up.content |
| 509 | - assert "Verified chapter inventory:" in follow_up.content | |
| 510 | - assert "<ul class=\"chapter-list\">" in follow_up.content | |
| 511 | - assert "chapters/02-setup.html = Chapter 2: Setting Up Your Environment" in follow_up.content | |
| 512 | - assert "Suggested replacement block:" in follow_up.content | |
| 513 | - assert '<li><a href="chapters/02-setup.html">Chapter 2: Setting Up Your Environment</a></li>' in follow_up.content | |
| 514 | - assert "Exact edit guidance:" in follow_up.content | |
| 515 | - assert "old_string: use the Current TOC block above exactly" in follow_up.content | |
| 516 | - assert "new_string: use the Suggested replacement block above exactly" in follow_up.content | |
| 517 | - assert "Do not rewrite the whole file." in follow_up.content | |
| 518 | - assert "Suggested edit call:" in follow_up.content | |
| 519 | - assert 'old_string="""' in follow_up.content | |
| 506 | + assert "- Target file:" in follow_up.content | |
| 507 | + assert "index.html" in follow_up.content | |
| 508 | + assert ( | |
| 509 | + "Closest on-disk block to the requested patch:" in follow_up.content | |
| 510 | + or "Current file contents near the requested patch location:" in follow_up.content | |
| 511 | + ) | |
| 512 | + assert '1 | <h2>Table of Contents</h2>' in follow_up.content | |
| 513 | + assert ( | |
| 514 | + '3 | <li><a href="chapters/01-introduction.html">Chapter 1: Introduction to Fortran</a></li>' | |
| 515 | + in follow_up.content | |
| 516 | + ) | |
| 517 | + assert "Use the exact on-disk text above" in follow_up.content | |
| 518 | + assert "Verified chapter inventory:" not in follow_up.content | |
| 519 | + | |
| 520 | + | |
| 521 | +@pytest.mark.asyncio | |
| 522 | +async def test_tool_batch_recovery_controller_includes_current_target_excerpt_for_edit_mismatch( | |
| 523 | + temp_dir: Path, | |
| 524 | +) -> None: | |
| 525 | + async def assess_confidence(tool_name: str, tool_args: dict, context: str) -> ConfidenceAssessment: | |
| 526 | + raise AssertionError("Confidence should not run here") | |
| 527 | + | |
| 528 | + async def verify_action(tool_name: str, tool_args: dict, result: str, expected: str = "") -> ActionVerification: | |
| 529 | + raise AssertionError("Verification should not run here") | |
| 530 | + | |
| 531 | + guide = temp_dir / "guide.md" | |
| 532 | + guide.write_text( | |
| 533 | + "# Loader Guide\n" | |
| 534 | + "\n" | |
| 535 | + "## Overview\n" | |
| 536 | + "Loader helps agentic coding workflows.\n" | |
| 537 | + "\n" | |
| 538 | + "## Status\n" | |
| 539 | + "The runtime is stable.\n" | |
| 540 | + ) | |
| 541 | + | |
| 542 | + context = build_context( | |
| 543 | + temp_dir=temp_dir, | |
| 544 | + messages=[], | |
| 545 | + assess_confidence=assess_confidence, | |
| 546 | + verify_action=verify_action, | |
| 547 | + ) | |
| 548 | + context.session.current_task = "Update guide.md to mention the runtime is resilient." | |
| 549 | + controller = ToolBatchRecoveryController(context) | |
| 550 | + tool_call = ToolCall( | |
| 551 | + id="edit-guide", | |
| 552 | + name="edit", | |
| 553 | + arguments={ | |
| 554 | + "file_path": str(guide), | |
| 555 | + "old_string": "## Runtime\nThe runtime is stable.\n", | |
| 556 | + "new_string": "## Runtime\nThe runtime is resilient.\n", | |
| 557 | + }, | |
| 558 | + ) | |
| 559 | + outcome = tool_outcome( | |
| 560 | + tool_call=tool_call, | |
| 561 | + output="old_string not found in file. Make sure it matches exactly.", | |
| 562 | + is_error=True, | |
| 563 | + ) | |
| 564 | + | |
| 565 | + follow_up = await controller.build_follow_up( | |
| 566 | + tool_call=tool_call, | |
| 567 | + outcome=outcome, | |
| 568 | + emit=lambda event: _noop_emit(event), | |
| 569 | + ) | |
| 570 | + | |
| 571 | + assert follow_up is not None | |
| 572 | + assert "## CURRENT TARGET EXCERPT" in follow_up.content | |
| 573 | + assert "- Target file:" in follow_up.content | |
| 574 | + assert "guide.md" in follow_up.content | |
| 575 | + assert "Closest on-disk block to the requested edit:" in follow_up.content | |
| 576 | + assert "6 | ## Status" in follow_up.content | |
| 577 | + assert "7 | The runtime is stable." in follow_up.content | |
| 578 | + assert "replace the containing block in one edit" in follow_up.content | |
| 520 | 579 | |
| 521 | 580 | |
| 522 | 581 | @pytest.mark.asyncio |
@@ -610,6 +669,94 @@ async def test_tool_batch_recovery_controller_scopes_known_state_to_active_targe | ||
| 610 | 669 | ) not in follow_up.content |
| 611 | 670 | |
| 612 | 671 | |
| 672 | +@pytest.mark.asyncio | |
| 673 | +async def test_tool_batch_recovery_controller_prioritizes_active_verification_repair_target( | |
| 674 | + temp_dir: Path, | |
| 675 | +) -> None: | |
| 676 | + async def assess_confidence( | |
| 677 | + tool_name: str, | |
| 678 | + tool_args: dict, | |
| 679 | + context: str, | |
| 680 | + ) -> ConfidenceAssessment: | |
| 681 | + raise AssertionError("Confidence should not run here") | |
| 682 | + | |
| 683 | + async def verify_action( | |
| 684 | + tool_name: str, | |
| 685 | + tool_args: dict, | |
| 686 | + result: str, | |
| 687 | + expected: str = "", | |
| 688 | + ) -> ActionVerification: | |
| 689 | + raise AssertionError("Verification should not run here") | |
| 690 | + | |
| 691 | + nginx_root = temp_dir / "Loader" / "guides" / "nginx" | |
| 692 | + chapters = nginx_root / "chapters" | |
| 693 | + chapters.mkdir(parents=True) | |
| 694 | + index = nginx_root / "index.html" | |
| 695 | + index.write_text( | |
| 696 | + "<ul>\n" | |
| 697 | + ' <li><a href="chapters/01-introduction.html">Introduction</a></li>\n' | |
| 698 | + "</ul>\n" | |
| 699 | + ) | |
| 700 | + (chapters / "01-getting-started.html").write_text("<h1>Getting Started</h1>\n") | |
| 701 | + | |
| 702 | + repair_message = ( | |
| 703 | + "[DEFINITION OF DONE CHECK FAILED]\n" | |
| 704 | + "Repair focus:\n" | |
| 705 | + f"- Fix the broken local reference `chapters/01-introduction.html` in `{index}`.\n" | |
| 706 | + f"- Immediate next step: edit `{index}`.\n" | |
| 707 | + f"- If the broken reference should remain, create `{chapters / '01-introduction.html'}`; " | |
| 708 | + "otherwise remove or replace `chapters/01-introduction.html`.\n" | |
| 709 | + "- Do not reread unrelated reference materials or restart discovery while this " | |
| 710 | + "concrete repair target is unresolved.\n" | |
| 711 | + ) | |
| 712 | + | |
| 713 | + context = build_context( | |
| 714 | + temp_dir=temp_dir, | |
| 715 | + messages=[ | |
| 716 | + Message(role=Role.USER, content=repair_message), | |
| 717 | + Message( | |
| 718 | + role=Role.TOOL, | |
| 719 | + content=( | |
| 720 | + "Observation [glob]: Result: " | |
| 721 | + f"{chapters / '01-getting-started.html'}" | |
| 722 | + ), | |
| 723 | + ), | |
| 724 | + ], | |
| 725 | + assess_confidence=assess_confidence, | |
| 726 | + verify_action=verify_action, | |
| 727 | + ) | |
| 728 | + context.session.current_task = ( # type: ignore[attr-defined] | |
| 729 | + "Have a look at ~/Loader/guides/fortran and chapters/ within. Get a feel " | |
| 730 | + "for the structure and cadence of the guide. We are going to make an all " | |
| 731 | + "new equally thorough guide on how to use the nginx tool." | |
| 732 | + ) | |
| 733 | + controller = ToolBatchRecoveryController(context) | |
| 734 | + tool_call = ToolCall( | |
| 735 | + id="read-bad-path", | |
| 736 | + name="read", | |
| 737 | + arguments={"path": "~/nginx-guide/chapter1.html"}, | |
| 738 | + ) | |
| 739 | + outcome = tool_outcome( | |
| 740 | + tool_call=tool_call, | |
| 741 | + output="File not found: ~/nginx-guide/chapter1.html", | |
| 742 | + is_error=True, | |
| 743 | + ) | |
| 744 | + | |
| 745 | + follow_up = await controller.build_follow_up( | |
| 746 | + tool_call=tool_call, | |
| 747 | + outcome=outcome, | |
| 748 | + emit=lambda event: _noop_emit(event), | |
| 749 | + ) | |
| 750 | + | |
| 751 | + assert follow_up is not None | |
| 752 | + assert "## ACTIVE REPAIR TARGET" in follow_up.content | |
| 753 | + assert str(index) in follow_up.content | |
| 754 | + assert "chapters/01-introduction.html" in follow_up.content | |
| 755 | + assert "Do not go back to the original reference guide" in follow_up.content | |
| 756 | + assert "Current task: Have a look at ~/Loader/guides/fortran" not in follow_up.content | |
| 757 | + assert "~/nginx-guide/chapter1.html" in follow_up.content | |
| 758 | + | |
| 759 | + | |
| 613 | 760 | @pytest.mark.asyncio |
| 614 | 761 | async def test_tool_batch_recovery_controller_reuses_context_for_related_missing_files( |
| 615 | 762 | temp_dir: Path, |
@@ -671,6 +818,71 @@ async def test_tool_batch_recovery_controller_reuses_context_for_related_missing | ||
| 671 | 818 | assert "02-basic-syntax.html" in follow_up.content |
| 672 | 819 | |
| 673 | 820 | |
| 821 | +@pytest.mark.asyncio | |
| 822 | +async def test_tool_batch_recovery_controller_uses_generic_loop_guidance( | |
| 823 | + temp_dir: Path, | |
| 824 | +) -> None: | |
| 825 | + async def assess_confidence( | |
| 826 | + tool_name: str, | |
| 827 | + tool_args: dict, | |
| 828 | + context: str, | |
| 829 | + ) -> ConfidenceAssessment: | |
| 830 | + raise AssertionError("Confidence should not run here") | |
| 831 | + | |
| 832 | + async def verify_action( | |
| 833 | + tool_name: str, | |
| 834 | + tool_args: dict, | |
| 835 | + result: str, | |
| 836 | + expected: str = "", | |
| 837 | + ) -> ActionVerification: | |
| 838 | + raise AssertionError("Verification should not run here") | |
| 839 | + | |
| 840 | + existing = RecoveryContext( | |
| 841 | + original_tool="read", | |
| 842 | + original_args={"file_path": "~/Loader/guides/nginx/chapters/01-introduction.html"}, | |
| 843 | + max_retries=3, | |
| 844 | + ) | |
| 845 | + existing.add_attempt( | |
| 846 | + "read", | |
| 847 | + {"file_path": "~/Loader/guides/nginx/chapters/01-introduction.html"}, | |
| 848 | + "File not found: ~/Loader/guides/nginx/chapters/01-introduction.html", | |
| 849 | + ) | |
| 850 | + context = build_context( | |
| 851 | + temp_dir=temp_dir, | |
| 852 | + messages=[], | |
| 853 | + assess_confidence=assess_confidence, | |
| 854 | + verify_action=verify_action, | |
| 855 | + recovery_context=existing, | |
| 856 | + ) | |
| 857 | + controller = ToolBatchRecoveryController(context) | |
| 858 | + tool_call = ToolCall( | |
| 859 | + id="read-missing-repeat", | |
| 860 | + name="read", | |
| 861 | + arguments={"file_path": "~/Loader/guides/nginx/chapters/01-introduction.html"}, | |
| 862 | + ) | |
| 863 | + outcome = tool_outcome( | |
| 864 | + tool_call=tool_call, | |
| 865 | + output="File not found: ~/Loader/guides/nginx/chapters/01-introduction.html", | |
| 866 | + is_error=True, | |
| 867 | + ) | |
| 868 | + events: list[AgentEvent] = [] | |
| 869 | + | |
| 870 | + async def emit(event: AgentEvent) -> None: | |
| 871 | + events.append(event) | |
| 872 | + | |
| 873 | + follow_up = await controller.build_follow_up( | |
| 874 | + tool_call=tool_call, | |
| 875 | + outcome=outcome, | |
| 876 | + emit=emit, | |
| 877 | + ) | |
| 878 | + | |
| 879 | + assert follow_up is not None | |
| 880 | + assert any(event.type == "error" for event in events) | |
| 881 | + error_event = next(event for event in events if event.type == "error") | |
| 882 | + assert "read a config file first" not in error_event.content | |
| 883 | + assert "verify the current result" in error_event.content | |
| 884 | + | |
| 885 | + | |
| 674 | 886 | @pytest.mark.asyncio |
| 675 | 887 | async def test_tool_batch_recovery_controller_resets_context_for_unrelated_failures( |
| 676 | 888 | temp_dir: Path, |
tests/test_tool_batches.pymodified@@ -27,7 +27,12 @@ from loader.runtime.reasoning_types import ( | ||
| 27 | 27 | ConfidenceLevel, |
| 28 | 28 | ) |
| 29 | 29 | from loader.runtime.recovery import RecoveryContext |
| 30 | -from loader.runtime.tool_batches import ToolBatchRunner | |
| 30 | +from loader.runtime.tool_batches import ( | |
| 31 | + ToolBatchRunner, | |
| 32 | +) | |
| 33 | +from loader.runtime.tool_batches import ( | |
| 34 | + _should_prioritize_missing_artifact as tool_batches_should_prioritize_missing_artifact, | |
| 35 | +) | |
| 31 | 36 | from loader.runtime.workflow import sync_todos_to_definition_of_done |
| 32 | 37 | from loader.tools.base import ToolResult as RegistryToolResult |
| 33 | 38 | from loader.tools.base import create_default_registry |
@@ -610,6 +615,26 @@ async def test_tool_batch_runner_queues_duplicate_observation_nudge( | ||
| 610 | 615 | verify_action=verify_action, |
| 611 | 616 | auto_recover=False, |
| 612 | 617 | ) |
| 618 | + (temp_dir / "chapters").mkdir() | |
| 619 | + (temp_dir / "index.html").write_text("<ul></ul>\n") | |
| 620 | + (temp_dir / "chapters" / "01-introduction.html").write_text("<h1>Intro</h1>\n") | |
| 621 | + (temp_dir / "chapters" / "02-setup.html").write_text("<h1>Setup</h1>\n") | |
| 622 | + (temp_dir / "chapters" / "03-basics.html").write_text("<h1>Basics</h1>\n") | |
| 623 | + implementation_plan = temp_dir / "implementation.md" | |
| 624 | + implementation_plan.write_text( | |
| 625 | + "\n".join( | |
| 626 | + [ | |
| 627 | + "# Implementation Plan", | |
| 628 | + "", | |
| 629 | + "## File Changes", | |
| 630 | + f"- `{temp_dir / 'index.html'}`", | |
| 631 | + f"- `{temp_dir / 'chapters' / '01-introduction.html'}`", | |
| 632 | + f"- `{temp_dir / 'chapters' / '02-setup.html'}`", | |
| 633 | + f"- `{temp_dir / 'chapters' / '03-basics.html'}`", | |
| 634 | + f"- `{temp_dir / 'chapters' / '04-variables.html'}`", | |
| 635 | + ] | |
| 636 | + ) | |
| 637 | + ) | |
| 613 | 638 | context.session.current_task = ( |
| 614 | 639 | f"Update {temp_dir / 'index.html'} with the right chapter links." |
| 615 | 640 | ) |
@@ -644,13 +669,16 @@ async def test_tool_batch_runner_queues_duplicate_observation_nudge( | ||
| 644 | 669 | ) |
| 645 | 670 | |
| 646 | 671 | summary = TurnSummary(final_response="") |
| 672 | + dod = create_definition_of_done("Fix the chapter links") | |
| 673 | + dod.implementation_plan = str(implementation_plan) | |
| 674 | + dod.pending_items.append("Create the remaining chapter files") | |
| 647 | 675 | await runner.execute_batch( |
| 648 | 676 | tool_calls=[tool_call], |
| 649 | 677 | tool_source="assistant", |
| 650 | 678 | pending_tool_calls_seen=set(), |
| 651 | 679 | emit=_noop_emit, |
| 652 | 680 | summary=summary, |
| 653 | - dod=create_definition_of_done("Fix the chapter links"), | |
| 681 | + dod=dod, | |
| 654 | 682 | executor=executor, # type: ignore[arg-type] |
| 655 | 683 | on_confirmation=None, |
| 656 | 684 | on_user_question=None, |
@@ -660,8 +688,128 @@ async def test_tool_batch_runner_queues_duplicate_observation_nudge( | ||
| 660 | 688 | |
| 661 | 689 | assert len(queued_messages) == 1 |
| 662 | 690 | assert "Reuse the earlier observation instead of repeating it." in queued_messages[0] |
| 663 | - assert "01-introduction.html = Chapter 1: Introduction to Fortran" in queued_messages[0] | |
| 664 | - assert "index.html" in queued_messages[0] | |
| 691 | + assert "Continue with the next pending item: `Create the remaining chapter files`." in queued_messages[0] | |
| 692 | + assert "Resume by creating `04-variables.html` now." in queued_messages[0] | |
| 693 | + assert f"Prefer one `write` call for `{temp_dir / 'chapters' / '04-variables.html'}` instead of more rereads." in queued_messages[0] | |
| 694 | + | |
| 695 | + | |
| 696 | +@pytest.mark.asyncio | |
| 697 | +async def test_tool_batch_runner_todo_write_does_not_regress_completed_file_todo( | |
| 698 | + temp_dir: Path, | |
| 699 | +) -> None: | |
| 700 | + async def assess_confidence( | |
| 701 | + tool_name: str, | |
| 702 | + tool_args: dict, | |
| 703 | + context: str, | |
| 704 | + ) -> ConfidenceAssessment: | |
| 705 | + raise AssertionError("Confidence scoring should not run for this scenario") | |
| 706 | + | |
| 707 | + async def verify_action( | |
| 708 | + tool_name: str, | |
| 709 | + tool_args: dict, | |
| 710 | + result: str, | |
| 711 | + expected: str = "", | |
| 712 | + ) -> ActionVerification: | |
| 713 | + raise AssertionError("Verification should not run for this scenario") | |
| 714 | + | |
| 715 | + context = build_context( | |
| 716 | + temp_dir=temp_dir, | |
| 717 | + messages=[], | |
| 718 | + safeguards=FakeSafeguards(), | |
| 719 | + assess_confidence=assess_confidence, | |
| 720 | + verify_action=verify_action, | |
| 721 | + auto_recover=False, | |
| 722 | + ) | |
| 723 | + runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) | |
| 724 | + dod = create_definition_of_done("Create a multi-file nginx guide.") | |
| 725 | + sync_todos_to_definition_of_done( | |
| 726 | + dod, | |
| 727 | + [ | |
| 728 | + { | |
| 729 | + "content": "Create 03-first-website.html", | |
| 730 | + "active_form": "Creating 03-first-website.html", | |
| 731 | + "status": "pending", | |
| 732 | + }, | |
| 733 | + { | |
| 734 | + "content": "Create 04-configuration-basics.html", | |
| 735 | + "active_form": "Creating 04-configuration-basics.html", | |
| 736 | + "status": "pending", | |
| 737 | + }, | |
| 738 | + ], | |
| 739 | + ) | |
| 740 | + | |
| 741 | + chapter_path = temp_dir / "guides" / "nginx" / "chapters" / "03-first-website.html" | |
| 742 | + chapter_path.parent.mkdir(parents=True) | |
| 743 | + write_call = ToolCall( | |
| 744 | + id="write-ch3", | |
| 745 | + name="write", | |
| 746 | + arguments={"file_path": str(chapter_path), "content": "<html></html>\n"}, | |
| 747 | + ) | |
| 748 | + stale_todo_call = ToolCall( | |
| 749 | + id="todo-stale", | |
| 750 | + name="TodoWrite", | |
| 751 | + arguments={ | |
| 752 | + "todos": [ | |
| 753 | + { | |
| 754 | + "content": "Create 03-first-website.html", | |
| 755 | + "active_form": "Creating 03-first-website.html", | |
| 756 | + "status": "pending", | |
| 757 | + }, | |
| 758 | + { | |
| 759 | + "content": "Create 04-configuration-basics.html", | |
| 760 | + "active_form": "Creating 04-configuration-basics.html", | |
| 761 | + "status": "pending", | |
| 762 | + }, | |
| 763 | + ] | |
| 764 | + }, | |
| 765 | + ) | |
| 766 | + executor = FakeExecutor( | |
| 767 | + [ | |
| 768 | + tool_outcome( | |
| 769 | + tool_call=write_call, | |
| 770 | + output=f"Successfully wrote {chapter_path}", | |
| 771 | + is_error=False, | |
| 772 | + ), | |
| 773 | + tool_outcome( | |
| 774 | + tool_call=stale_todo_call, | |
| 775 | + output="Todos updated", | |
| 776 | + is_error=False, | |
| 777 | + metadata={ | |
| 778 | + "new_todos": [ | |
| 779 | + { | |
| 780 | + "content": "Create 03-first-website.html", | |
| 781 | + "active_form": "Creating 03-first-website.html", | |
| 782 | + "status": "pending", | |
| 783 | + }, | |
| 784 | + { | |
| 785 | + "content": "Create 04-configuration-basics.html", | |
| 786 | + "active_form": "Creating 04-configuration-basics.html", | |
| 787 | + "status": "pending", | |
| 788 | + }, | |
| 789 | + ] | |
| 790 | + }, | |
| 791 | + ), | |
| 792 | + ] | |
| 793 | + ) | |
| 794 | + | |
| 795 | + summary = TurnSummary(final_response="") | |
| 796 | + await runner.execute_batch( | |
| 797 | + tool_calls=[write_call, stale_todo_call], | |
| 798 | + tool_source="assistant", | |
| 799 | + pending_tool_calls_seen=set(), | |
| 800 | + emit=_noop_emit, | |
| 801 | + summary=summary, | |
| 802 | + dod=dod, | |
| 803 | + executor=executor, # type: ignore[arg-type] | |
| 804 | + on_confirmation=None, | |
| 805 | + on_user_question=None, | |
| 806 | + emit_confirmation=None, | |
| 807 | + consecutive_errors=0, | |
| 808 | + ) | |
| 809 | + | |
| 810 | + assert "Create 03-first-website.html" in dod.completed_items | |
| 811 | + assert "Create 03-first-website.html" not in dod.pending_items | |
| 812 | + assert "Create 04-configuration-basics.html" in dod.pending_items | |
| 665 | 813 | |
| 666 | 814 | |
| 667 | 815 | @pytest.mark.asyncio |
@@ -742,15 +890,9 @@ async def test_tool_batch_runner_proactively_queues_verified_html_inventory( | ||
| 742 | 890 | consecutive_errors=0, |
| 743 | 891 | ) |
| 744 | 892 | |
| 745 | - assert len(queued_messages) == 1 | |
| 746 | - assert "verified sibling inventory" in queued_messages[0] | |
| 747 | - assert "chapters/01-introduction.html = Chapter 1: Introduction to Fortran" in queued_messages[0] | |
| 748 | - assert str(temp_dir / "index.html") in queued_messages[0] | |
| 893 | + assert queued_messages == [] | |
| 749 | 894 | assert len(summary.tool_result_messages) == 1 |
| 750 | - assert ( | |
| 751 | - "Verified chapter inventory: chapters/01-introduction.html = Chapter 1: Introduction to Fortran" | |
| 752 | - in summary.tool_result_messages[0].content | |
| 753 | - ) | |
| 895 | + assert "Verified chapter inventory:" not in summary.tool_result_messages[0].content | |
| 754 | 896 | |
| 755 | 897 | |
| 756 | 898 | @pytest.mark.asyncio |
@@ -845,15 +987,11 @@ async def test_tool_batch_runner_marks_validated_html_toc_completion_after_succe | ||
| 845 | 987 | consecutive_errors=0, |
| 846 | 988 | ) |
| 847 | 989 | |
| 848 | - assert any( | |
| 849 | - "Semantic verification preview: validated 2 toc links in index.html" | |
| 850 | - in message.content | |
| 990 | + assert all( | |
| 991 | + "Semantic verification preview:" not in message.content | |
| 851 | 992 | for message in summary.tool_result_messages |
| 852 | 993 | ) |
| 853 | - assert len(queued_messages) == 1 | |
| 854 | - assert "already satisfies the verified link/title constraints" in queued_messages[0] | |
| 855 | - assert f"`{index_path}`" in queued_messages[0] | |
| 856 | - assert f"`{chapters}`" in queued_messages[0] | |
| 994 | + assert queued_messages == [] | |
| 857 | 995 | |
| 858 | 996 | |
| 859 | 997 | @pytest.mark.asyncio |
@@ -1166,7 +1304,7 @@ async def test_tool_batch_runner_duplicate_reference_read_prefers_next_pending_t | ||
| 1166 | 1304 | |
| 1167 | 1305 | |
| 1168 | 1306 | @pytest.mark.asyncio |
| 1169 | -async def test_tool_batch_runner_observation_handoff_pushes_mutation_step( | |
| 1307 | +async def test_tool_batch_runner_duplicate_read_ignores_unplanned_expansion_after_plan_complete( | |
| 1170 | 1308 | temp_dir: Path, |
| 1171 | 1309 | ) -> None: |
| 1172 | 1310 | async def assess_confidence( |
@@ -1174,7 +1312,7 @@ async def test_tool_batch_runner_observation_handoff_pushes_mutation_step( | ||
| 1174 | 1312 | tool_args: dict, |
| 1175 | 1313 | context: str, |
| 1176 | 1314 | ) -> ConfidenceAssessment: |
| 1177 | - raise AssertionError("Confidence scoring should be disabled in this scenario") | |
| 1315 | + raise AssertionError("Confidence scoring should not run for this scenario") | |
| 1178 | 1316 | |
| 1179 | 1317 | async def verify_action( |
| 1180 | 1318 | tool_name: str, |
@@ -1184,9 +1322,33 @@ async def test_tool_batch_runner_observation_handoff_pushes_mutation_step( | ||
| 1184 | 1322 | ) -> ActionVerification: |
| 1185 | 1323 | raise AssertionError("Verification should not run for this scenario") |
| 1186 | 1324 | |
| 1187 | - reference = temp_dir / "fortran" / "index.html" | |
| 1188 | - reference.parent.mkdir(parents=True) | |
| 1189 | - reference.write_text("<h1>Fortran Beginner's Guide</h1>\n") | |
| 1325 | + guide_root = temp_dir / "guides" / "nginx" | |
| 1326 | + chapters = guide_root / "chapters" | |
| 1327 | + guide_root.mkdir(parents=True) | |
| 1328 | + chapters.mkdir() | |
| 1329 | + index_path = guide_root / "index.html" | |
| 1330 | + chapter_one = chapters / "01-getting-started.html" | |
| 1331 | + chapter_two = chapters / "02-installation.html" | |
| 1332 | + index_path.write_text("<html></html>\n") | |
| 1333 | + chapter_one.write_text("<h1>One</h1>\n") | |
| 1334 | + chapter_two.write_text("<h1>Two</h1>\n") | |
| 1335 | + | |
| 1336 | + implementation_plan = temp_dir / "implementation.md" | |
| 1337 | + implementation_plan.write_text( | |
| 1338 | + "\n".join( | |
| 1339 | + [ | |
| 1340 | + "# Implementation Plan", | |
| 1341 | + "", | |
| 1342 | + "## File Changes", | |
| 1343 | + f"- `{guide_root}/`", | |
| 1344 | + f"- `{chapters}/`", | |
| 1345 | + f"- `{index_path}`", | |
| 1346 | + f"- `{chapter_one}`", | |
| 1347 | + f"- `{chapter_two}`", | |
| 1348 | + "", | |
| 1349 | + ] | |
| 1350 | + ) | |
| 1351 | + ) | |
| 1190 | 1352 | |
| 1191 | 1353 | context = build_context( |
| 1192 | 1354 | temp_dir=temp_dir, |
@@ -1200,32 +1362,36 @@ async def test_tool_batch_runner_observation_handoff_pushes_mutation_step( | ||
| 1200 | 1362 | context.queue_steering_message_callback = queued_messages.append |
| 1201 | 1363 | runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) |
| 1202 | 1364 | dod = create_definition_of_done("Create a multi-file nginx guide.") |
| 1203 | - sync_todos_to_definition_of_done( | |
| 1204 | - dod, | |
| 1205 | - [ | |
| 1206 | - { | |
| 1207 | - "content": "Examine the existing Fortran guide structure to understand the cadence and format", | |
| 1208 | - "active_form": "Working on: Examine the existing Fortran guide structure to understand the cadence and format", | |
| 1209 | - "status": "pending", | |
| 1210 | - }, | |
| 1211 | - { | |
| 1212 | - "content": "Create the nginx index.html file", | |
| 1213 | - "active_form": "Working on: Create the nginx index.html file", | |
| 1214 | - "status": "pending", | |
| 1215 | - }, | |
| 1216 | - ], | |
| 1217 | - ) | |
| 1365 | + dod.implementation_plan = str(implementation_plan) | |
| 1366 | + dod.pending_items = [ | |
| 1367 | + "Create 07-performance-tuning.html", | |
| 1368 | + "Verify all guide files are linked and complete", | |
| 1369 | + "Complete the requested work", | |
| 1370 | + ] | |
| 1371 | + | |
| 1218 | 1372 | tool_call = ToolCall( |
| 1219 | - id="read-reference", | |
| 1373 | + id="read-dup", | |
| 1220 | 1374 | name="read", |
| 1221 | - arguments={"file_path": str(reference)}, | |
| 1375 | + arguments={"file_path": str(chapter_one)}, | |
| 1376 | + ) | |
| 1377 | + duplicate_message = ( | |
| 1378 | + "[Skipped - duplicate action: Already read " | |
| 1379 | + f"{chapter_one} recently without any intervening changes; " | |
| 1380 | + "reuse the earlier read result instead of rereading]" | |
| 1222 | 1381 | ) |
| 1223 | 1382 | executor = FakeExecutor( |
| 1224 | 1383 | [ |
| 1225 | - tool_outcome( | |
| 1384 | + ToolExecutionOutcome( | |
| 1226 | 1385 | tool_call=tool_call, |
| 1227 | - output="<h1>Fortran Beginner's Guide</h1>\n", | |
| 1386 | + state=ToolExecutionState.DUPLICATE, | |
| 1387 | + message=Message.tool_result_message( | |
| 1388 | + tool_call_id=tool_call.id, | |
| 1389 | + display_content=duplicate_message, | |
| 1390 | + result_content=duplicate_message, | |
| 1391 | + ), | |
| 1392 | + event_content=duplicate_message, | |
| 1228 | 1393 | is_error=False, |
| 1394 | + result_output=duplicate_message, | |
| 1229 | 1395 | ) |
| 1230 | 1396 | ] |
| 1231 | 1397 | ) |
@@ -1245,19 +1411,13 @@ async def test_tool_batch_runner_observation_handoff_pushes_mutation_step( | ||
| 1245 | 1411 | consecutive_errors=0, |
| 1246 | 1412 | ) |
| 1247 | 1413 | |
| 1248 | - assert any( | |
| 1249 | - "Continue with the next pending item: `Create the nginx index.html file`" | |
| 1250 | - in message | |
| 1251 | - for message in queued_messages | |
| 1252 | - ) | |
| 1253 | - assert any( | |
| 1254 | - "stop gathering more reference material and perform the change now" in message | |
| 1255 | - for message in queued_messages | |
| 1256 | - ) | |
| 1414 | + assert len(queued_messages) == 1 | |
| 1415 | + assert "Verify all guide files are linked and complete" in queued_messages[0] | |
| 1416 | + assert "Create 07-performance-tuning.html" not in queued_messages[0] | |
| 1257 | 1417 | |
| 1258 | 1418 | |
| 1259 | 1419 | @pytest.mark.asyncio |
| 1260 | -async def test_tool_batch_runner_hands_off_noop_toc_edit_when_file_is_already_valid( | |
| 1420 | +async def test_tool_batch_runner_duplicate_read_after_plan_complete_pushes_verification_handoff( | |
| 1261 | 1421 | temp_dir: Path, |
| 1262 | 1422 | ) -> None: |
| 1263 | 1423 | async def assess_confidence( |
@@ -1265,7 +1425,7 @@ async def test_tool_batch_runner_hands_off_noop_toc_edit_when_file_is_already_va | ||
| 1265 | 1425 | tool_args: dict, |
| 1266 | 1426 | context: str, |
| 1267 | 1427 | ) -> ConfidenceAssessment: |
| 1268 | - raise AssertionError("Confidence scoring should not run in this scenario") | |
| 1428 | + raise AssertionError("Confidence scoring should not run for this scenario") | |
| 1269 | 1429 | |
| 1270 | 1430 | async def verify_action( |
| 1271 | 1431 | tool_name: str, |
@@ -1273,31 +1433,35 @@ async def test_tool_batch_runner_hands_off_noop_toc_edit_when_file_is_already_va | ||
| 1273 | 1433 | result: str, |
| 1274 | 1434 | expected: str = "", |
| 1275 | 1435 | ) -> ActionVerification: |
| 1276 | - raise AssertionError("Verification should not run in this scenario") | |
| 1436 | + raise AssertionError("Verification should not run for this scenario") | |
| 1277 | 1437 | |
| 1278 | - prompt = ( | |
| 1279 | - "Have a look at ~/Loader/guides/fortran/index.html, then " | |
| 1280 | - "~/Loader/guides/fortran/chapters. The table of contents links in " | |
| 1281 | - "index.html are inaccurate and the href’s are wrong. Let’s update the " | |
| 1282 | - "links and their link texts to be correct." | |
| 1283 | - ) | |
| 1284 | - chapters = temp_dir / "chapters" | |
| 1438 | + guide_root = temp_dir / "guides" / "nginx" | |
| 1439 | + chapters = guide_root / "chapters" | |
| 1440 | + guide_root.mkdir(parents=True) | |
| 1285 | 1441 | chapters.mkdir() |
| 1286 | - (chapters / "01-introduction.html").write_text( | |
| 1287 | - "<h1>Chapter 1: Introduction to Fortran</h1>\n" | |
| 1288 | - ) | |
| 1289 | - (chapters / "02-setup.html").write_text( | |
| 1290 | - "<h1>Chapter 2: Setting Up Your Environment</h1>\n" | |
| 1291 | - ) | |
| 1292 | - current_block = ( | |
| 1293 | - "<h2>Table of Contents</h2>\n" | |
| 1294 | - ' <ul class="chapter-list">\n' | |
| 1295 | - ' <li><a href="chapters/01-introduction.html">Chapter 1: Introduction to Fortran</a></li>\n' | |
| 1296 | - ' <li><a href="chapters/02-setup.html">Chapter 2: Setting Up Your Environment</a></li>\n' | |
| 1297 | - " </ul>\n" | |
| 1442 | + index_path = guide_root / "index.html" | |
| 1443 | + chapter_one = chapters / "01-getting-started.html" | |
| 1444 | + chapter_two = chapters / "02-installation.html" | |
| 1445 | + index_path.write_text("<html></html>\n") | |
| 1446 | + chapter_one.write_text("<h1>One</h1>\n") | |
| 1447 | + chapter_two.write_text("<h1>Two</h1>\n") | |
| 1448 | + | |
| 1449 | + implementation_plan = temp_dir / "implementation.md" | |
| 1450 | + implementation_plan.write_text( | |
| 1451 | + "\n".join( | |
| 1452 | + [ | |
| 1453 | + "# Implementation Plan", | |
| 1454 | + "", | |
| 1455 | + "## File Changes", | |
| 1456 | + f"- `{guide_root}/`", | |
| 1457 | + f"- `{chapters}/`", | |
| 1458 | + f"- `{index_path}`", | |
| 1459 | + f"- `{chapter_one}`", | |
| 1460 | + f"- `{chapter_two}`", | |
| 1461 | + "", | |
| 1462 | + ] | |
| 1463 | + ) | |
| 1298 | 1464 | ) |
| 1299 | - index_path = temp_dir / "index.html" | |
| 1300 | - index_path.write_text(current_block) | |
| 1301 | 1465 | |
| 1302 | 1466 | context = build_context( |
| 1303 | 1467 | temp_dir=temp_dir, |
@@ -1307,40 +1471,52 @@ async def test_tool_batch_runner_hands_off_noop_toc_edit_when_file_is_already_va | ||
| 1307 | 1471 | verify_action=verify_action, |
| 1308 | 1472 | auto_recover=False, |
| 1309 | 1473 | ) |
| 1310 | - context.session.current_task = prompt # type: ignore[attr-defined] | |
| 1311 | 1474 | queued_messages: list[str] = [] |
| 1312 | 1475 | context.queue_steering_message_callback = queued_messages.append |
| 1313 | 1476 | runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) |
| 1477 | + dod = create_definition_of_done("Create a multi-file nginx guide.") | |
| 1478 | + dod.implementation_plan = str(implementation_plan) | |
| 1479 | + dod.verification_commands = [f"ls -la {guide_root}"] | |
| 1480 | + dod.pending_items = [ | |
| 1481 | + "Create 07-performance-tuning.html", | |
| 1482 | + "Complete the requested work", | |
| 1483 | + ] | |
| 1484 | + | |
| 1314 | 1485 | tool_call = ToolCall( |
| 1315 | - id="edit-1", | |
| 1316 | - name="edit", | |
| 1317 | - arguments={ | |
| 1318 | - "file_path": str(index_path), | |
| 1319 | - "old_string": current_block, | |
| 1320 | - "new_string": current_block, | |
| 1321 | - }, | |
| 1486 | + id="read-dup", | |
| 1487 | + name="read", | |
| 1488 | + arguments={"file_path": str(chapter_one)}, | |
| 1489 | + ) | |
| 1490 | + duplicate_message = ( | |
| 1491 | + "[Skipped - duplicate action: Already read " | |
| 1492 | + f"{chapter_one} recently without any intervening changes; " | |
| 1493 | + "reuse the earlier read result instead of rereading]" | |
| 1322 | 1494 | ) |
| 1323 | 1495 | executor = FakeExecutor( |
| 1324 | 1496 | [ |
| 1325 | - tool_outcome( | |
| 1497 | + ToolExecutionOutcome( | |
| 1326 | 1498 | tool_call=tool_call, |
| 1327 | - output=( | |
| 1328 | - "[Blocked - old_string and new_string are identical - no change " | |
| 1329 | - "would occur] Suggestion: Provide different old and new strings" | |
| 1499 | + state=ToolExecutionState.DUPLICATE, | |
| 1500 | + message=Message.tool_result_message( | |
| 1501 | + tool_call_id=tool_call.id, | |
| 1502 | + display_content=duplicate_message, | |
| 1503 | + result_content=duplicate_message, | |
| 1330 | 1504 | ), |
| 1331 | - is_error=True, | |
| 1332 | - state=ToolExecutionState.BLOCKED, | |
| 1505 | + event_content=duplicate_message, | |
| 1506 | + is_error=False, | |
| 1507 | + result_output=duplicate_message, | |
| 1333 | 1508 | ) |
| 1334 | 1509 | ] |
| 1335 | 1510 | ) |
| 1336 | 1511 | |
| 1512 | + summary = TurnSummary(final_response="") | |
| 1337 | 1513 | await runner.execute_batch( |
| 1338 | 1514 | tool_calls=[tool_call], |
| 1339 | 1515 | tool_source="assistant", |
| 1340 | 1516 | pending_tool_calls_seen=set(), |
| 1341 | 1517 | emit=_noop_emit, |
| 1342 | - summary=TurnSummary(final_response=""), | |
| 1343 | - dod=create_definition_of_done(prompt), | |
| 1518 | + summary=summary, | |
| 1519 | + dod=dod, | |
| 1344 | 1520 | executor=executor, # type: ignore[arg-type] |
| 1345 | 1521 | on_confirmation=None, |
| 1346 | 1522 | on_user_question=None, |
@@ -1349,18 +1525,13 @@ async def test_tool_batch_runner_hands_off_noop_toc_edit_when_file_is_already_va | ||
| 1349 | 1525 | ) |
| 1350 | 1526 | |
| 1351 | 1527 | assert len(queued_messages) == 1 |
| 1352 | - assert "already matches the validated replacement block" in queued_messages[0] | |
| 1353 | - assert "validated 2 linked entries" in queued_messages[0] | |
| 1354 | - assert f"`{index_path}`" in queued_messages[0] | |
| 1355 | - assert "Do not call `edit`, `patch`, or reread the same TOC again" in queued_messages[0] | |
| 1356 | - | |
| 1357 | - | |
| 1358 | -async def _noop_emit(event: AgentEvent) -> None: | |
| 1359 | - return None | |
| 1528 | + assert "All explicitly planned artifacts already exist." in queued_messages[0] | |
| 1529 | + assert "Move to verification or final confirmation using the files already on disk." in queued_messages[0] | |
| 1530 | + assert "Create 07-performance-tuning.html" not in queued_messages[0] | |
| 1360 | 1531 | |
| 1361 | 1532 | |
| 1362 | 1533 | @pytest.mark.asyncio |
| 1363 | -async def test_tool_batch_runner_marks_verification_planned_after_new_mutation( | |
| 1534 | +async def test_tool_batch_runner_duplicate_read_after_plan_complete_ignores_stale_creation_todos( | |
| 1364 | 1535 | temp_dir: Path, |
| 1365 | 1536 | ) -> None: |
| 1366 | 1537 | async def assess_confidence( |
@@ -1368,7 +1539,7 @@ async def test_tool_batch_runner_marks_verification_planned_after_new_mutation( | ||
| 1368 | 1539 | tool_args: dict, |
| 1369 | 1540 | context: str, |
| 1370 | 1541 | ) -> ConfidenceAssessment: |
| 1371 | - raise AssertionError("Confidence scoring should be disabled in this scenario") | |
| 1542 | + raise AssertionError("Confidence scoring should not run for this scenario") | |
| 1372 | 1543 | |
| 1373 | 1544 | async def verify_action( |
| 1374 | 1545 | tool_name: str, |
@@ -1378,34 +1549,87 @@ async def test_tool_batch_runner_marks_verification_planned_after_new_mutation( | ||
| 1378 | 1549 | ) -> ActionVerification: |
| 1379 | 1550 | raise AssertionError("Verification should not run for this scenario") |
| 1380 | 1551 | |
| 1552 | + guide_root = temp_dir / "guides" / "nginx" | |
| 1553 | + chapters = guide_root / "chapters" | |
| 1554 | + guide_root.mkdir(parents=True) | |
| 1555 | + chapters.mkdir() | |
| 1556 | + index_path = guide_root / "index.html" | |
| 1557 | + chapter_one = chapters / "01-getting-started.html" | |
| 1558 | + chapter_two = chapters / "02-installation.html" | |
| 1559 | + index_path.write_text("<html></html>\n") | |
| 1560 | + chapter_one.write_text("<h1>One</h1>\n") | |
| 1561 | + chapter_two.write_text("<h1>Two</h1>\n") | |
| 1562 | + | |
| 1563 | + implementation_plan = temp_dir / "implementation.md" | |
| 1564 | + implementation_plan.write_text( | |
| 1565 | + "\n".join( | |
| 1566 | + [ | |
| 1567 | + "# Implementation Plan", | |
| 1568 | + "", | |
| 1569 | + "## File Changes", | |
| 1570 | + f"- `{guide_root}/`", | |
| 1571 | + f"- `{chapters}/`", | |
| 1572 | + f"- `{index_path}`", | |
| 1573 | + f"- `{chapter_one}`", | |
| 1574 | + f"- `{chapter_two}`", | |
| 1575 | + "", | |
| 1576 | + ] | |
| 1577 | + ) | |
| 1578 | + ) | |
| 1579 | + | |
| 1381 | 1580 | context = build_context( |
| 1382 | 1581 | temp_dir=temp_dir, |
| 1383 | 1582 | messages=[], |
| 1384 | 1583 | safeguards=FakeSafeguards(), |
| 1385 | 1584 | assess_confidence=assess_confidence, |
| 1386 | 1585 | verify_action=verify_action, |
| 1586 | + auto_recover=False, | |
| 1387 | 1587 | ) |
| 1588 | + queued_messages: list[str] = [] | |
| 1589 | + context.queue_steering_message_callback = queued_messages.append | |
| 1388 | 1590 | runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) |
| 1591 | + dod = create_definition_of_done("Create a multi-file nginx guide.") | |
| 1592 | + dod.implementation_plan = str(implementation_plan) | |
| 1593 | + dod.verification_commands = [f"ls -la {guide_root}"] | |
| 1594 | + dod.pending_items = [ | |
| 1595 | + "Create 01-getting-started.html", | |
| 1596 | + "Creating 02-installation.html", | |
| 1597 | + "Complete the requested work", | |
| 1598 | + ] | |
| 1599 | + | |
| 1389 | 1600 | tool_call = ToolCall( |
| 1390 | - id="write-1", | |
| 1391 | - name="write", | |
| 1392 | - arguments={"file_path": str(temp_dir / "README.md"), "content": "updated\n"}, | |
| 1601 | + id="read-dup-built-stale", | |
| 1602 | + name="read", | |
| 1603 | + arguments={"file_path": str(chapter_one)}, | |
| 1604 | + ) | |
| 1605 | + duplicate_message = ( | |
| 1606 | + "[Skipped - duplicate action: Already read " | |
| 1607 | + f"{chapter_one} recently without any intervening changes; " | |
| 1608 | + "reuse the earlier read result instead of rereading]" | |
| 1393 | 1609 | ) |
| 1394 | 1610 | executor = FakeExecutor( |
| 1395 | - [tool_outcome(tool_call=tool_call, output="wrote file", is_error=False)] | |
| 1611 | + [ | |
| 1612 | + ToolExecutionOutcome( | |
| 1613 | + tool_call=tool_call, | |
| 1614 | + state=ToolExecutionState.DUPLICATE, | |
| 1615 | + message=Message.tool_result_message( | |
| 1616 | + tool_call_id=tool_call.id, | |
| 1617 | + display_content=duplicate_message, | |
| 1618 | + result_content=duplicate_message, | |
| 1619 | + ), | |
| 1620 | + event_content=duplicate_message, | |
| 1621 | + is_error=False, | |
| 1622 | + result_output=duplicate_message, | |
| 1623 | + ) | |
| 1624 | + ] | |
| 1396 | 1625 | ) |
| 1397 | - summary = TurnSummary(final_response="") | |
| 1398 | - dod = create_definition_of_done("Update README and verify it still works.") | |
| 1399 | - events: list[AgentEvent] = [] | |
| 1400 | - | |
| 1401 | - async def emit(event: AgentEvent) -> None: | |
| 1402 | - events.append(event) | |
| 1403 | 1626 | |
| 1627 | + summary = TurnSummary(final_response="") | |
| 1404 | 1628 | await runner.execute_batch( |
| 1405 | 1629 | tool_calls=[tool_call], |
| 1406 | 1630 | tool_source="assistant", |
| 1407 | 1631 | pending_tool_calls_seen=set(), |
| 1408 | - emit=emit, | |
| 1632 | + emit=_noop_emit, | |
| 1409 | 1633 | summary=summary, |
| 1410 | 1634 | dod=dod, |
| 1411 | 1635 | executor=executor, # type: ignore[arg-type] |
@@ -1415,10 +1639,1500 @@ async def test_tool_batch_runner_marks_verification_planned_after_new_mutation( | ||
| 1415 | 1639 | consecutive_errors=0, |
| 1416 | 1640 | ) |
| 1417 | 1641 | |
| 1418 | - assert dod.last_verification_result == "planned" | |
| 1419 | - assert dod.verification_commands | |
| 1420 | - assert "Collect verification evidence" in dod.pending_items | |
| 1421 | - assert dod.active_verification_attempt_id == "verification-attempt-1" | |
| 1642 | + assert len(queued_messages) == 1 | |
| 1643 | + assert "All explicitly planned artifacts already exist." in queued_messages[0] | |
| 1644 | + assert "Move to verification or final confirmation using the files already on disk." in queued_messages[0] | |
| 1645 | + assert "Create 01-getting-started.html" not in queued_messages[0] | |
| 1646 | + assert "Creating 02-installation.html" not in queued_messages[0] | |
| 1647 | + | |
| 1648 | + | |
| 1649 | +@pytest.mark.asyncio | |
| 1650 | +async def test_tool_batch_runner_observation_handoff_pushes_mutation_step( | |
| 1651 | + temp_dir: Path, | |
| 1652 | +) -> None: | |
| 1653 | + async def assess_confidence( | |
| 1654 | + tool_name: str, | |
| 1655 | + tool_args: dict, | |
| 1656 | + context: str, | |
| 1657 | + ) -> ConfidenceAssessment: | |
| 1658 | + raise AssertionError("Confidence scoring should be disabled in this scenario") | |
| 1659 | + | |
| 1660 | + async def verify_action( | |
| 1661 | + tool_name: str, | |
| 1662 | + tool_args: dict, | |
| 1663 | + result: str, | |
| 1664 | + expected: str = "", | |
| 1665 | + ) -> ActionVerification: | |
| 1666 | + raise AssertionError("Verification should not run for this scenario") | |
| 1667 | + | |
| 1668 | + reference = temp_dir / "fortran" / "index.html" | |
| 1669 | + reference.parent.mkdir(parents=True) | |
| 1670 | + reference.write_text("<h1>Fortran Beginner's Guide</h1>\n") | |
| 1671 | + | |
| 1672 | + context = build_context( | |
| 1673 | + temp_dir=temp_dir, | |
| 1674 | + messages=[], | |
| 1675 | + safeguards=FakeSafeguards(), | |
| 1676 | + assess_confidence=assess_confidence, | |
| 1677 | + verify_action=verify_action, | |
| 1678 | + auto_recover=False, | |
| 1679 | + ) | |
| 1680 | + queued_messages: list[str] = [] | |
| 1681 | + context.queue_steering_message_callback = queued_messages.append | |
| 1682 | + runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) | |
| 1683 | + dod = create_definition_of_done("Create a multi-file nginx guide.") | |
| 1684 | + sync_todos_to_definition_of_done( | |
| 1685 | + dod, | |
| 1686 | + [ | |
| 1687 | + { | |
| 1688 | + "content": "Examine the existing Fortran guide structure to understand the cadence and format", | |
| 1689 | + "active_form": "Working on: Examine the existing Fortran guide structure to understand the cadence and format", | |
| 1690 | + "status": "pending", | |
| 1691 | + }, | |
| 1692 | + { | |
| 1693 | + "content": "Create the nginx index.html file", | |
| 1694 | + "active_form": "Working on: Create the nginx index.html file", | |
| 1695 | + "status": "pending", | |
| 1696 | + }, | |
| 1697 | + ], | |
| 1698 | + ) | |
| 1699 | + tool_call = ToolCall( | |
| 1700 | + id="read-reference", | |
| 1701 | + name="read", | |
| 1702 | + arguments={"file_path": str(reference)}, | |
| 1703 | + ) | |
| 1704 | + executor = FakeExecutor( | |
| 1705 | + [ | |
| 1706 | + tool_outcome( | |
| 1707 | + tool_call=tool_call, | |
| 1708 | + output="<h1>Fortran Beginner's Guide</h1>\n", | |
| 1709 | + is_error=False, | |
| 1710 | + ) | |
| 1711 | + ] | |
| 1712 | + ) | |
| 1713 | + | |
| 1714 | + summary = TurnSummary(final_response="") | |
| 1715 | + await runner.execute_batch( | |
| 1716 | + tool_calls=[tool_call], | |
| 1717 | + tool_source="assistant", | |
| 1718 | + pending_tool_calls_seen=set(), | |
| 1719 | + emit=_noop_emit, | |
| 1720 | + summary=summary, | |
| 1721 | + dod=dod, | |
| 1722 | + executor=executor, # type: ignore[arg-type] | |
| 1723 | + on_confirmation=None, | |
| 1724 | + on_user_question=None, | |
| 1725 | + emit_confirmation=None, | |
| 1726 | + consecutive_errors=0, | |
| 1727 | + ) | |
| 1728 | + | |
| 1729 | + assert any( | |
| 1730 | + "Continue with the next pending item: `Create the nginx index.html file`" | |
| 1731 | + in message | |
| 1732 | + for message in queued_messages | |
| 1733 | + ) | |
| 1734 | + assert any( | |
| 1735 | + "stop gathering more reference material and perform the change now" in message | |
| 1736 | + for message in queued_messages | |
| 1737 | + ) | |
| 1738 | + | |
| 1739 | + | |
| 1740 | +@pytest.mark.asyncio | |
| 1741 | +async def test_duplicate_observation_nudge_prioritizes_missing_artifact_over_review( | |
| 1742 | + temp_dir: Path, | |
| 1743 | +) -> None: | |
| 1744 | + async def assess_confidence( | |
| 1745 | + tool_name: str, | |
| 1746 | + tool_args: dict, | |
| 1747 | + context: str, | |
| 1748 | + ) -> ConfidenceAssessment: | |
| 1749 | + raise AssertionError("Confidence scoring should be disabled in this scenario") | |
| 1750 | + | |
| 1751 | + async def verify_action( | |
| 1752 | + tool_name: str, | |
| 1753 | + tool_args: dict, | |
| 1754 | + result: str, | |
| 1755 | + expected: str = "", | |
| 1756 | + ) -> ActionVerification: | |
| 1757 | + raise AssertionError("Verification should not run for this scenario") | |
| 1758 | + | |
| 1759 | + guide_root = temp_dir / "guides" / "nginx" | |
| 1760 | + chapters = guide_root / "chapters" | |
| 1761 | + chapters.mkdir(parents=True) | |
| 1762 | + index_path = guide_root / "index.html" | |
| 1763 | + chapter_one = chapters / "01-getting-started.html" | |
| 1764 | + chapter_one.write_text("<h1>One</h1>\n") | |
| 1765 | + index_path.write_text("<a href=\"chapters/01-getting-started.html\">One</a>\n") | |
| 1766 | + | |
| 1767 | + implementation_plan = temp_dir / "implementation.md" | |
| 1768 | + implementation_plan.write_text( | |
| 1769 | + "\n".join( | |
| 1770 | + [ | |
| 1771 | + "# Implementation Plan", | |
| 1772 | + "", | |
| 1773 | + "## File Changes", | |
| 1774 | + f"- `{index_path}`", | |
| 1775 | + f"- `{chapter_one}`", | |
| 1776 | + f"- `{chapters / '06-ssl-configuration.html'}`", | |
| 1777 | + "", | |
| 1778 | + ] | |
| 1779 | + ) | |
| 1780 | + ) | |
| 1781 | + | |
| 1782 | + context = build_context( | |
| 1783 | + temp_dir=temp_dir, | |
| 1784 | + messages=[], | |
| 1785 | + safeguards=FakeSafeguards(), | |
| 1786 | + assess_confidence=assess_confidence, | |
| 1787 | + verify_action=verify_action, | |
| 1788 | + auto_recover=False, | |
| 1789 | + ) | |
| 1790 | + queued_messages: list[str] = [] | |
| 1791 | + context.queue_steering_message_callback = queued_messages.append | |
| 1792 | + runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) | |
| 1793 | + dod = create_definition_of_done("Create a multi-file nginx guide.") | |
| 1794 | + dod.implementation_plan = str(implementation_plan) | |
| 1795 | + sync_todos_to_definition_of_done( | |
| 1796 | + dod, | |
| 1797 | + [ | |
| 1798 | + { | |
| 1799 | + "content": "Ensure all files are properly linked and formatted consistently", | |
| 1800 | + "active_form": "Working on: Ensure all files are properly linked and formatted consistently", | |
| 1801 | + "status": "pending", | |
| 1802 | + }, | |
| 1803 | + { | |
| 1804 | + "content": "Create the final chapter (06-ssl-configuration.html)", | |
| 1805 | + "active_form": "Working on: Create the final chapter (06-ssl-configuration.html)", | |
| 1806 | + "status": "pending", | |
| 1807 | + }, | |
| 1808 | + ], | |
| 1809 | + ) | |
| 1810 | + assert tool_batches_should_prioritize_missing_artifact( | |
| 1811 | + next_pending=dod.pending_items[0], | |
| 1812 | + missing_artifact=(chapters / "06-ssl-configuration.html", False), | |
| 1813 | + ) | |
| 1814 | + | |
| 1815 | + tool_call = ToolCall( | |
| 1816 | + id="dup-read", | |
| 1817 | + name="read", | |
| 1818 | + arguments={"file_path": str(index_path)}, | |
| 1819 | + ) | |
| 1820 | + runner._queue_duplicate_observation_nudge(tool_call, dod=dod) # type: ignore[attr-defined] | |
| 1821 | + | |
| 1822 | + assert queued_messages | |
| 1823 | + message = queued_messages[-1] | |
| 1824 | + assert "06-ssl-configuration.html" in message | |
| 1825 | + assert "Do not switch into review or consistency-check mode" in message | |
| 1826 | + assert ( | |
| 1827 | + "Continue with the next pending item: `Ensure all files are properly linked and formatted consistently`" | |
| 1828 | + not in message | |
| 1829 | + ) | |
| 1830 | + | |
| 1831 | + | |
| 1832 | +@pytest.mark.asyncio | |
| 1833 | +async def test_tool_batch_runner_hands_off_to_verification_once_planned_artifacts_exist( | |
| 1834 | + temp_dir: Path, | |
| 1835 | +) -> None: | |
| 1836 | + async def assess_confidence( | |
| 1837 | + tool_name: str, | |
| 1838 | + tool_args: dict, | |
| 1839 | + context: str, | |
| 1840 | + ) -> ConfidenceAssessment: | |
| 1841 | + raise AssertionError("Confidence scoring should be disabled in this scenario") | |
| 1842 | + | |
| 1843 | + async def verify_action( | |
| 1844 | + tool_name: str, | |
| 1845 | + tool_args: dict, | |
| 1846 | + result: str, | |
| 1847 | + expected: str = "", | |
| 1848 | + ) -> ActionVerification: | |
| 1849 | + raise AssertionError("Verification should not run for this scenario") | |
| 1850 | + | |
| 1851 | + guide_root = temp_dir / "guides" / "nginx" | |
| 1852 | + chapters = guide_root / "chapters" | |
| 1853 | + chapters.mkdir(parents=True) | |
| 1854 | + index_path = guide_root / "index.html" | |
| 1855 | + chapter_one = chapters / "01-getting-started.html" | |
| 1856 | + chapter_two = chapters / "02-installation.html" | |
| 1857 | + index_path.write_text("<a href=\"chapters/01-getting-started.html\">One</a>\n") | |
| 1858 | + chapter_one.write_text("<h1>One</h1>\n") | |
| 1859 | + chapter_two.write_text("<h1>Two</h1>\n") | |
| 1860 | + | |
| 1861 | + implementation_plan = temp_dir / "implementation.md" | |
| 1862 | + implementation_plan.write_text( | |
| 1863 | + "\n".join( | |
| 1864 | + [ | |
| 1865 | + "# Implementation Plan", | |
| 1866 | + "", | |
| 1867 | + "## File Changes", | |
| 1868 | + f"- `{chapters}/`", | |
| 1869 | + f"- `{index_path}`", | |
| 1870 | + f"- `{chapter_one}`", | |
| 1871 | + f"- `{chapter_two}`", | |
| 1872 | + "", | |
| 1873 | + ] | |
| 1874 | + ) | |
| 1875 | + ) | |
| 1876 | + | |
| 1877 | + context = build_context( | |
| 1878 | + temp_dir=temp_dir, | |
| 1879 | + messages=[], | |
| 1880 | + safeguards=FakeSafeguards(), | |
| 1881 | + assess_confidence=assess_confidence, | |
| 1882 | + verify_action=verify_action, | |
| 1883 | + auto_recover=False, | |
| 1884 | + ) | |
| 1885 | + queued_messages: list[str] = [] | |
| 1886 | + context.queue_steering_message_callback = queued_messages.append | |
| 1887 | + runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) | |
| 1888 | + dod = create_definition_of_done("Create a multi-file nginx guide.") | |
| 1889 | + dod.implementation_plan = str(implementation_plan) | |
| 1890 | + sync_todos_to_definition_of_done( | |
| 1891 | + dod, | |
| 1892 | + [ | |
| 1893 | + { | |
| 1894 | + "content": "Create the guide files", | |
| 1895 | + "active_form": "Working on: Create the guide files", | |
| 1896 | + "status": "completed", | |
| 1897 | + }, | |
| 1898 | + { | |
| 1899 | + "content": "Ensure all files are properly linked and formatted consistently", | |
| 1900 | + "active_form": "Working on: Ensure all files are properly linked and formatted consistently", | |
| 1901 | + "status": "pending", | |
| 1902 | + }, | |
| 1903 | + ], | |
| 1904 | + ) | |
| 1905 | + tool_call = ToolCall( | |
| 1906 | + id="write-final", | |
| 1907 | + name="write", | |
| 1908 | + arguments={ | |
| 1909 | + "file_path": str(chapter_two), | |
| 1910 | + "content": "<h1>Two</h1>\n", | |
| 1911 | + }, | |
| 1912 | + ) | |
| 1913 | + executor = FakeExecutor( | |
| 1914 | + [ | |
| 1915 | + tool_outcome( | |
| 1916 | + tool_call=tool_call, | |
| 1917 | + output=f"Successfully wrote {chapter_two}", | |
| 1918 | + is_error=False, | |
| 1919 | + ) | |
| 1920 | + ] | |
| 1921 | + ) | |
| 1922 | + | |
| 1923 | + summary = TurnSummary(final_response="") | |
| 1924 | + await runner.execute_batch( | |
| 1925 | + tool_calls=[tool_call], | |
| 1926 | + tool_source="assistant", | |
| 1927 | + pending_tool_calls_seen=set(), | |
| 1928 | + emit=_noop_emit, | |
| 1929 | + summary=summary, | |
| 1930 | + dod=dod, | |
| 1931 | + executor=executor, # type: ignore[arg-type] | |
| 1932 | + on_confirmation=None, | |
| 1933 | + on_user_question=None, | |
| 1934 | + emit_confirmation=None, | |
| 1935 | + consecutive_errors=0, | |
| 1936 | + ) | |
| 1937 | + | |
| 1938 | + assert any( | |
| 1939 | + "All explicitly planned artifacts now exist." in message | |
| 1940 | + for message in queued_messages | |
| 1941 | + ) | |
| 1942 | + assert any( | |
| 1943 | + "Ensure all files are properly linked and formatted consistently" in message | |
| 1944 | + for message in queued_messages | |
| 1945 | + ) | |
| 1946 | + assert any( | |
| 1947 | + "Move to verification once no specific mismatch remains." in message | |
| 1948 | + for message in queued_messages | |
| 1949 | + ) | |
| 1950 | + | |
| 1951 | + | |
| 1952 | +@pytest.mark.asyncio | |
| 1953 | +async def test_tool_batch_runner_mutation_handoff_points_at_next_missing_artifact( | |
| 1954 | + temp_dir: Path, | |
| 1955 | +) -> None: | |
| 1956 | + async def assess_confidence( | |
| 1957 | + tool_name: str, | |
| 1958 | + tool_args: dict, | |
| 1959 | + context: str, | |
| 1960 | + ) -> ConfidenceAssessment: | |
| 1961 | + raise AssertionError("Confidence scoring should not run in this scenario") | |
| 1962 | + | |
| 1963 | + async def verify_action( | |
| 1964 | + tool_name: str, | |
| 1965 | + tool_args: dict, | |
| 1966 | + result: str, | |
| 1967 | + expected: str = "", | |
| 1968 | + ) -> ActionVerification: | |
| 1969 | + raise AssertionError("Verification should not run in this scenario") | |
| 1970 | + | |
| 1971 | + guide_root = temp_dir / "guides" / "nginx" | |
| 1972 | + chapters = guide_root / "chapters" | |
| 1973 | + guide_root.mkdir(parents=True) | |
| 1974 | + chapters.mkdir() | |
| 1975 | + index_path = guide_root / "index.html" | |
| 1976 | + index_path.write_text("<html></html>\n") | |
| 1977 | + chapter_one = chapters / "01-getting-started.html" | |
| 1978 | + chapter_two = chapters / "02-installation.html" | |
| 1979 | + implementation_plan = temp_dir / "implementation.md" | |
| 1980 | + implementation_plan.write_text( | |
| 1981 | + "\n".join( | |
| 1982 | + [ | |
| 1983 | + "# Implementation Plan", | |
| 1984 | + "", | |
| 1985 | + "## File Changes", | |
| 1986 | + f"- `{guide_root}/`", | |
| 1987 | + f"- `{index_path}`", | |
| 1988 | + f"- `{chapter_one}`", | |
| 1989 | + f"- `{chapter_two}`", | |
| 1990 | + "", | |
| 1991 | + ] | |
| 1992 | + ) | |
| 1993 | + ) | |
| 1994 | + | |
| 1995 | + context = build_context( | |
| 1996 | + temp_dir=temp_dir, | |
| 1997 | + messages=[], | |
| 1998 | + safeguards=FakeSafeguards(), | |
| 1999 | + assess_confidence=assess_confidence, | |
| 2000 | + verify_action=verify_action, | |
| 2001 | + auto_recover=False, | |
| 2002 | + ) | |
| 2003 | + queued_messages: list[str] = [] | |
| 2004 | + context.queue_steering_message_callback = queued_messages.append | |
| 2005 | + runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) | |
| 2006 | + dod = create_definition_of_done("Create a multi-file nginx guide.") | |
| 2007 | + dod.implementation_plan = str(implementation_plan) | |
| 2008 | + sync_todos_to_definition_of_done( | |
| 2009 | + dod, | |
| 2010 | + [ | |
| 2011 | + { | |
| 2012 | + "content": "Create the main index.html file with proper structure", | |
| 2013 | + "active_form": "Working on: Create the main index.html file with proper structure", | |
| 2014 | + "status": "pending", | |
| 2015 | + }, | |
| 2016 | + { | |
| 2017 | + "content": "Create each chapter file in sequence, following the established pattern", | |
| 2018 | + "active_form": "Working on: Create each chapter file in sequence, following the established pattern", | |
| 2019 | + "status": "pending", | |
| 2020 | + }, | |
| 2021 | + { | |
| 2022 | + "content": "Ensure all files are properly linked and formatted consistently", | |
| 2023 | + "active_form": "Working on: Ensure all files are properly linked and formatted consistently", | |
| 2024 | + "status": "pending", | |
| 2025 | + }, | |
| 2026 | + ], | |
| 2027 | + ) | |
| 2028 | + tool_call = ToolCall( | |
| 2029 | + id="write-index", | |
| 2030 | + name="write", | |
| 2031 | + arguments={"file_path": str(index_path), "content": "<html></html>\n"}, | |
| 2032 | + ) | |
| 2033 | + executor = FakeExecutor( | |
| 2034 | + [tool_outcome(tool_call=tool_call, output=f"Successfully wrote {index_path}", is_error=False)] | |
| 2035 | + ) | |
| 2036 | + | |
| 2037 | + summary = TurnSummary(final_response="") | |
| 2038 | + await runner.execute_batch( | |
| 2039 | + tool_calls=[tool_call], | |
| 2040 | + tool_source="assistant", | |
| 2041 | + pending_tool_calls_seen=set(), | |
| 2042 | + emit=_noop_emit, | |
| 2043 | + summary=summary, | |
| 2044 | + dod=dod, | |
| 2045 | + executor=executor, # type: ignore[arg-type] | |
| 2046 | + on_confirmation=None, | |
| 2047 | + on_user_question=None, | |
| 2048 | + emit_confirmation=None, | |
| 2049 | + consecutive_errors=0, | |
| 2050 | + ) | |
| 2051 | + | |
| 2052 | + assert queued_messages | |
| 2053 | + message = queued_messages[-1] | |
| 2054 | + assert "Resume by creating `01-getting-started.html` now." in message | |
| 2055 | + assert "refresh `TodoWrite`" in message | |
| 2056 | + assert "Do not move to verification, final confirmation, or TodoWrite-only bookkeeping" in message | |
| 2057 | + assert "Do not spend another turn on working notes or rediscovery alone." in message | |
| 2058 | + | |
| 2059 | + | |
| 2060 | +@pytest.mark.asyncio | |
| 2061 | +async def test_tool_batch_runner_large_plan_does_not_claim_completion_early( | |
| 2062 | + temp_dir: Path, | |
| 2063 | +) -> None: | |
| 2064 | + async def assess_confidence( | |
| 2065 | + tool_name: str, | |
| 2066 | + tool_args: dict, | |
| 2067 | + context: str, | |
| 2068 | + ) -> ConfidenceAssessment: | |
| 2069 | + raise AssertionError("Confidence scoring should not run in this scenario") | |
| 2070 | + | |
| 2071 | + async def verify_action( | |
| 2072 | + tool_name: str, | |
| 2073 | + tool_args: dict, | |
| 2074 | + result: str, | |
| 2075 | + expected: str = "", | |
| 2076 | + ) -> ActionVerification: | |
| 2077 | + raise AssertionError("Verification should not run in this scenario") | |
| 2078 | + | |
| 2079 | + guide_root = temp_dir / "guides" / "nginx" | |
| 2080 | + chapters = guide_root / "chapters" | |
| 2081 | + guide_root.mkdir(parents=True) | |
| 2082 | + chapters.mkdir() | |
| 2083 | + index_path = guide_root / "index.html" | |
| 2084 | + index_path.write_text("<html></html>\n") | |
| 2085 | + | |
| 2086 | + chapter_paths = [ | |
| 2087 | + chapters / "01-getting-started.html", | |
| 2088 | + chapters / "02-installation.html", | |
| 2089 | + chapters / "03-first-website.html", | |
| 2090 | + chapters / "04-configuration-basics.html", | |
| 2091 | + chapters / "05-advanced-configurations.html", | |
| 2092 | + chapters / "06-performance-tuning.html", | |
| 2093 | + chapters / "07-security-best-practices.html", | |
| 2094 | + ] | |
| 2095 | + for chapter in chapter_paths[:4]: | |
| 2096 | + chapter.write_text(f"<h1>{chapter.stem}</h1>\n") | |
| 2097 | + chapter_paths[4].write_text("<h1>Advanced configurations</h1>\n") | |
| 2098 | + | |
| 2099 | + implementation_plan = temp_dir / "implementation.md" | |
| 2100 | + implementation_plan.write_text( | |
| 2101 | + "\n".join( | |
| 2102 | + [ | |
| 2103 | + "# Implementation Plan", | |
| 2104 | + "", | |
| 2105 | + "## File Changes", | |
| 2106 | + f"- `{guide_root}/`", | |
| 2107 | + f"- `{chapters}/`", | |
| 2108 | + f"- `{index_path}`", | |
| 2109 | + *[f"- `{path}`" for path in chapter_paths], | |
| 2110 | + "", | |
| 2111 | + ] | |
| 2112 | + ) | |
| 2113 | + ) | |
| 2114 | + | |
| 2115 | + context = build_context( | |
| 2116 | + temp_dir=temp_dir, | |
| 2117 | + messages=[], | |
| 2118 | + safeguards=FakeSafeguards(), | |
| 2119 | + assess_confidence=assess_confidence, | |
| 2120 | + verify_action=verify_action, | |
| 2121 | + auto_recover=False, | |
| 2122 | + ) | |
| 2123 | + queued_messages: list[str] = [] | |
| 2124 | + context.queue_steering_message_callback = queued_messages.append | |
| 2125 | + runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) | |
| 2126 | + dod = create_definition_of_done("Create a thorough nginx guide.") | |
| 2127 | + dod.implementation_plan = str(implementation_plan) | |
| 2128 | + sync_todos_to_definition_of_done( | |
| 2129 | + dod, | |
| 2130 | + [ | |
| 2131 | + { | |
| 2132 | + "content": "Create the nginx guide artifacts", | |
| 2133 | + "active_form": "Creating nginx guide artifacts", | |
| 2134 | + "status": "pending", | |
| 2135 | + }, | |
| 2136 | + { | |
| 2137 | + "content": "Verify all guide files are linked and complete", | |
| 2138 | + "active_form": "Verifying guide linkage and completeness", | |
| 2139 | + "status": "pending", | |
| 2140 | + }, | |
| 2141 | + ], | |
| 2142 | + ) | |
| 2143 | + tool_call = ToolCall( | |
| 2144 | + id="write-chapter-05", | |
| 2145 | + name="write", | |
| 2146 | + arguments={ | |
| 2147 | + "file_path": str(chapter_paths[4]), | |
| 2148 | + "content": "<h1>Advanced configurations</h1>\n", | |
| 2149 | + }, | |
| 2150 | + ) | |
| 2151 | + executor = FakeExecutor( | |
| 2152 | + [ | |
| 2153 | + tool_outcome( | |
| 2154 | + tool_call=tool_call, | |
| 2155 | + output=f"Successfully wrote {chapter_paths[4]}", | |
| 2156 | + is_error=False, | |
| 2157 | + ) | |
| 2158 | + ] | |
| 2159 | + ) | |
| 2160 | + | |
| 2161 | + summary = TurnSummary(final_response="") | |
| 2162 | + await runner.execute_batch( | |
| 2163 | + tool_calls=[tool_call], | |
| 2164 | + tool_source="assistant", | |
| 2165 | + pending_tool_calls_seen=set(), | |
| 2166 | + emit=_noop_emit, | |
| 2167 | + summary=summary, | |
| 2168 | + dod=dod, | |
| 2169 | + executor=executor, # type: ignore[arg-type] | |
| 2170 | + on_confirmation=None, | |
| 2171 | + on_user_question=None, | |
| 2172 | + emit_confirmation=None, | |
| 2173 | + consecutive_errors=0, | |
| 2174 | + ) | |
| 2175 | + | |
| 2176 | + assert any( | |
| 2177 | + "Resume by creating `06-performance-tuning.html` now." in message | |
| 2178 | + for message in queued_messages | |
| 2179 | + ) | |
| 2180 | + assert not any( | |
| 2181 | + "All explicitly planned artifacts now exist." in message | |
| 2182 | + for message in queued_messages | |
| 2183 | + ) | |
| 2184 | + | |
| 2185 | + | |
| 2186 | +@pytest.mark.asyncio | |
| 2187 | +async def test_tool_batch_runner_todowrite_with_missing_artifact_requeues_exact_resume_step( | |
| 2188 | + temp_dir: Path, | |
| 2189 | +) -> None: | |
| 2190 | + async def assess_confidence( | |
| 2191 | + tool_name: str, | |
| 2192 | + tool_args: dict, | |
| 2193 | + context: str, | |
| 2194 | + ) -> ConfidenceAssessment: | |
| 2195 | + raise AssertionError("Confidence scoring should not run in this scenario") | |
| 2196 | + | |
| 2197 | + async def verify_action( | |
| 2198 | + tool_name: str, | |
| 2199 | + tool_args: dict, | |
| 2200 | + result: str, | |
| 2201 | + expected: str = "", | |
| 2202 | + ) -> ActionVerification: | |
| 2203 | + raise AssertionError("Verification should not run in this scenario") | |
| 2204 | + | |
| 2205 | + guide_root = temp_dir / "guides" / "nginx" | |
| 2206 | + chapters = guide_root / "chapters" | |
| 2207 | + guide_root.mkdir(parents=True) | |
| 2208 | + chapters.mkdir() | |
| 2209 | + index_path = guide_root / "index.html" | |
| 2210 | + index_path.write_text("<html></html>\n") | |
| 2211 | + chapter_one = chapters / "01-getting-started.html" | |
| 2212 | + chapter_two = chapters / "02-installation.html" | |
| 2213 | + chapter_one.write_text("<h1>One</h1>\n") | |
| 2214 | + | |
| 2215 | + implementation_plan = temp_dir / "implementation.md" | |
| 2216 | + implementation_plan.write_text( | |
| 2217 | + "\n".join( | |
| 2218 | + [ | |
| 2219 | + "# Implementation Plan", | |
| 2220 | + "", | |
| 2221 | + "## File Changes", | |
| 2222 | + f"- `{guide_root}/`", | |
| 2223 | + f"- `{chapters}/`", | |
| 2224 | + f"- `{index_path}`", | |
| 2225 | + f"- `{chapter_one}`", | |
| 2226 | + f"- `{chapter_two}`", | |
| 2227 | + "", | |
| 2228 | + ] | |
| 2229 | + ) | |
| 2230 | + ) | |
| 2231 | + | |
| 2232 | + context = build_context( | |
| 2233 | + temp_dir=temp_dir, | |
| 2234 | + messages=[], | |
| 2235 | + safeguards=FakeSafeguards(), | |
| 2236 | + assess_confidence=assess_confidence, | |
| 2237 | + verify_action=verify_action, | |
| 2238 | + auto_recover=False, | |
| 2239 | + ) | |
| 2240 | + queued_messages: list[str] = [] | |
| 2241 | + context.queue_steering_message_callback = queued_messages.append | |
| 2242 | + runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) | |
| 2243 | + dod = create_definition_of_done("Create a multi-file nginx guide.") | |
| 2244 | + dod.implementation_plan = str(implementation_plan) | |
| 2245 | + sync_todos_to_definition_of_done( | |
| 2246 | + dod, | |
| 2247 | + [ | |
| 2248 | + { | |
| 2249 | + "content": "Create 01-getting-started.html", | |
| 2250 | + "active_form": "Creating 01-getting-started.html", | |
| 2251 | + "status": "completed", | |
| 2252 | + }, | |
| 2253 | + { | |
| 2254 | + "content": "Create 02-installation.html", | |
| 2255 | + "active_form": "Creating 02-installation.html", | |
| 2256 | + "status": "pending", | |
| 2257 | + }, | |
| 2258 | + ], | |
| 2259 | + ) | |
| 2260 | + dod.touched_files.extend([str(index_path), str(chapter_one)]) | |
| 2261 | + | |
| 2262 | + tool_call = ToolCall( | |
| 2263 | + id="todo-only", | |
| 2264 | + name="TodoWrite", | |
| 2265 | + arguments={ | |
| 2266 | + "todos": [ | |
| 2267 | + { | |
| 2268 | + "content": "Create 01-getting-started.html", | |
| 2269 | + "active_form": "Creating 01-getting-started.html", | |
| 2270 | + "status": "completed", | |
| 2271 | + }, | |
| 2272 | + { | |
| 2273 | + "content": "Create 02-installation.html", | |
| 2274 | + "active_form": "Creating 02-installation.html", | |
| 2275 | + "status": "pending", | |
| 2276 | + }, | |
| 2277 | + ] | |
| 2278 | + }, | |
| 2279 | + ) | |
| 2280 | + executor = FakeExecutor( | |
| 2281 | + [ | |
| 2282 | + tool_outcome( | |
| 2283 | + tool_call=tool_call, | |
| 2284 | + output="Todos updated", | |
| 2285 | + is_error=False, | |
| 2286 | + metadata={ | |
| 2287 | + "new_todos": [ | |
| 2288 | + { | |
| 2289 | + "content": "Create 01-getting-started.html", | |
| 2290 | + "active_form": "Creating 01-getting-started.html", | |
| 2291 | + "status": "completed", | |
| 2292 | + }, | |
| 2293 | + { | |
| 2294 | + "content": "Create 02-installation.html", | |
| 2295 | + "active_form": "Creating 02-installation.html", | |
| 2296 | + "status": "pending", | |
| 2297 | + }, | |
| 2298 | + ] | |
| 2299 | + }, | |
| 2300 | + ) | |
| 2301 | + ] | |
| 2302 | + ) | |
| 2303 | + | |
| 2304 | + summary = TurnSummary(final_response="") | |
| 2305 | + await runner.execute_batch( | |
| 2306 | + tool_calls=[tool_call], | |
| 2307 | + tool_source="assistant", | |
| 2308 | + pending_tool_calls_seen=set(), | |
| 2309 | + emit=_noop_emit, | |
| 2310 | + summary=summary, | |
| 2311 | + dod=dod, | |
| 2312 | + executor=executor, # type: ignore[arg-type] | |
| 2313 | + on_confirmation=None, | |
| 2314 | + on_user_question=None, | |
| 2315 | + emit_confirmation=None, | |
| 2316 | + consecutive_errors=0, | |
| 2317 | + ) | |
| 2318 | + | |
| 2319 | + assert queued_messages | |
| 2320 | + message = queued_messages[-1] | |
| 2321 | + assert "Todo tracking is updated. An explicitly planned artifact is still missing." in message | |
| 2322 | + assert "Resume by creating `02-installation.html` now." in message | |
| 2323 | + assert "refresh `TodoWrite`" in message | |
| 2324 | + assert "Do not spend the next turn on TodoWrite alone" in message | |
| 2325 | + | |
| 2326 | + | |
@pytest.mark.asyncio
async def test_tool_batch_runner_todowrite_after_artifacts_exist_pushes_verification_handoff(
    temp_dir: Path,
) -> None:
    """When every artifact declared in the implementation plan already exists on
    disk, a TodoWrite-only turn should queue a steering message that hands off
    to verification instead of requeuing a mutation.
    """

    # Confidence scoring and action verification must not be invoked on this
    # path; failing loudly via AssertionError catches regressions.
    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should not run in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    # Create ALL planned artifacts up front: index plus both chapters.
    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    guide_root.mkdir(parents=True)
    chapters.mkdir()
    index_path = guide_root / "index.html"
    chapter_one = chapters / "01-getting-started.html"
    chapter_two = chapters / "02-installation.html"
    index_path.write_text("<html></html>\n")
    chapter_one.write_text("<h1>One</h1>\n")
    chapter_two.write_text("<h1>Two</h1>\n")

    # Plan explicitly lists every path created above, so nothing is "missing".
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}/`",
                f"- `{chapters}/`",
                f"- `{index_path}`",
                f"- `{chapter_one}`",
                f"- `{chapter_two}`",
                "",
            ]
        )
    )

    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    # Capture queued steering messages for the final assertions.
    queued_messages: list[str] = []
    context.queue_steering_message_callback = queued_messages.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    dod.verification_commands = [f"ls -la {guide_root}"]
    sync_todos_to_definition_of_done(
        dod,
        [
            {
                "content": "First, examine the existing Fortran guide structure to understand the format and content organization",
                "active_form": "Working on: First, examine the existing Fortran guide structure to understand the format and content organization",
                "status": "pending",
            },
            {
                "content": "Verify all guide files are linked and complete",
                "active_form": "Working on: Verify all guide files are linked and complete",
                "status": "pending",
            },
        ],
        project_root=temp_dir,
    )

    # The only tool call in the batch is a bookkeeping-style TodoWrite.
    tool_call = ToolCall(
        id="todo-only",
        name="TodoWrite",
        arguments={
            "todos": [
                {
                    "content": "First, examine the existing Fortran guide structure to understand the format and content organization",
                    "active_form": "Working on: First, examine the existing Fortran guide structure to understand the format and content organization",
                    "status": "pending",
                },
                {
                    "content": "Verify all guide files are linked and complete",
                    "active_form": "Working on: Verify all guide files are linked and complete",
                    "status": "pending",
                },
            ]
        },
    )
    executor = FakeExecutor(
        [
            tool_outcome(
                tool_call=tool_call,
                output="Todos updated",
                is_error=False,
                metadata={
                    "new_todos": [
                        {
                            "content": "First, examine the existing Fortran guide structure to understand the format and content organization",
                            "active_form": "Working on: First, examine the existing Fortran guide structure to understand the format and content organization",
                            "status": "pending",
                        },
                        {
                            "content": "Verify all guide files are linked and complete",
                            "active_form": "Working on: Verify all guide files are linked and complete",
                            "status": "pending",
                        },
                    ]
                },
            )
        ]
    )

    summary = TurnSummary(final_response="")
    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=summary,
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    # Steering message must point at the pending verification todo, not at a
    # discovery step ("Fortran guide structure" must be absent).
    assert queued_messages
    message = queued_messages[-1]
    assert "Todo tracking is updated. All explicitly planned artifacts now exist." in message
    assert "Verify all guide files are linked and complete" in message
    assert "Move to verification once no specific mismatch remains." in message
    assert "reopen reference materials" in message
    assert "Fortran guide structure" not in message
| 2469 | + | |
| 2470 | + | |
@pytest.mark.asyncio
async def test_tool_batch_runner_todowrite_with_existing_output_roots_requeues_next_mutation(
    temp_dir: Path,
) -> None:
    """The planned output roots exist but `index.html` links to a chapter file
    that was never created; a TodoWrite-only turn should queue a steering
    message naming `01-introduction.html` as the concrete next mutation.
    """

    # Neither confidence scoring nor verification may run in this scenario.
    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should not run in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    guide_root.mkdir(parents=True)
    chapters.mkdir()
    index_path = guide_root / "index.html"
    # The index references chapters/01-introduction.html, which does NOT exist.
    index_path.write_text(
        "\n".join(
            [
                "<!DOCTYPE html>",
                "<html>",
                "<body>",
                '<a href="chapters/01-introduction.html">Introduction</a>',
                "</body>",
                "</html>",
                "",
            ]
        )
    )

    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}/`",
                f"- `{chapters}/`",
                f"- `{index_path}`",
                "",
            ]
        )
    )

    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    queued_messages: list[str] = []
    context.queue_steering_message_callback = queued_messages.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    dod.touched_files.append(str(index_path))
    # Two completed todos plus one pending ("Write the introduction chapter"),
    # which the steering message is expected to continue with.
    sync_todos_to_definition_of_done(
        dod,
        [
            {
                "content": "Examine the existing Fortran guide structure",
                "active_form": "Examining the existing Fortran guide structure",
                "status": "completed",
            },
            {
                "content": "Create the nginx directory structure",
                "active_form": "Creating the nginx directory structure",
                "status": "completed",
            },
            {
                "content": "Write the introduction chapter",
                "active_form": "Writing the introduction chapter",
                "status": "pending",
            },
        ],
        project_root=temp_dir,
    )

    # TodoWrite merely restates the same todo list — no real progress.
    tool_call = ToolCall(
        id="todo-next-mutation",
        name="TodoWrite",
        arguments={
            "todos": [
                {
                    "content": "Examine the existing Fortran guide structure",
                    "active_form": "Examining the existing Fortran guide structure",
                    "status": "completed",
                },
                {
                    "content": "Create the nginx directory structure",
                    "active_form": "Creating the nginx directory structure",
                    "status": "completed",
                },
                {
                    "content": "Write the introduction chapter",
                    "active_form": "Writing the introduction chapter",
                    "status": "pending",
                },
            ]
        },
    )
    executor = FakeExecutor(
        [
            tool_outcome(
                tool_call=tool_call,
                output="Todos updated",
                is_error=False,
                metadata={
                    "new_todos": [
                        {
                            "content": "Examine the existing Fortran guide structure",
                            "active_form": "Examining the existing Fortran guide structure",
                            "status": "completed",
                        },
                        {
                            "content": "Create the nginx directory structure",
                            "active_form": "Creating the nginx directory structure",
                            "status": "completed",
                        },
                        {
                            "content": "Write the introduction chapter",
                            "active_form": "Writing the introduction chapter",
                            "status": "pending",
                        },
                    ]
                },
            )
        ]
    )

    summary = TurnSummary(final_response="")
    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=summary,
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    # The steering message must name the missing declared output and steer the
    # next turn toward a single `write` mutation rather than more TodoWrite.
    assert queued_messages
    message = queued_messages[-1]
    assert "Todo tracking is updated. An explicitly planned artifact is still missing." in message
    assert "Continue with the next pending item: `Write the introduction chapter`." in message
    assert "Resume by creating `01-introduction.html` now." in message
    assert "It is the next missing declared output under `chapters/`." in message
    assert "Prefer one `write` call for `" in message
    assert "01-introduction.html` instead of more rereads." in message
    assert "Do not spend the next turn on TodoWrite alone" in message
| 2636 | + | |
| 2637 | + | |
@pytest.mark.asyncio
async def test_tool_batch_runner_todowrite_with_declared_child_targets_names_next_missing_file(
    temp_dir: Path,
) -> None:
    """`index.html` declares two child chapter links but neither chapter file
    exists; the steering message should name the first missing child target
    (`introduction.html`) as the resume step.
    """

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should not run in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    guide_root.mkdir(parents=True)
    chapters.mkdir()
    index_path = guide_root / "index.html"
    # Two hrefs into chapters/ with no corresponding files on disk.
    index_path.write_text(
        "\n".join(
            [
                "<html>",
                '<a href="chapters/introduction.html">Introduction</a>',
                '<a href="chapters/installation.html">Installation</a>',
                "</html>",
            ]
        )
        + "\n"
    )

    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}/`",
                f"- `{chapters}/`",
                f"- `{index_path}`",
                "",
            ]
        )
    )

    # Pending items are set directly on the DoD here (no todo sync helper).
    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    dod.pending_items = [
        "Write the introduction chapter",
        "Complete the requested work",
    ]
    dod.touched_files.append(str(index_path))

    queued_messages: list[str] = []
    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    context.queue_steering_message_callback = queued_messages.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))

    tool_call = ToolCall(
        id="todo-1",
        name="TodoWrite",
        arguments={
            "todos": [
                {
                    "content": "Write the introduction chapter",
                    # NOTE(review): camelCase "activeForm" here vs "active_form"
                    # everywhere else in this file — presumably exercises key
                    # normalization of TodoWrite arguments; confirm intentional.
                    "activeForm": "Writing the introduction chapter",
                    "status": "pending",
                }
            ]
        },
    )
    executor = FakeExecutor(
        [
            tool_outcome(
                tool_call=tool_call,
                output="Todos updated",
                is_error=False,
                metadata={
                    "new_todos": [
                        {
                            "content": "Write the introduction chapter",
                            "active_form": "Writing the introduction chapter",
                            "status": "pending",
                        }
                    ]
                },
            )
        ]
    )

    summary = TurnSummary(final_response="")
    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=summary,
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    assert queued_messages
    message = queued_messages[-1]
    assert "Todo tracking is updated. An explicitly planned artifact is still missing." in message
    assert "Continue with the next pending item: `Write the introduction chapter`." in message
    assert "Resume by creating `introduction.html` now." in message
    assert "It is the next missing declared output under `chapters/`." in message
    assert "Prefer one `write` call for `" in message
    assert "introduction.html` instead of more rereads." in message
    assert "Do not spend the next turn on TodoWrite alone" in message
| 2765 | + | |
| 2766 | + | |
@pytest.mark.asyncio
async def test_tool_batch_runner_bookkeeping_note_with_missing_artifact_requeues_resume_step(
    temp_dir: Path,
) -> None:
    """A `notepad_write_working` bookkeeping note while a planned artifact
    (`02-installation.html`) is still missing should queue a steering message
    demanding the concrete mutation next turn.
    """

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should not run in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    guide_root.mkdir(parents=True)
    chapters.mkdir()
    index_path = guide_root / "index.html"
    chapter_one = chapters / "01-getting-started.html"
    chapter_two = chapters / "02-installation.html"
    index_path.write_text("<html></html>\n")
    # chapter_two is declared in the plan below but deliberately NOT written.
    chapter_one.write_text("<h1>One</h1>\n")

    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}/`",
                f"- `{chapters}/`",
                f"- `{index_path}`",
                f"- `{chapter_one}`",
                f"- `{chapter_two}`",
                "",
            ]
        )
    )

    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    queued_messages: list[str] = []
    context.queue_steering_message_callback = queued_messages.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    sync_todos_to_definition_of_done(
        dod,
        [
            {
                "content": "Create 01-getting-started.html",
                "active_form": "Creating 01-getting-started.html",
                "status": "completed",
            },
            {
                "content": "Create 02-installation.html",
                "active_form": "Creating 02-installation.html",
                "status": "pending",
            },
        ],
        project_root=temp_dir,
    )
    dod.touched_files.extend([str(index_path), str(chapter_one)])

    # The turn produces only a working note — no mutation.
    tool_call = ToolCall(
        id="working-note",
        name="notepad_write_working",
        arguments={"content": "Creating the second chapter file: Installation"},
    )
    executor = FakeExecutor(
        [
            tool_outcome(
                tool_call=tool_call,
                output="Working note recorded",
                is_error=False,
            )
        ]
    )

    summary = TurnSummary(final_response="")
    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=summary,
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    assert queued_messages
    message = queued_messages[-1]
    assert "Bookkeeping note is recorded. An explicitly planned artifact is still missing." in message
    assert "Resume by creating `02-installation.html` now." in message
    assert "Make your next response the concrete mutation tool call itself" in message
    assert "refresh `TodoWrite`" in message
    assert "Do not spend the next turn on additional notes, rediscovery, verification, or final confirmation" in message
| 2881 | + | |
| 2882 | + | |
@pytest.mark.asyncio
async def test_tool_batch_runner_working_note_respects_discovery_first_pending_step(
    temp_dir: Path,
) -> None:
    """When the first pending item is a discovery step ("examine ..."), a
    working note should steer toward one evidence-gathering call — not jump
    ahead to creating `index.html`.
    """

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    # Plan declares outputs that do not exist yet; no files are created here.
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{temp_dir / 'guides' / 'nginx' / 'index.html'}`",
                f"- `{temp_dir / 'guides' / 'nginx' / 'chapters'}`",
                "",
            ]
        )
    )

    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    queued_messages: list[str] = []
    context.queue_steering_message_callback = queued_messages.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    # Discovery item comes first, before any mutation items.
    dod.pending_items.extend(
        [
            "First, examine the existing fortran guide structure and content to understand the format",
            "Create the nginx directory structure",
            "Develop the main index.html file for the nginx guide",
        ]
    )

    tool_call = ToolCall(
        id="working-note",
        name="notepad_write_working",
        arguments={"content": "Analyzing the fortran guide structure before creating nginx guide"},
    )
    executor = FakeExecutor(
        [
            tool_outcome(
                tool_call=tool_call,
                output="Working note recorded",
                is_error=False,
            )
        ]
    )

    summary = TurnSummary(final_response="")
    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=summary,
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    assert queued_messages
    message = queued_messages[-1]
    assert (
        "Continue with the next pending item: `First, examine the existing fortran guide structure and content to understand the format`."
        in message
    )
    assert "one concrete evidence-gathering tool call" in message
    # Must NOT skip discovery and push straight to index creation.
    assert "Resume by creating `index.html` now." not in message
| 2975 | + | |
| 2976 | + | |
@pytest.mark.asyncio
async def test_tool_batch_runner_hands_off_noop_toc_edit_when_file_is_already_valid(
    temp_dir: Path,
) -> None:
    """A no-op `edit` (old_string == new_string) blocked by the executor on an
    already-correct table of contents must NOT produce any steering message —
    the runner should hand off quietly.
    """

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should not run in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    prompt = (
        "Have a look at ~/Loader/guides/fortran/index.html, then "
        "~/Loader/guides/fortran/chapters. The table of contents links in "
        "index.html are inaccurate and the href’s are wrong. Let’s update the "
        "links and their link texts to be correct."
    )
    # On-disk chapters match the hrefs/link texts in the TOC below, so the
    # file is already valid despite what the user prompt claims.
    chapters = temp_dir / "chapters"
    chapters.mkdir()
    (chapters / "01-introduction.html").write_text(
        "<h1>Chapter 1: Introduction to Fortran</h1>\n"
    )
    (chapters / "02-setup.html").write_text(
        "<h1>Chapter 2: Setting Up Your Environment</h1>\n"
    )
    current_block = (
        "<h2>Table of Contents</h2>\n"
        '    <ul class="chapter-list">\n'
        '        <li><a href="chapters/01-introduction.html">Chapter 1: Introduction to Fortran</a></li>\n'
        '        <li><a href="chapters/02-setup.html">Chapter 2: Setting Up Your Environment</a></li>\n'
        "    </ul>\n"
    )
    index_path = temp_dir / "index.html"
    index_path.write_text(current_block)

    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    context.session.current_task = prompt  # type: ignore[attr-defined]
    queued_messages: list[str] = []
    context.queue_steering_message_callback = queued_messages.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    # Identical old_string/new_string — the executor blocks it as a no-op.
    tool_call = ToolCall(
        id="edit-1",
        name="edit",
        arguments={
            "file_path": str(index_path),
            "old_string": current_block,
            "new_string": current_block,
        },
    )
    executor = FakeExecutor(
        [
            tool_outcome(
                tool_call=tool_call,
                output=(
                    "[Blocked - old_string and new_string are identical - no change "
                    "would occur] Suggestion: Provide different old and new strings"
                ),
                is_error=True,
                state=ToolExecutionState.BLOCKED,
            )
        ]
    )

    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=TurnSummary(final_response=""),
        dod=create_definition_of_done(prompt),
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    # No steering message at all: the blocked no-op should not trigger nudges.
    assert queued_messages == []
| 3070 | + | |
| 3071 | + | |
async def _noop_emit(event: AgentEvent) -> None:
    """Event sink used by tests that do not inspect emitted events; discards
    every event and yields ``None``."""
    del event  # intentionally unused; implicit None return matches the contract
| 3074 | + | |
| 3075 | + | |
@pytest.mark.asyncio
async def test_tool_batch_runner_marks_verification_planned_after_new_mutation(
    temp_dir: Path,
) -> None:
    """A successful `write` mutation should flip the DoD into the "planned"
    verification state, seed verification commands/pending items, and append a
    `verification_planned` entry to the workflow timeline.
    """

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run for this scenario")

    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    tool_call = ToolCall(
        id="write-1",
        name="write",
        arguments={"file_path": str(temp_dir / "README.md"), "content": "updated\n"},
    )
    executor = FakeExecutor(
        [tool_outcome(tool_call=tool_call, output="wrote file", is_error=False)]
    )
    summary = TurnSummary(final_response="")
    dod = create_definition_of_done("Update README and verify it still works.")
    # Collect emitted events (not asserted here, but kept for debuggability).
    events: list[AgentEvent] = []

    async def emit(event: AgentEvent) -> None:
        events.append(event)

    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=emit,
        summary=summary,
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    # First mutation of the session -> attempt id/number start at 1.
    assert dod.last_verification_result == "planned"
    assert dod.verification_commands
    assert "Collect verification evidence" in dod.pending_items
    assert dod.active_verification_attempt_id == "verification-attempt-1"
    assert dod.active_verification_attempt_number == 1
    assert summary.workflow_timeline[-1].reason_code == "verification_planned"
    assert summary.workflow_timeline[-1].policy_outcome == "planned"
@@ -1526,3 +3240,238 @@ async def test_tool_batch_runner_marks_passed_verification_stale_after_new_mutat | ||
| 1526 | 3240 | summary.workflow_timeline[-1].verification_observations[0].command |
| 1527 | 3241 | == "uv run pytest -q" |
| 1528 | 3242 | ) |
| 3243 | + | |
| 3244 | + | |
| 3245 | +def test_tool_batch_runner_blocked_active_repair_nudge_uses_repair_scope(temp_dir: Path) -> None: | |
| 3246 | + async def assess_confidence( | |
| 3247 | + tool_name: str, | |
| 3248 | + tool_args: dict, | |
| 3249 | + context: str, | |
| 3250 | + ) -> ConfidenceAssessment: | |
| 3251 | + raise AssertionError("Confidence scoring should be disabled in this scenario") | |
| 3252 | + | |
| 3253 | + async def verify_action( | |
| 3254 | + tool_name: str, | |
| 3255 | + tool_args: dict, | |
| 3256 | + result: str, | |
| 3257 | + expected: str = "", | |
| 3258 | + ) -> ActionVerification: | |
| 3259 | + raise AssertionError("Verification should not run in this scenario") | |
| 3260 | + | |
| 3261 | + repair_target = temp_dir / "guide" / "index.html" | |
| 3262 | + context = build_context( | |
| 3263 | + temp_dir=temp_dir, | |
| 3264 | + messages=[ | |
| 3265 | + Message( | |
| 3266 | + role=Role.ASSISTANT, | |
| 3267 | + content=( | |
| 3268 | + "Repair focus:\n" | |
| 3269 | + f"- Fix the broken local reference `chapters/01-getting-started.html` in `{repair_target}`.\n" | |
| 3270 | + f"- Immediate next step: edit `{repair_target}`.\n" | |
| 3271 | + f"- If the broken reference should remain, create `{temp_dir / 'guide' / 'chapters' / '01-getting-started.html'}`; otherwise remove or replace `chapters/01-getting-started.html`.\n" | |
| 3272 | + ), | |
| 3273 | + ) | |
| 3274 | + ], | |
| 3275 | + safeguards=FakeSafeguards(), | |
| 3276 | + assess_confidence=assess_confidence, | |
| 3277 | + verify_action=verify_action, | |
| 3278 | + ) | |
| 3279 | + queued: list[str] = [] | |
| 3280 | + context.queue_steering_message_callback = queued.append | |
| 3281 | + runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) | |
| 3282 | + | |
| 3283 | + runner._queue_blocked_active_repair_nudge( | |
| 3284 | + "[Blocked - active repair scope: verification already identified the repair target.]" | |
| 3285 | + ) | |
| 3286 | + | |
| 3287 | + assert queued | |
| 3288 | + assert str(repair_target) in queued[0] | |
| 3289 | + assert str(temp_dir / "guide" / "chapters" / "01-getting-started.html") in queued[0] | |
| 3290 | + assert "Do not reopen unrelated reference materials" in queued[0] | |
| 3291 | + | |
| 3292 | + | |
| 3293 | +def test_tool_batch_runner_blocked_active_repair_mutation_nudge_uses_allowed_paths( | |
| 3294 | + temp_dir: Path, | |
| 3295 | +) -> None: | |
| 3296 | + async def assess_confidence( | |
| 3297 | + tool_name: str, | |
| 3298 | + tool_args: dict, | |
| 3299 | + context: str, | |
| 3300 | + ) -> ConfidenceAssessment: | |
| 3301 | + raise AssertionError("Confidence scoring should be disabled in this scenario") | |
| 3302 | + | |
| 3303 | + async def verify_action( | |
| 3304 | + tool_name: str, | |
| 3305 | + tool_args: dict, | |
| 3306 | + result: str, | |
| 3307 | + expected: str = "", | |
| 3308 | + ) -> ActionVerification: | |
| 3309 | + raise AssertionError("Verification should not run in this scenario") | |
| 3310 | + | |
| 3311 | + repair_target = temp_dir / "guide" / "chapters" / "05-advanced-configurations.html" | |
| 3312 | + stylesheet = temp_dir / "guide" / "styles.css" | |
| 3313 | + context = build_context( | |
| 3314 | + temp_dir=temp_dir, | |
| 3315 | + messages=[ | |
| 3316 | + Message( | |
| 3317 | + role=Role.ASSISTANT, | |
| 3318 | + content=( | |
| 3319 | + "Repair focus:\n" | |
| 3320 | + f"- Fix the broken local reference `../styles.css` in `{repair_target}`.\n" | |
| 3321 | + f"- Immediate next step: edit `{repair_target}`.\n" | |
| 3322 | + f"- If the broken reference should remain, create `{stylesheet}`; otherwise remove or replace `../styles.css`.\n" | |
| 3323 | + ), | |
| 3324 | + ) | |
| 3325 | + ], | |
| 3326 | + safeguards=FakeSafeguards(), | |
| 3327 | + assess_confidence=assess_confidence, | |
| 3328 | + verify_action=verify_action, | |
| 3329 | + ) | |
| 3330 | + queued: list[str] = [] | |
| 3331 | + context.queue_steering_message_callback = queued.append | |
| 3332 | + runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) | |
| 3333 | + | |
| 3334 | + runner._queue_blocked_active_repair_mutation_nudge( | |
| 3335 | + "[Blocked - active repair mutation scope: verification already identified the repair target.]" | |
| 3336 | + ) | |
| 3337 | + | |
| 3338 | + assert queued | |
| 3339 | + assert str(repair_target) in queued[0] | |
| 3340 | + assert str(stylesheet) in queued[0] | |
| 3341 | + assert "before widening the change set" in queued[0] | |
| 3342 | + | |
| 3343 | + | |
| 3344 | +def test_tool_batch_runner_blocked_late_reference_drift_nudge_points_to_missing_artifact( | |
| 3345 | + temp_dir: Path, | |
| 3346 | +) -> None: | |
| 3347 | + async def assess_confidence( | |
| 3348 | + tool_name: str, | |
| 3349 | + tool_args: dict, | |
| 3350 | + context: str, | |
| 3351 | + ) -> ConfidenceAssessment: | |
| 3352 | + raise AssertionError("Confidence scoring should be disabled in this scenario") | |
| 3353 | + | |
| 3354 | + async def verify_action( | |
| 3355 | + tool_name: str, | |
| 3356 | + tool_args: dict, | |
| 3357 | + result: str, | |
| 3358 | + expected: str = "", | |
| 3359 | + ) -> ActionVerification: | |
| 3360 | + raise AssertionError("Verification should not run in this scenario") | |
| 3361 | + | |
| 3362 | + context = build_context( | |
| 3363 | + temp_dir=temp_dir, | |
| 3364 | + messages=[], | |
| 3365 | + safeguards=FakeSafeguards(), | |
| 3366 | + assess_confidence=assess_confidence, | |
| 3367 | + verify_action=verify_action, | |
| 3368 | + ) | |
| 3369 | + queued: list[str] = [] | |
| 3370 | + context.queue_steering_message_callback = queued.append | |
| 3371 | + store = DefinitionOfDoneStore(temp_dir) | |
| 3372 | + dod = create_definition_of_done("Create a multi-file guide from a reference") | |
| 3373 | + plan_path = temp_dir / "implementation.md" | |
| 3374 | + plan_path.write_text( | |
| 3375 | + "# File Changes\n" | |
| 3376 | + "- `guide/index.html`\n" | |
| 3377 | + "- `guide/chapters/01-getting-started.html`\n" | |
| 3378 | + "- `guide/chapters/02-installation.html`\n" | |
| 3379 | + "- `guide/chapters/03-first-website.html`\n" | |
| 3380 | + ) | |
| 3381 | + dod.implementation_plan = str(plan_path) | |
| 3382 | + (temp_dir / "guide" / "chapters").mkdir(parents=True, exist_ok=True) | |
| 3383 | + (temp_dir / "guide" / "index.html").write_text("index") | |
| 3384 | + (temp_dir / "guide" / "chapters" / "01-getting-started.html").write_text("one") | |
| 3385 | + (temp_dir / "guide" / "chapters" / "02-installation.html").write_text("two") | |
| 3386 | + runner = ToolBatchRunner(context, store) | |
| 3387 | + | |
| 3388 | + runner._queue_blocked_late_reference_drift_nudge( | |
| 3389 | + "[Blocked - late reference drift: several planned artifacts already exist.]", | |
| 3390 | + dod=dod, | |
| 3391 | + ) | |
| 3392 | + | |
| 3393 | + assert queued | |
| 3394 | + assert "03-first-website.html" in queued[0] | |
| 3395 | + assert "older reference materials" in queued[0] | |
| 3396 | + | |
| 3397 | + | |
| 3398 | +def test_tool_batch_runner_blocked_completed_artifact_scope_nudge_prefers_verification( | |
| 3399 | + temp_dir: Path, | |
| 3400 | +) -> None: | |
| 3401 | + async def assess_confidence( | |
| 3402 | + tool_name: str, | |
| 3403 | + tool_args: dict, | |
| 3404 | + context: str, | |
| 3405 | + ) -> ConfidenceAssessment: | |
| 3406 | + raise AssertionError("Confidence scoring should be disabled in this scenario") | |
| 3407 | + | |
| 3408 | + async def verify_action( | |
| 3409 | + tool_name: str, | |
| 3410 | + tool_args: dict, | |
| 3411 | + result: str, | |
| 3412 | + expected: str = "", | |
| 3413 | + ) -> ActionVerification: | |
| 3414 | + raise AssertionError("Verification should not run in this scenario") | |
| 3415 | + | |
| 3416 | + guide_root = temp_dir / "guide" | |
| 3417 | + chapters = guide_root / "chapters" | |
| 3418 | + guide_root.mkdir(parents=True) | |
| 3419 | + chapters.mkdir() | |
| 3420 | + index_path = guide_root / "index.html" | |
| 3421 | + chapter_one = chapters / "01-getting-started.html" | |
| 3422 | + chapter_two = chapters / "02-installation.html" | |
| 3423 | + index_path.write_text("index") | |
| 3424 | + chapter_one.write_text("one") | |
| 3425 | + chapter_two.write_text("two") | |
| 3426 | + | |
| 3427 | + implementation_plan = temp_dir / "implementation.md" | |
| 3428 | + implementation_plan.write_text( | |
| 3429 | + "\n".join( | |
| 3430 | + [ | |
| 3431 | + "# Implementation Plan", | |
| 3432 | + "", | |
| 3433 | + "## File Changes", | |
| 3434 | + f"- `{guide_root}`", | |
| 3435 | + f"- `{chapters}`", | |
| 3436 | + f"- `{index_path}`", | |
| 3437 | + f"- `{chapter_one}`", | |
| 3438 | + f"- `{chapter_two}`", | |
| 3439 | + "", | |
| 3440 | + ] | |
| 3441 | + ) | |
| 3442 | + ) | |
| 3443 | + | |
| 3444 | + context = build_context( | |
| 3445 | + temp_dir=temp_dir, | |
| 3446 | + messages=[], | |
| 3447 | + safeguards=FakeSafeguards(), | |
| 3448 | + assess_confidence=assess_confidence, | |
| 3449 | + verify_action=verify_action, | |
| 3450 | + ) | |
| 3451 | + queued: list[str] = [] | |
| 3452 | + context.queue_steering_message_callback = queued.append | |
| 3453 | + runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) | |
| 3454 | + dod = create_definition_of_done("Create a multi-file guide from a reference") | |
| 3455 | + dod.implementation_plan = str(implementation_plan) | |
| 3456 | + dod.verification_commands = [f"ls -la {guide_root}"] | |
| 3457 | + sync_todos_to_definition_of_done( | |
| 3458 | + dod, | |
| 3459 | + [ | |
| 3460 | + { | |
| 3461 | + "content": "Verify all guide files are linked and complete", | |
| 3462 | + "active_form": "Working on: Verify all guide files are linked and complete", | |
| 3463 | + "status": "pending", | |
| 3464 | + } | |
| 3465 | + ], | |
| 3466 | + project_root=temp_dir, | |
| 3467 | + ) | |
| 3468 | + | |
| 3469 | + runner._queue_blocked_completed_artifact_scope_nudge( | |
| 3470 | + "[Blocked - completed artifact set scope: all explicitly planned artifacts already exist.]", | |
| 3471 | + dod=dod, | |
| 3472 | + ) | |
| 3473 | + | |
| 3474 | + assert queued | |
| 3475 | + assert "All explicitly planned artifacts already exist." in queued[0] | |
| 3476 | + assert "Verify all guide files are linked and complete" in queued[0] | |
| 3477 | + assert "Do not reopen earlier reference materials." in queued[0] | |
tests/test_turn_completion.pymodified@@ -186,6 +186,103 @@ async def test_turn_completion_marks_non_mutating_response_done( | ||
| 186 | 186 | ) |
| 187 | 187 | |
| 188 | 188 | |
| 189 | +@pytest.mark.asyncio | |
| 190 | +async def test_turn_completion_blocks_false_completion_without_preserving_it( | |
| 191 | + temp_dir: Path, | |
| 192 | +) -> None: | |
| 193 | + backend = ScriptedBackend() | |
| 194 | + agent = Agent( | |
| 195 | + backend=backend, | |
| 196 | + config=non_streaming_config(), | |
| 197 | + project_root=temp_dir, | |
| 198 | + ) | |
| 199 | + runtime = ConversationRuntime(agent) | |
| 200 | + events = [] | |
| 201 | + | |
| 202 | + async def capture(event) -> None: | |
| 203 | + events.append(event) | |
| 204 | + | |
| 205 | + prepared = await runtime.turn_preparation.prepare( | |
| 206 | + task=( | |
| 207 | + "Create a multi-file nginx guide under ~/Loader/guides/nginx " | |
| 208 | + "with an index and chapter files." | |
| 209 | + ), | |
| 210 | + emit=capture, | |
| 211 | + requested_mode="execute", | |
| 212 | + original_task=None, | |
| 213 | + on_user_question=None, | |
| 214 | + ) | |
| 215 | + await runtime.phase_tracker.enter( | |
| 216 | + TurnPhase.ASSISTANT, | |
| 217 | + capture, | |
| 218 | + detail="Requesting assistant response", | |
| 219 | + reason_code="request_assistant_response", | |
| 220 | + ) | |
| 221 | + | |
| 222 | + implementation_plan = temp_dir / "implementation.md" | |
| 223 | + implementation_plan.write_text( | |
| 224 | + "# Implementation Plan\n\n" | |
| 225 | + "## File Changes\n\n" | |
| 226 | + "1. Create main index.html file:\n" | |
| 227 | + " - `index.html`\n\n" | |
| 228 | + "2. Create chapter files:\n" | |
| 229 | + " - `chapters/01-getting-started.html`\n" | |
| 230 | + " - `chapters/06-troubleshooting.html`\n" | |
| 231 | + ) | |
| 232 | + chapters_dir = temp_dir / "chapters" | |
| 233 | + chapters_dir.mkdir() | |
| 234 | + (chapters_dir / "01-getting-started.html").write_text("<h1>Getting Started</h1>\n") | |
| 235 | + (temp_dir / "index.html").write_text("<h1>NGINX Guide</h1>\n") | |
| 236 | + | |
| 237 | + prepared.definition_of_done.implementation_plan = str(implementation_plan) | |
| 238 | + prepared.definition_of_done.mutating_actions.append("write") | |
| 239 | + prepared.definition_of_done.touched_files.extend( | |
| 240 | + [ | |
| 241 | + str(temp_dir / "index.html"), | |
| 242 | + str(chapters_dir / "01-getting-started.html"), | |
| 243 | + ] | |
| 244 | + ) | |
| 245 | + | |
| 246 | + queued_messages: list[str] = [] | |
| 247 | + runtime.context.queue_steering_message_callback = queued_messages.append | |
| 248 | + | |
| 249 | + completion_claim = ( | |
| 250 | + "I've successfully completed the NGINX guide with all planned files " | |
| 251 | + "and verified everything is done." | |
| 252 | + ) | |
| 253 | + decision = await runtime.turn_completion.handle_text_response( | |
| 254 | + content=completion_claim, | |
| 255 | + response_content=completion_claim, | |
| 256 | + task=prepared.task, | |
| 257 | + effective_task=prepared.effective_task, | |
| 258 | + iterations=1, | |
| 259 | + max_iterations=agent.config.max_iterations, | |
| 260 | + actions_taken=[], | |
| 261 | + continuation_count=0, | |
| 262 | + dod=prepared.definition_of_done, | |
| 263 | + emit=capture, | |
| 264 | + summary=prepared.summary, | |
| 265 | + executor=prepared.executor, | |
| 266 | + rollback_plan=prepared.rollback_plan, | |
| 267 | + ) | |
| 268 | + | |
| 269 | + assert decision.action == TurnCompletionAction.CONTINUE | |
| 270 | + assert prepared.summary.assistant_messages == [] | |
| 271 | + assert not any( | |
| 272 | + message.role.value == "assistant" and message.content == completion_claim | |
| 273 | + for message in agent.session.messages | |
| 274 | + ) | |
| 275 | + assert agent.session.messages[-1].role.value == "user" | |
| 276 | + assert agent.session.messages[-1].content.startswith( | |
| 277 | + "[PLANNED ARTIFACTS STILL MISSING]" | |
| 278 | + ) | |
| 279 | + assert "`06-troubleshooting.html`" in agent.session.messages[-1].content | |
| 280 | + assert queued_messages | |
| 281 | + assert "06-troubleshooting.html" in queued_messages[-1] | |
| 282 | + assert "Do not summarize, mark completion, or write bookkeeping notes yet" in queued_messages[-1] | |
| 283 | + assert not any(event.type == "response" for event in events) | |
| 284 | + | |
| 285 | + | |
| 189 | 286 | @pytest.mark.asyncio |
| 190 | 287 | async def test_turn_completion_handles_fake_tool_narration_without_reroute( |
| 191 | 288 | temp_dir: Path, |
tests/test_workflow.pymodified@@ -15,10 +15,12 @@ from loader.runtime.workflow import ( | ||
| 15 | 15 | WorkflowMode, |
| 16 | 16 | advance_todos_from_tool_call, |
| 17 | 17 | build_execute_bridge, |
| 18 | + effective_pending_todo_items, | |
| 18 | 19 | enrich_clarify_brief_with_grounding, |
| 19 | 20 | extract_verification_commands_from_markdown, |
| 20 | 21 | merge_refreshed_todos_with_existing_scope, |
| 21 | 22 | preserve_task_grounded_acceptance_criteria, |
| 23 | + reconcile_aggregate_completion_steps, | |
| 22 | 24 | sync_todos_to_definition_of_done, |
| 23 | 25 | ) |
| 24 | 26 | |
@@ -454,6 +456,167 @@ def test_merge_refreshed_todos_with_existing_scope_filters_retro_refresh_noise() | ||
| 454 | 456 | assert "04-configuring.html" not in labels |
| 455 | 457 | |
| 456 | 458 | |
| 459 | +def test_merge_refreshed_todos_with_existing_scope_drops_unplanned_filename_expansion() -> None: | |
| 460 | + task = ( | |
| 461 | + "Create an equally thorough nginx guide with index.html plus chapter files " | |
| 462 | + "covering getting started, installation, configuration, usage, and troubleshooting." | |
| 463 | + ) | |
| 464 | + | |
| 465 | + todos = merge_refreshed_todos_with_existing_scope( | |
| 466 | + task, | |
| 467 | + existing_pending_items=[ | |
| 468 | + "Create chapter files with appropriate content structure", | |
| 469 | + ], | |
| 470 | + existing_completed_items=[ | |
| 471 | + "Create the nginx guide directory structure", | |
| 472 | + "Create introduction.html", | |
| 473 | + ], | |
| 474 | + refreshed_steps=[ | |
| 475 | + "Create optimization.html", | |
| 476 | + "Create security.html", | |
| 477 | + "Ensure consistent chapter navigation", | |
| 478 | + ], | |
| 479 | + planned_files={ | |
| 480 | + "index.html", | |
| 481 | + "introduction.html", | |
| 482 | + "installation.html", | |
| 483 | + "configuration.html", | |
| 484 | + "usage.html", | |
| 485 | + "troubleshooting.html", | |
| 486 | + }, | |
| 487 | + ) | |
| 488 | + | |
| 489 | + labels = {item["content"]: item["status"] for item in todos} | |
| 490 | + assert "Create chapter files with appropriate content structure" in labels | |
| 491 | + assert "Ensure consistent chapter navigation" in labels | |
| 492 | + assert "Create optimization.html" not in labels | |
| 493 | + assert "Create security.html" not in labels | |
| 494 | + | |
| 495 | + | |
| 496 | +def test_planning_artifacts_with_file_changes_replaces_file_change_section() -> None: | |
| 497 | + artifacts = PlanningArtifacts( | |
| 498 | + implementation_markdown="\n".join( | |
| 499 | + [ | |
| 500 | + "# Implementation Plan", | |
| 501 | + "", | |
| 502 | + "## File Changes", | |
| 503 | + "- `old.txt`", | |
| 504 | + "", | |
| 505 | + "## Execution Order", | |
| 506 | + "- Do the work", | |
| 507 | + "", | |
| 508 | + ] | |
| 509 | + ) | |
| 510 | + + "\n", | |
| 511 | + verification_markdown="# Verification Plan\n", | |
| 512 | + verification_commands=[], | |
| 513 | + acceptance_criteria=["task"], | |
| 514 | + implementation_steps=["Do the work"], | |
| 515 | + ) | |
| 516 | + | |
| 517 | + updated = artifacts.with_file_changes( | |
| 518 | + ["`guides/nginx/index.html`", "`guides/nginx/chapters/`"] | |
| 519 | + ) | |
| 520 | + | |
| 521 | + assert "`old.txt`" not in updated.implementation_markdown | |
| 522 | + assert "`guides/nginx/index.html`" in updated.implementation_markdown | |
| 523 | + assert "`guides/nginx/chapters/`" in updated.implementation_markdown | |
| 524 | + | |
| 525 | + | |
| 526 | +def test_effective_pending_todo_items_filters_stale_discovery_after_artifacts_exist( | |
| 527 | + temp_dir: Path, | |
| 528 | +) -> None: | |
| 529 | + guide_root = temp_dir / "guides" / "nginx" | |
| 530 | + chapters = guide_root / "chapters" | |
| 531 | + guide_root.mkdir(parents=True) | |
| 532 | + chapters.mkdir() | |
| 533 | + index_path = guide_root / "index.html" | |
| 534 | + chapter_one = chapters / "01-getting-started.html" | |
| 535 | + chapter_two = chapters / "02-installation.html" | |
| 536 | + index_path.write_text("<html></html>\n") | |
| 537 | + chapter_one.write_text("<h1>One</h1>\n") | |
| 538 | + chapter_two.write_text("<h1>Two</h1>\n") | |
| 539 | + | |
| 540 | + implementation_plan = temp_dir / "implementation.md" | |
| 541 | + implementation_plan.write_text( | |
| 542 | + "\n".join( | |
| 543 | + [ | |
| 544 | + "# Implementation Plan", | |
| 545 | + "", | |
| 546 | + "## File Changes", | |
| 547 | + f"- `{guide_root}/`", | |
| 548 | + f"- `{chapters}/`", | |
| 549 | + f"- `{index_path}`", | |
| 550 | + f"- `{chapter_one}`", | |
| 551 | + f"- `{chapter_two}`", | |
| 552 | + "", | |
| 553 | + ] | |
| 554 | + ) | |
| 555 | + ) | |
| 556 | + | |
| 557 | + dod = create_definition_of_done("Create a multi-file nginx guide.") | |
| 558 | + dod.implementation_plan = str(implementation_plan) | |
| 559 | + dod.pending_items = [ | |
| 560 | + "First, examine the existing Fortran guide structure to understand the format and content organization", | |
| 561 | + "Verify all guide files are linked and complete", | |
| 562 | + "Complete the requested work", | |
| 563 | + ] | |
| 564 | + | |
| 565 | + pending = effective_pending_todo_items(dod, project_root=temp_dir) | |
| 566 | + | |
| 567 | + assert "Verify all guide files are linked and complete" in pending | |
| 568 | + assert "Complete the requested work" in pending | |
| 569 | + assert not any("Fortran guide structure" in item for item in pending) | |
| 570 | + | |
| 571 | + | |
| 572 | +def test_effective_pending_todo_items_filters_stale_creation_steps_after_artifacts_exist( | |
| 573 | + temp_dir: Path, | |
| 574 | +) -> None: | |
| 575 | + guide_root = temp_dir / "guides" / "nginx" | |
| 576 | + chapters = guide_root / "chapters" | |
| 577 | + guide_root.mkdir(parents=True) | |
| 578 | + chapters.mkdir() | |
| 579 | + index_path = guide_root / "index.html" | |
| 580 | + chapter_one = chapters / "01-getting-started.html" | |
| 581 | + chapter_two = chapters / "02-installation.html" | |
| 582 | + index_path.write_text("<html></html>\n") | |
| 583 | + chapter_one.write_text("<h1>One</h1>\n") | |
| 584 | + chapter_two.write_text("<h1>Two</h1>\n") | |
| 585 | + | |
| 586 | + implementation_plan = temp_dir / "implementation.md" | |
| 587 | + implementation_plan.write_text( | |
| 588 | + "\n".join( | |
| 589 | + [ | |
| 590 | + "# Implementation Plan", | |
| 591 | + "", | |
| 592 | + "## File Changes", | |
| 593 | + f"- `{guide_root}/`", | |
| 594 | + f"- `{chapters}/`", | |
| 595 | + f"- `{index_path}`", | |
| 596 | + f"- `{chapter_one}`", | |
| 597 | + f"- `{chapter_two}`", | |
| 598 | + "", | |
| 599 | + ] | |
| 600 | + ) | |
| 601 | + ) | |
| 602 | + | |
| 603 | + dod = create_definition_of_done("Create a multi-file nginx guide.") | |
| 604 | + dod.implementation_plan = str(implementation_plan) | |
| 605 | + dod.pending_items = [ | |
| 606 | + "Create 01-getting-started.html", | |
| 607 | + "Creating 02-installation.html", | |
| 608 | + "Verify all guide files are linked and complete", | |
| 609 | + "Complete the requested work", | |
| 610 | + ] | |
| 611 | + | |
| 612 | + pending = effective_pending_todo_items(dod, project_root=temp_dir) | |
| 613 | + | |
| 614 | + assert "Verify all guide files are linked and complete" in pending | |
| 615 | + assert "Complete the requested work" in pending | |
| 616 | + assert "Create 01-getting-started.html" not in pending | |
| 617 | + assert "Creating 02-installation.html" not in pending | |
| 618 | + | |
| 619 | + | |
| 457 | 620 | def test_workflow_artifact_store_and_bridge_round_trip(tmp_path: Path) -> None: |
| 458 | 621 | store = WorkflowArtifactStore(tmp_path) |
| 459 | 622 | brief = ClarifyBrief.fallback( |
@@ -523,6 +686,58 @@ def test_sync_todos_to_definition_of_done_preserves_runtime_items() -> None: | ||
| 523 | 686 | assert "Update tests" in dod.completed_items |
| 524 | 687 | |
| 525 | 688 | |
| 689 | +def test_sync_todos_to_definition_of_done_keeps_completed_items_monotonic() -> None: | |
| 690 | + dod = create_definition_of_done("Create a multi-file nginx guide.") | |
| 691 | + sync_todos_to_definition_of_done( | |
| 692 | + dod, | |
| 693 | + [ | |
| 694 | + { | |
| 695 | + "content": "Create 03-first-website.html", | |
| 696 | + "active_form": "Creating 03-first-website.html", | |
| 697 | + "status": "pending", | |
| 698 | + }, | |
| 699 | + { | |
| 700 | + "content": "Create 04-configuration-basics.html", | |
| 701 | + "active_form": "Creating 04-configuration-basics.html", | |
| 702 | + "status": "pending", | |
| 703 | + }, | |
| 704 | + ], | |
| 705 | + ) | |
| 706 | + | |
| 707 | + assert advance_todos_from_tool_call( | |
| 708 | + dod, | |
| 709 | + ToolCall( | |
| 710 | + id="write-third-chapter", | |
| 711 | + name="write", | |
| 712 | + arguments={ | |
| 713 | + "file_path": "/tmp/nginx/chapters/03-first-website.html", | |
| 714 | + "content": "<html></html>", | |
| 715 | + }, | |
| 716 | + ), | |
| 717 | + ) | |
| 718 | + assert "Create 03-first-website.html" in dod.completed_items | |
| 719 | + | |
| 720 | + sync_todos_to_definition_of_done( | |
| 721 | + dod, | |
| 722 | + [ | |
| 723 | + { | |
| 724 | + "content": "Create 03-first-website.html", | |
| 725 | + "active_form": "Creating 03-first-website.html", | |
| 726 | + "status": "pending", | |
| 727 | + }, | |
| 728 | + { | |
| 729 | + "content": "Create 04-configuration-basics.html", | |
| 730 | + "active_form": "Creating 04-configuration-basics.html", | |
| 731 | + "status": "pending", | |
| 732 | + }, | |
| 733 | + ], | |
| 734 | + ) | |
| 735 | + | |
| 736 | + assert "Create 03-first-website.html" in dod.completed_items | |
| 737 | + assert "Create 03-first-website.html" not in dod.pending_items | |
| 738 | + assert "Create 04-configuration-basics.html" in dod.pending_items | |
| 739 | + | |
| 740 | + | |
| 526 | 741 | def test_advance_todos_from_tool_call_tracks_plan_progress() -> None: |
| 527 | 742 | dod = create_definition_of_done("Fix the chapter links in index.html.") |
| 528 | 743 | sync_todos_to_definition_of_done( |
@@ -651,6 +866,41 @@ def test_advance_todos_from_tool_call_keeps_aggregate_mutation_steps_pending() - | ||
| 651 | 866 | ) |
| 652 | 867 | |
| 653 | 868 | |
| 869 | +def test_advance_todos_from_tool_call_keeps_plural_chapter_creation_step_pending() -> None: | |
| 870 | + dod = create_definition_of_done("Create a multi-file nginx guide.") | |
| 871 | + sync_todos_to_definition_of_done( | |
| 872 | + dod, | |
| 873 | + [ | |
| 874 | + { | |
| 875 | + "content": "Create chapter files following the established pattern", | |
| 876 | + "active_form": "Working on: Create chapter files following the established pattern", | |
| 877 | + "status": "pending", | |
| 878 | + }, | |
| 879 | + { | |
| 880 | + "content": "Ensure consistency with existing guide formatting and content style", | |
| 881 | + "active_form": "Working on: Ensure consistency with existing guide formatting and content style", | |
| 882 | + "status": "pending", | |
| 883 | + }, | |
| 884 | + ], | |
| 885 | + ) | |
| 886 | + | |
| 887 | + assert ( | |
| 888 | + advance_todos_from_tool_call( | |
| 889 | + dod, | |
| 890 | + ToolCall( | |
| 891 | + id="write-one-chapter", | |
| 892 | + name="write", | |
| 893 | + arguments={ | |
| 894 | + "file_path": "/tmp/nginx/chapters/01-overview.html", | |
| 895 | + "content": "<html></html>", | |
| 896 | + }, | |
| 897 | + ), | |
| 898 | + ) | |
| 899 | + is False | |
| 900 | + ) | |
| 901 | + assert "Create chapter files following the established pattern" in dod.pending_items | |
| 902 | + | |
| 903 | + | |
| 654 | 904 | def test_advance_todos_from_tool_call_tracks_bash_directory_creation_progress() -> None: |
| 655 | 905 | dod = create_definition_of_done("Create a multi-file nginx guide.") |
| 656 | 906 | sync_todos_to_definition_of_done( |
@@ -679,3 +929,283 @@ def test_advance_todos_from_tool_call_tracks_bash_directory_creation_progress() | ||
| 679 | 929 | ) |
| 680 | 930 | assert "Create the nginx directory structure" in dod.completed_items |
| 681 | 931 | assert "Create index.html for nginx guide" in dod.pending_items |
| 932 | + | |
| 933 | + | |
def test_advance_todos_from_tool_call_does_not_complete_linking_step_from_glob() -> None:
    """A read-only glob over reference material never advances the linking step."""
    linking_step = "Link all chapters together properly in the index file"
    dod = create_definition_of_done("Create a multi-file nginx guide.")
    sync_todos_to_definition_of_done(
        dod,
        [
            {
                "content": linking_step,
                "active_form": "Working on: Link all chapters together properly in the index file",
                "status": "pending",
            },
        ],
    )

    reference_glob = ToolCall(
        id="glob-reference-chapters",
        name="glob",
        arguments={"path": "~/Loader", "pattern": "**/fortran/chapters/*"},
    )
    advanced = advance_todos_from_tool_call(dod, reference_glob)

    assert advanced is False
    assert linking_step in dod.pending_items
| 959 | + | |
| 960 | + | |
def test_sync_todos_to_definition_of_done_keeps_linking_step_pending_while_artifacts_missing(
    temp_dir: Path,
) -> None:
    """The linking step cannot stay completed while a planned chapter file is absent."""
    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True)
    index_path = guide_root / "index.html"
    chapter_one = chapters / "01-getting-started.html"
    chapter_two = chapters / "02-installation.html"  # planned but intentionally never written
    index_path.write_text("<html></html>\n")
    chapter_one.write_text("<h1>One</h1>\n")

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{guide_root}/`",
        f"- `{chapters}/`",
        f"- `{index_path}`",
        f"- `{chapter_one}`",
        f"- `{chapter_two}`",
        "",
    ]
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text("\n".join(plan_lines))

    linking_step = "Link all chapters together properly in the index file"
    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    sync_todos_to_definition_of_done(
        dod,
        [
            {
                "content": "Create 01-getting-started.html chapter file",
                "active_form": "Creating 01-getting-started.html chapter file",
                "status": "completed",
            },
            {
                "content": linking_step,
                "active_form": "Linking chapters in the index file",
                "status": "completed",
            },
            {
                "content": "Create 02-installation.html chapter file",
                "active_form": "Creating 02-installation.html chapter file",
                "status": "pending",
            },
        ],
        project_root=temp_dir,
    )

    assert linking_step in dod.pending_items
    assert linking_step not in dod.completed_items
| 1017 | + | |
| 1018 | + | |
def test_sync_todos_to_definition_of_done_allows_linking_step_when_artifacts_exist(
    temp_dir: Path,
) -> None:
    """With every planned artifact on disk, a completed linking step stays completed."""
    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True)
    index_path = guide_root / "index.html"
    chapter_one = chapters / "01-getting-started.html"
    chapter_two = chapters / "02-installation.html"
    index_path.write_text("<html></html>\n")
    chapter_one.write_text("<h1>One</h1>\n")
    chapter_two.write_text("<h1>Two</h1>\n")

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{guide_root}/`",
        f"- `{chapters}/`",
        f"- `{index_path}`",
        f"- `{chapter_one}`",
        f"- `{chapter_two}`",
        "",
    ]
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text("\n".join(plan_lines))

    linking_step = "Link all chapters together properly in the index file"
    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    sync_todos_to_definition_of_done(
        dod,
        [
            {
                "content": linking_step,
                "active_form": "Linking chapters in the index file",
                "status": "completed",
            },
        ],
        project_root=temp_dir,
    )

    assert linking_step in dod.completed_items
| 1065 | + | |
| 1066 | + | |
def test_sync_todos_to_definition_of_done_reopens_directory_content_step_when_output_dir_is_empty(
    temp_dir: Path,
) -> None:
    """A 'create chapter files' step is reopened while the chapters directory is empty."""
    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True)  # directory exists but is deliberately left empty
    index_path = guide_root / "index.html"
    index_path.write_text("<html></html>\n")

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{index_path}`",
        f"- `{chapters}/` (directory for chapter files)",
        "",
        "## Execution Order",
        "- Create chapter files with appropriate content",
    ]
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text("\n".join(plan_lines))

    content_step = "Create chapter files with appropriate content"
    dod = create_definition_of_done("Create an equally thorough nginx guide with chapters.")
    dod.implementation_plan = str(implementation_plan)
    sync_todos_to_definition_of_done(
        dod,
        [
            {
                "content": content_step,
                "active_form": "Creating chapter files with appropriate content",
                "status": "completed",
            },
        ],
        project_root=temp_dir,
    )

    assert content_step in dod.pending_items
    assert content_step not in dod.completed_items
| 1109 | + | |
| 1110 | + | |
def test_reconcile_aggregate_completion_steps_reopens_linking_step_when_artifacts_missing(
    temp_dir: Path,
) -> None:
    """An aggregate linking step is moved back to pending while a planned chapter is absent."""
    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True)
    index_path = guide_root / "index.html"
    chapter_one = chapters / "01-getting-started.html"
    chapter_two = chapters / "02-installation.html"
    chapter_three = chapters / "03-first-website.html"  # planned but never written
    index_path.write_text("<html></html>\n")
    chapter_one.write_text("<h1>One</h1>\n")
    chapter_two.write_text("<h1>Two</h1>\n")

    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{guide_root}/`",
        f"- `{chapters}/`",
        f"- `{index_path}`",
        f"- `{chapter_one}`",
        f"- `{chapter_two}`",
        f"- `{chapter_three}`",
        "",
    ]
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text("\n".join(plan_lines))

    aggregate_step = "Link all chapters together properly"
    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    dod.completed_items.append(aggregate_step)

    reconcile_aggregate_completion_steps(dod, project_root=temp_dir)

    assert aggregate_step not in dod.completed_items
    assert aggregate_step in dod.pending_items
| 1152 | + | |
| 1153 | + | |
def test_sync_todos_to_definition_of_done_drops_unplanned_artifact_expansion_after_plan_complete(
    temp_dir: Path,
) -> None:
    """Once every planned artifact exists, an unplanned expansion todo must be dropped.

    Fix: the original negative assertion checked the *active_form* string
    ("Creating 07-performance-tuning.html") against ``pending_items``, but
    sibling tests show pending/completed items store todo *content* strings,
    so that check could never fail. Assert the content form too so the test
    actually guards the drop behavior; the active_form check is kept for
    backward compatibility.
    """
    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    guide_root.mkdir(parents=True)
    chapters.mkdir()
    index_path = guide_root / "index.html"
    chapter_one = chapters / "01-getting-started.html"
    chapter_two = chapters / "02-installation.html"
    index_path.write_text("<html></html>\n")
    chapter_one.write_text("<h1>One</h1>\n")
    chapter_two.write_text("<h1>Two</h1>\n")

    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}/`",
                f"- `{chapters}/`",
                f"- `{index_path}`",
                f"- `{chapter_one}`",
                f"- `{chapter_two}`",
                "",
            ]
        )
    )

    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    sync_todos_to_definition_of_done(
        dod,
        [
            {
                "content": "Create 01-getting-started.html",
                "active_form": "Creating 01-getting-started.html",
                "status": "completed",
            },
            {
                "content": "Create 02-installation.html",
                "active_form": "Creating 02-installation.html",
                "status": "completed",
            },
            {
                "content": "Create 07-performance-tuning.html",
                "active_form": "Creating 07-performance-tuning.html",
                "status": "in_progress",
            },
        ],
        project_root=temp_dir,
    )

    # Content form is what pending_items stores; this is the assertion that
    # can actually fail if the unplanned todo is retained.
    assert "Create 07-performance-tuning.html" not in dod.pending_items
    assert "Creating 07-performance-tuning.html" not in dod.pending_items
    assert "Create 01-getting-started.html" in dod.completed_items
    assert "Create 02-installation.html" in dod.completed_items
tests/test_workflow_recovery.pyadded@@ -0,0 +1,20 @@ | ||
| 1 | +"""Focused tests for workflow recovery priority rules.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +from pathlib import Path | |
| 6 | + | |
| 7 | +from loader.runtime.workflow_recovery import _should_prioritize_missing_artifact | |
| 8 | + | |
| 9 | + | |
def test_workflow_recovery_prioritizes_missing_artifact_over_review_step() -> None:
    """A missing planned artifact outranks a generic review step, not its own creation step."""
    missing_artifact = (Path("/tmp/guide/06-ssl-configuration.html"), False)
    review_step = "Ensure all files are properly linked and formatted consistently"
    creation_step = "Create the final chapter (06-ssl-configuration.html)"

    assert _should_prioritize_missing_artifact(
        next_pending=review_step,
        missing_artifact=missing_artifact,
    )
    assert not _should_prioritize_missing_artifact(
        next_pending=creation_step,
        missing_artifact=missing_artifact,
    )
tests/test_workflow_tools.pymodified@@ -43,6 +43,65 @@ async def test_todo_write_persists_and_returns_previous_state(tmp_path: Path) -> | ||
| 43 | 43 | assert json.loads(store_path.read_text()) == [] |
| 44 | 44 | |
| 45 | 45 | |
@pytest.mark.asyncio
async def test_todo_write_merges_partial_status_updates_with_existing_scope(
    tmp_path: Path,
) -> None:
    """A partial status update for one todo is merged into the previously stored list."""

    def todo(content: str, active_form: str, status: str) -> dict[str, str]:
        # Small factory to keep the todo payloads readable and identical in shape.
        return {"content": content, "active_form": active_form, "status": status}

    tool = TodoWriteTool(tmp_path)
    initial = await tool.execute(
        todos=[
            todo("Create nginx index", "Creating nginx index", "completed"),
            todo("Create chapter files", "Creating chapter files", "in_progress"),
            todo("Verify links", "Verifying links", "pending"),
        ]
    )
    partial = await tool.execute(
        todos=[todo("Create chapter files", "Creating chapter files", "completed")]
    )

    assert initial.is_error is False
    assert partial.is_error is False
    initial_payload = json.loads(initial.output)
    partial_payload = json.loads(partial.output)
    assert partial_payload["old_todos"] == initial_payload["new_todos"]
    assert partial_payload["new_todos"] == [
        todo("Create nginx index", "Creating nginx index", "completed"),
        todo("Create chapter files", "Creating chapter files", "completed"),
        todo("Verify links", "Verifying links", "pending"),
    ]
| 104 | + | |
| 46 | 105 | @pytest.mark.asyncio |
| 47 | 106 | async def test_todo_write_rejects_invalid_payloads_and_sets_verification_nudge( |
| 48 | 107 | tmp_path: Path, |