Stabilize qwen HTML TOC recovery
Authored by
mfwolffe <wolffemf@dukes.jmu.edu>
- SHA
723240e9f255c866ae1459361a835c31fe00f952
- Parents
3a703a1
- Tree
39f6098
src/loader/agent/parsing.py (modified) @@ -2,6 +2,7 @@ | ||
| 2 | 2 | |
| 3 | 3 | from ..runtime.parsing import ( # noqa: F401 |
| 4 | 4 | ParsedResponse, |
| 5 | + canonicalize_tool_name, | |
| 5 | 6 | format_tool_result, |
| 6 | 7 | parse_tool_calls, |
| 7 | 8 | ) |
src/loader/llm/ollama.py (modified) @@ -6,7 +6,7 @@ from typing import Any | ||
| 6 | 6 | |
| 7 | 7 | import httpx |
| 8 | 8 | |
| 9 | -from ..agent.parsing import parse_tool_calls | |
| 9 | +from ..agent.parsing import canonicalize_tool_name, parse_tool_calls | |
| 10 | 10 | from ..runtime.capabilities import CapabilityProfile, resolve_capability_profile |
| 11 | 11 | from .base import ( |
| 12 | 12 | CompletionResponse, |
@@ -372,6 +372,23 @@ class OllamaBackend(LLMBackend): | ||
| 372 | 372 | ) |
| 373 | 373 | return parsed.content, parsed.tool_calls |
| 374 | 374 | |
| 375 | + def _canonical_native_tool_name( | |
| 376 | + self, | |
| 377 | + raw_name: object, | |
| 378 | + *, | |
| 379 | + tools: list[dict[str, Any]] | None = None, | |
| 380 | + ) -> str: | |
| 381 | + """Normalize native Ollama tool-call names to Loader's canonical names.""" | |
| 382 | + | |
| 383 | + name = str(raw_name or "").strip() | |
| 384 | + if not name: | |
| 385 | + return "" | |
| 386 | + canonical_name = canonicalize_tool_name( | |
| 387 | + name, | |
| 388 | + allowed_tool_names=self._allowed_tool_names(tools), | |
| 389 | + ) | |
| 390 | + return canonical_name or name | |
| 391 | + | |
| 375 | 392 | async def complete( |
| 376 | 393 | self, |
| 377 | 394 | messages: list[Message], |
@@ -435,7 +452,10 @@ class OllamaBackend(LLMBackend): | ||
| 435 | 452 | args = {} |
| 436 | 453 | tool_calls.append(ToolCall( |
| 437 | 454 | id=tc.get("id", f"call_{i}"), |
| 438 | - name=func.get("name", ""), | |
| 455 | + name=self._canonical_native_tool_name( | |
| 456 | + func.get("name", ""), | |
| 457 | + tools=tools, | |
| 458 | + ), | |
| 439 | 459 | arguments=args, |
| 440 | 460 | )) |
| 441 | 461 | else: |
@@ -561,7 +581,10 @@ class OllamaBackend(LLMBackend): | ||
| 561 | 581 | args = {} |
| 562 | 582 | accumulated_tool_calls.append(ToolCall( |
| 563 | 583 | id=tc.get("id", f"call_{len(accumulated_tool_calls)}"), |
| 564 | - name=func.get("name", ""), | |
| 584 | + name=self._canonical_native_tool_name( | |
| 585 | + func.get("name", ""), | |
| 586 | + tools=tools, | |
| 587 | + ), | |
| 565 | 588 | arguments=args, |
| 566 | 589 | )) |
| 567 | 590 | continue |
@@ -581,7 +604,10 @@ class OllamaBackend(LLMBackend): | ||
| 581 | 604 | args = {} |
| 582 | 605 | tool_calls.append(ToolCall( |
| 583 | 606 | id=tc.get("id", f"call_{i}"), |
| 584 | - name=func.get("name", ""), | |
| 607 | + name=self._canonical_native_tool_name( | |
| 608 | + func.get("name", ""), | |
| 609 | + tools=tools, | |
| 610 | + ), | |
| 585 | 611 | arguments=args, |
| 586 | 612 | )) |
| 587 | 613 | |
src/loader/runtime/compaction.py (modified) @@ -2,12 +2,13 @@ | ||
| 2 | 2 | |
| 3 | 3 | from __future__ import annotations |
| 4 | 4 | |
| 5 | +import html | |
| 5 | 6 | import re |
| 6 | 7 | from collections import Counter |
| 7 | 8 | from dataclasses import dataclass |
| 8 | 9 | from pathlib import Path |
| 9 | 10 | |
| 10 | -from ..llm.base import Message, Role | |
| 11 | +from ..llm.base import Message, Role, ToolCall | |
| 11 | 12 | |
| 12 | 13 | DEFAULT_AUTO_COMPACTION_INPUT_TOKENS_THRESHOLD = 100_000 |
| 13 | 14 | MIN_AUTO_COMPACTION_INPUT_TOKENS_THRESHOLD = 12_000 |
@@ -302,37 +303,7 @@ def extract_key_files(messages: list[Message], *, limit: int | None = 6) -> list | ||
| 302 | 303 | def summarize_confirmed_facts(messages: list[Message], *, max_items: int = 2) -> str | None: |
| 303 | 304 | """Summarize recent confirmed discoveries from successful tool results.""" |
| 304 | 305 | |
| 305 | - facts: list[str] = [] | |
| 306 | - for message in reversed(messages): | |
| 307 | - if message.role != Role.TOOL or _is_compacted_context_message(message.content): | |
| 308 | - continue | |
| 309 | - if any(result.is_error for result in message.tool_results): | |
| 310 | - continue | |
| 311 | - | |
| 312 | - tool_name = _observed_tool_name(message.content) | |
| 313 | - payload = "\n".join( | |
| 314 | - result.content.strip() | |
| 315 | - for result in message.tool_results | |
| 316 | - if result.content.strip() | |
| 317 | - ) or message.content | |
| 318 | - | |
| 319 | - if tool_name in { | |
| 320 | - "notepad_write_working", | |
| 321 | - "notepad_append", | |
| 322 | - "notepad_write_priority", | |
| 323 | - "notepad_write_manual", | |
| 324 | - }: | |
| 325 | - mapping_fact = _summarize_html_mappings(payload) | |
| 326 | - if mapping_fact and mapping_fact not in facts: | |
| 327 | - facts.append(mapping_fact) | |
| 328 | - | |
| 329 | - if tool_name in {"glob", "bash"}: | |
| 330 | - file_fact = _summarize_html_file_discovery(payload) | |
| 331 | - if file_fact and file_fact not in facts: | |
| 332 | - facts.append(file_fact) | |
| 333 | - | |
| 334 | - if len(facts) >= max_items: | |
| 335 | - break | |
| 306 | + facts = _collect_confirmed_facts(messages) | |
| 336 | 307 | |
| 337 | 308 | if not facts: |
| 338 | 309 | return None |
@@ -350,7 +321,19 @@ def infer_preferred_next_step( | ||
| 350 | 321 | return None |
| 351 | 322 | |
| 352 | 323 | target_path = _choose_target_path(messages, current_task=current_task) |
| 324 | + has_confirmed_titles = _summarize_html_title_discovery(messages) is not None | |
| 325 | + verification_gap = _summarize_latest_html_verification_gap(messages) | |
| 353 | 326 | if target_path: |
| 327 | + if verification_gap: | |
| 328 | + return ( | |
| 329 | + f"Update `{target_path}` to fix the specific verification failures " | |
| 330 | + f"({verification_gap}) instead of restarting discovery." | |
| 331 | + ) | |
| 332 | + if has_confirmed_titles: | |
| 333 | + return ( | |
| 334 | + f"Update `{target_path}` using the confirmed chapter file/title pairs " | |
| 335 | + "instead of rereading files." | |
| 336 | + ) | |
| 354 | 337 | return ( |
| 355 | 338 | f"Update `{target_path}` using the confirmed findings instead of " |
| 356 | 339 | "restarting earlier discovery steps." |
@@ -426,6 +409,92 @@ def _observed_tool_name(content: str) -> str | None: | ||
| 426 | 409 | return None |
| 427 | 410 | |
| 428 | 411 | |
| 412 | +def _collect_confirmed_facts(messages: list[Message]) -> list[str]: | |
| 413 | + facts: list[str] = [] | |
| 414 | + tool_calls_by_id = { | |
| 415 | + tool_call.id: tool_call | |
| 416 | + for message in messages | |
| 417 | + for tool_call in message.tool_calls | |
| 418 | + } | |
| 419 | + | |
| 420 | + explicit_mapping_fact = _collect_explicit_mapping_fact( | |
| 421 | + messages, | |
| 422 | + tool_calls_by_id=tool_calls_by_id, | |
| 423 | + ) | |
| 424 | + if explicit_mapping_fact: | |
| 425 | + facts.append(explicit_mapping_fact) | |
| 426 | + | |
| 427 | + verification_gap_fact = _collect_html_verification_gap_fact( | |
| 428 | + messages, | |
| 429 | + tool_calls_by_id=tool_calls_by_id, | |
| 430 | + ) | |
| 431 | + if verification_gap_fact: | |
| 432 | + facts.append(verification_gap_fact) | |
| 433 | + | |
| 434 | + title_fact = _summarize_html_title_discovery( | |
| 435 | + messages, | |
| 436 | + tool_calls_by_id=tool_calls_by_id, | |
| 437 | + ) | |
| 438 | + if title_fact: | |
| 439 | + facts.append(title_fact) | |
| 440 | + | |
| 441 | + file_fact = _collect_html_file_discovery_fact( | |
| 442 | + messages, | |
| 443 | + tool_calls_by_id=tool_calls_by_id, | |
| 444 | + ) | |
| 445 | + if file_fact: | |
| 446 | + facts.append(file_fact) | |
| 447 | + | |
| 448 | + return facts | |
| 449 | + | |
| 450 | + | |
| 451 | +def _collect_explicit_mapping_fact( | |
| 452 | + messages: list[Message], | |
| 453 | + *, | |
| 454 | + tool_calls_by_id: dict[str, ToolCall], | |
| 455 | +) -> str | None: | |
| 456 | + mappings: list[str] = [] | |
| 457 | + for message in messages: | |
| 458 | + if message.role != Role.TOOL or _is_compacted_context_message(message.content): | |
| 459 | + continue | |
| 460 | + if any(result.is_error for result in message.tool_results): | |
| 461 | + continue | |
| 462 | + | |
| 463 | + tool_name = _resolve_tool_name( | |
| 464 | + message, | |
| 465 | + tool_calls_by_id=tool_calls_by_id, | |
| 466 | + ) | |
| 467 | + if tool_name not in { | |
| 468 | + "notepad_write_working", | |
| 469 | + "notepad_append", | |
| 470 | + "notepad_write_priority", | |
| 471 | + "notepad_write_manual", | |
| 472 | + }: | |
| 473 | + continue | |
| 474 | + | |
| 475 | + payload = "\n".join( | |
| 476 | + result.content.strip() | |
| 477 | + for result in message.tool_results | |
| 478 | + if result.content.strip() | |
| 479 | + ) or message.content | |
| 480 | + pairs = re.findall( | |
| 481 | + r"([A-Za-z0-9_.-]+\.html)\s*->\s*([A-Za-z0-9_.-]+\.html)", | |
| 482 | + payload, | |
| 483 | + ) | |
| 484 | + for left, right in pairs: | |
| 485 | + mapping = f"{left} -> {right}" | |
| 486 | + if mapping not in mappings: | |
| 487 | + mappings.append(mapping) | |
| 488 | + | |
| 489 | + if not mappings: | |
| 490 | + return None | |
| 491 | + | |
| 492 | + preview = ", ".join(mappings[:4]) | |
| 493 | + if len(mappings) > 4: | |
| 494 | + preview += ", ..." | |
| 495 | + return f"Filename mappings confirmed: {preview}" | |
| 496 | + | |
| 497 | + | |
| 429 | 498 | def _summarize_html_mappings(payload: str) -> str | None: |
| 430 | 499 | pairs = re.findall( |
| 431 | 500 | r"([A-Za-z0-9_.-]+\.html)\s*->\s*([A-Za-z0-9_.-]+\.html)", |
@@ -444,6 +513,209 @@ def _summarize_html_mappings(payload: str) -> str | None: | ||
| 444 | 513 | return f"Filename mappings confirmed: {preview}" |
| 445 | 514 | |
| 446 | 515 | |
| 516 | +def _summarize_html_title_discovery( | |
| 517 | + messages: list[Message], | |
| 518 | + *, | |
| 519 | + max_pairs: int = 4, | |
| 520 | + tool_calls_by_id: dict[str, ToolCall] | None = None, | |
| 521 | +) -> str | None: | |
| 522 | + if tool_calls_by_id is None: | |
| 523 | + tool_calls_by_id = { | |
| 524 | + tool_call.id: tool_call | |
| 525 | + for message in messages | |
| 526 | + for tool_call in message.tool_calls | |
| 527 | + } | |
| 528 | + | |
| 529 | + confirmed_pairs: list[str] = [] | |
| 530 | + for message in messages: | |
| 531 | + if message.role != Role.TOOL or _is_compacted_context_message(message.content): | |
| 532 | + continue | |
| 533 | + if any(result.is_error for result in message.tool_results): | |
| 534 | + continue | |
| 535 | + | |
| 536 | + tool_call = next( | |
| 537 | + ( | |
| 538 | + tool_calls_by_id.get(result.tool_call_id) | |
| 539 | + for result in message.tool_results | |
| 540 | + if result.tool_call_id in tool_calls_by_id | |
| 541 | + ), | |
| 542 | + None, | |
| 543 | + ) | |
| 544 | + if tool_call is None or tool_call.name != "read": | |
| 545 | + continue | |
| 546 | + | |
| 547 | + raw_path = tool_call.arguments.get("file_path") | |
| 548 | + if not isinstance(raw_path, str): | |
| 549 | + continue | |
| 550 | + normalized_path = _normalize_path_candidate(raw_path) or raw_path | |
| 551 | + if Path(normalized_path).name == "index.html" or "/chapters/" not in normalized_path: | |
| 552 | + continue | |
| 553 | + | |
| 554 | + payload = "\n".join( | |
| 555 | + result.content.strip() | |
| 556 | + for result in message.tool_results | |
| 557 | + if result.content.strip() | |
| 558 | + ) or message.content | |
| 559 | + title = _extract_html_title(payload) | |
| 560 | + if not title: | |
| 561 | + continue | |
| 562 | + | |
| 563 | + pair = f"{Path(normalized_path).name} = {title}" | |
| 564 | + if pair not in confirmed_pairs: | |
| 565 | + confirmed_pairs.append(pair) | |
| 566 | + | |
| 567 | + if not confirmed_pairs: | |
| 568 | + return None | |
| 569 | + | |
| 570 | + preview = ", ".join(confirmed_pairs[:max_pairs]) | |
| 571 | + if len(confirmed_pairs) > max_pairs: | |
| 572 | + preview += ", ..." | |
| 573 | + return f"Chapter titles confirmed: {preview}" | |
| 574 | + | |
| 575 | + | |
| 576 | +def _extract_html_title(payload: str) -> str | None: | |
| 577 | + for pattern in ( | |
| 578 | + r"<h1[^>]*>(.*?)</h1>", | |
| 579 | + r"<title[^>]*>(.*?)</title>", | |
| 580 | + ): | |
| 581 | + match = re.search(pattern, payload, re.IGNORECASE | re.DOTALL) | |
| 582 | + if not match: | |
| 583 | + continue | |
| 584 | + title = re.sub(r"<[^>]+>", " ", match.group(1)) | |
| 585 | + title = _collapse_inline_whitespace(html.unescape(title)) | |
| 586 | + if title: | |
| 587 | + return title | |
| 588 | + return None | |
| 589 | + | |
| 590 | + | |
| 591 | +def _collect_html_file_discovery_fact( | |
| 592 | + messages: list[Message], | |
| 593 | + *, | |
| 594 | + tool_calls_by_id: dict[str, ToolCall], | |
| 595 | +) -> str | None: | |
| 596 | + filenames: list[str] = [] | |
| 597 | + for message in messages: | |
| 598 | + if message.role != Role.TOOL or _is_compacted_context_message(message.content): | |
| 599 | + continue | |
| 600 | + if any(result.is_error for result in message.tool_results): | |
| 601 | + continue | |
| 602 | + | |
| 603 | + tool_name = _resolve_tool_name( | |
| 604 | + message, | |
| 605 | + tool_calls_by_id=tool_calls_by_id, | |
| 606 | + ) | |
| 607 | + if tool_name not in {"glob", "bash"}: | |
| 608 | + continue | |
| 609 | + | |
| 610 | + payload = "\n".join( | |
| 611 | + result.content.strip() | |
| 612 | + for result in message.tool_results | |
| 613 | + if result.content.strip() | |
| 614 | + ) or message.content | |
| 615 | + matches = re.findall(r"([A-Za-z0-9_.-]+\.html)", payload) | |
| 616 | + for name in matches: | |
| 617 | + if name not in filenames: | |
| 618 | + filenames.append(name) | |
| 619 | + | |
| 620 | + if len(filenames) < 3: | |
| 621 | + return None | |
| 622 | + | |
| 623 | + preview = ", ".join(filenames[:6]) | |
| 624 | + if len(filenames) > 6: | |
| 625 | + preview += ", ..." | |
| 626 | + return f"Existing files include {preview}" | |
| 627 | + | |
| 628 | + | |
| 629 | +def _collect_html_verification_gap_fact( | |
| 630 | + messages: list[Message], | |
| 631 | + *, | |
| 632 | + tool_calls_by_id: dict[str, ToolCall], | |
| 633 | +) -> str | None: | |
| 634 | + gap = _summarize_latest_html_verification_gap( | |
| 635 | + messages, | |
| 636 | + tool_calls_by_id=tool_calls_by_id, | |
| 637 | + ) | |
| 638 | + if not gap: | |
| 639 | + return None | |
| 640 | + return f"Verification gaps: {gap}" | |
| 641 | + | |
| 642 | + | |
| 643 | +def _summarize_latest_html_verification_gap( | |
| 644 | + messages: list[Message], | |
| 645 | + *, | |
| 646 | + max_items: int = 2, | |
| 647 | + tool_calls_by_id: dict[str, ToolCall] | None = None, | |
| 648 | +) -> str | None: | |
| 649 | + if tool_calls_by_id is None: | |
| 650 | + tool_calls_by_id = { | |
| 651 | + tool_call.id: tool_call | |
| 652 | + for message in messages | |
| 653 | + for tool_call in message.tool_calls | |
| 654 | + } | |
| 655 | + | |
| 656 | + for message in reversed(messages): | |
| 657 | + if message.role != Role.TOOL or _is_compacted_context_message(message.content): | |
| 658 | + continue | |
| 659 | + if not any(result.is_error for result in message.tool_results): | |
| 660 | + continue | |
| 661 | + tool_name = _resolve_tool_name( | |
| 662 | + message, | |
| 663 | + tool_calls_by_id=tool_calls_by_id, | |
| 664 | + ) | |
| 665 | + if tool_name != "bash": | |
| 666 | + continue | |
| 667 | + | |
| 668 | + payload = "\n".join( | |
| 669 | + result.content.strip() | |
| 670 | + for result in message.tool_results | |
| 671 | + if result.content.strip() | |
| 672 | + ) or message.content | |
| 673 | + gap = _extract_html_verification_gap(payload, max_items=max_items) | |
| 674 | + if gap: | |
| 675 | + return gap | |
| 676 | + | |
| 677 | + return None | |
| 678 | + | |
| 679 | + | |
| 680 | +def _extract_html_verification_gap(payload: str, *, max_items: int = 2) -> str | None: | |
| 681 | + missing: list[str] = [] | |
| 682 | + mismatches: list[str] = [] | |
| 683 | + mode: str | None = None | |
| 684 | + | |
| 685 | + for raw_line in payload.splitlines(): | |
| 686 | + line = raw_line.strip() | |
| 687 | + if not line: | |
| 688 | + continue | |
| 689 | + lowered = line.lower() | |
| 690 | + if lowered == "missing links:": | |
| 691 | + mode = "missing" | |
| 692 | + continue | |
| 693 | + if lowered == "title mismatches:": | |
| 694 | + mode = "mismatch" | |
| 695 | + continue | |
| 696 | + if mode == "missing" and "->" in line: | |
| 697 | + href = line.split("->", 1)[0].strip() | |
| 698 | + if href and href not in missing: | |
| 699 | + missing.append(href) | |
| 700 | + continue | |
| 701 | + if mode == "mismatch" and "!=" in line: | |
| 702 | + if line not in mismatches: | |
| 703 | + mismatches.append(line) | |
| 704 | + | |
| 705 | + parts: list[str] = [] | |
| 706 | + if missing: | |
| 707 | + preview = ", ".join(missing[:max_items]) | |
| 708 | + if len(missing) > max_items: | |
| 709 | + preview += ", ..." | |
| 710 | + parts.append(f"missing TOC links {preview}") | |
| 711 | + if mismatches: | |
| 712 | + preview = ", ".join(mismatches[:max_items]) | |
| 713 | + if len(mismatches) > max_items: | |
| 714 | + preview += ", ..." | |
| 715 | + parts.append(f"title mismatches {preview}") | |
| 716 | + return "; ".join(parts) if parts else None | |
| 717 | + | |
| 718 | + | |
| 447 | 719 | def _summarize_html_file_discovery(payload: str) -> str | None: |
| 448 | 720 | filenames = re.findall(r"([A-Za-z0-9_.-]+\.html)", payload) |
| 449 | 721 | unique_names: list[str] = [] |
@@ -458,6 +730,22 @@ def _summarize_html_file_discovery(payload: str) -> str | None: | ||
| 458 | 730 | return f"Existing files include {preview}" |
| 459 | 731 | |
| 460 | 732 | |
| 733 | +def _resolve_tool_name( | |
| 734 | + message: Message, | |
| 735 | + *, | |
| 736 | + tool_calls_by_id: dict[str, ToolCall], | |
| 737 | +) -> str | None: | |
| 738 | + observed = _observed_tool_name(message.content) | |
| 739 | + if observed: | |
| 740 | + return observed | |
| 741 | + | |
| 742 | + for result in message.tool_results: | |
| 743 | + tool_call = tool_calls_by_id.get(result.tool_call_id) | |
| 744 | + if tool_call is not None: | |
| 745 | + return tool_call.name | |
| 746 | + return None | |
| 747 | + | |
| 748 | + | |
| 461 | 749 | def _choose_target_path( |
| 462 | 750 | messages: list[Message], |
| 463 | 751 | *, |
src/loader/runtime/dod.py (modified) @@ -26,6 +26,7 @@ class VerificationEvidence: | ||
| 26 | 26 | |
| 27 | 27 | command: str |
| 28 | 28 | passed: bool |
| 29 | + skipped: bool = False | |
| 29 | 30 | exit_code: int | None = None |
| 30 | 31 | stdout: str = "" |
| 31 | 32 | stderr: str = "" |
@@ -54,6 +55,7 @@ class DefinitionOfDone: | ||
| 54 | 55 | line_changes: int = 0 |
| 55 | 56 | storage_path: str | None = None |
| 56 | 57 | last_verification_result: str | None = None |
| 58 | + last_verification_signature: str | None = None | |
| 57 | 59 | verification_attempt_counter: int = 0 |
| 58 | 60 | active_verification_attempt_id: str | None = None |
| 59 | 61 | active_verification_attempt_number: int | None = None |
@@ -92,6 +94,7 @@ class DefinitionOfDone: | ||
| 92 | 94 | line_changes=int(data.get("line_changes", 0)), |
| 93 | 95 | storage_path=data.get("storage_path"), |
| 94 | 96 | last_verification_result=data.get("last_verification_result"), |
| 97 | + last_verification_signature=data.get("last_verification_signature"), | |
| 95 | 98 | verification_attempt_counter=int(data.get("verification_attempt_counter", 0)), |
| 96 | 99 | active_verification_attempt_id=data.get("active_verification_attempt_id"), |
| 97 | 100 | active_verification_attempt_number=( |
@@ -265,8 +268,8 @@ def build_verification_summary(evidence: list[VerificationEvidence]) -> str: | ||
| 265 | 268 | |
| 266 | 269 | lines = ["Verification:"] |
| 267 | 270 | for item in evidence: |
| 268 | - status = "PASS" if item.passed else "FAIL" | |
| 269 | - detail = _first_non_empty_line(item.stdout) or _first_non_empty_line(item.stderr) | |
| 271 | + status = "SKIP" if item.skipped else "PASS" if item.passed else "FAIL" | |
| 272 | + detail = _summarize_verification_detail(item) | |
| 270 | 273 | if detail: |
| 271 | 274 | lines.append(f"- `{item.command}`: {status} ({detail})") |
| 272 | 275 | else: |
@@ -325,12 +328,19 @@ class DefinitionOfDoneStore: | ||
| 325 | 328 | task_statement: str, |
| 326 | 329 | *, |
| 327 | 330 | retry_budget: int = 3, |
| 331 | + resume_path: Path | str | None = None, | |
| 328 | 332 | ) -> DefinitionOfDone: |
| 329 | - """Load an unfinished DoD for the same task, or create a new one.""" | |
| 330 | - | |
| 331 | - existing = self.load_latest(task_statement) | |
| 332 | - if existing is not None and existing.status not in {"done", "failed"}: | |
| 333 | - return existing | |
| 333 | + """Resume the active DoD for this session, or create a new one.""" | |
| 334 | + | |
| 335 | + if resume_path is not None: | |
| 336 | + path = Path(resume_path) | |
| 337 | + if path.exists(): | |
| 338 | + existing = self.load(path) | |
| 339 | + if ( | |
| 340 | + existing.task_statement == task_statement | |
| 341 | + and existing.status not in {"done", "failed"} | |
| 342 | + ): | |
| 343 | + return existing | |
| 334 | 344 | |
| 335 | 345 | dod = create_definition_of_done(task_statement, retry_budget=retry_budget) |
| 336 | 346 | slug = slugify(task_statement) |
@@ -501,7 +511,7 @@ def _build_html_toc_verification_command(index_path: Path) -> str: | ||
| 501 | 511 | path_literal = repr(str(index_path)) |
| 502 | 512 | return "\n".join( |
| 503 | 513 | [ |
| 504 | - "/usr/bin/python3 - <<'PY'", | |
| 514 | + "python3 - <<'PY'", | |
| 505 | 515 | "from pathlib import Path", |
| 506 | 516 | "import re", |
| 507 | 517 | "import sys", |
@@ -554,3 +564,23 @@ def _first_non_empty_line(text: str) -> str: | ||
| 554 | 564 | if stripped: |
| 555 | 565 | return stripped[:120] |
| 556 | 566 | return "" |
| 567 | + | |
| 568 | + | |
| 569 | +def _summarize_verification_detail(item: VerificationEvidence) -> str: | |
| 570 | + for candidate in (item.stdout, item.stderr, item.output): | |
| 571 | + lines = [line.strip() for line in str(candidate).splitlines() if line.strip()] | |
| 572 | + if not lines: | |
| 573 | + continue | |
| 574 | + if len(lines) == 1: | |
| 575 | + return lines[0][:240] | |
| 576 | + | |
| 577 | + head = lines[0][:120] | |
| 578 | + tail = [line[:120] for line in lines[1:3]] | |
| 579 | + if head.endswith(":") and tail: | |
| 580 | + detail = f"{head} {'; '.join(tail)}" | |
| 581 | + else: | |
| 582 | + detail = "; ".join([head, *tail[:1]]) | |
| 583 | + if len(lines) > len(tail) + 1: | |
| 584 | + detail += "; ..." | |
| 585 | + return detail[:240] | |
| 586 | + return "" | |
src/loader/runtime/explore.py (modified) @@ -108,6 +108,7 @@ class ExploreRuntime: | ||
| 108 | 108 | validator=self.context.safeguards.validator, |
| 109 | 109 | registry=self.registry, |
| 110 | 110 | rollback_plan=None, |
| 111 | + workspace_root=self.context.project_root, | |
| 111 | 112 | ), |
| 112 | 113 | ) |
| 113 | 114 | |
src/loader/runtime/finalization.py (modified) @@ -203,6 +203,63 @@ class TurnFinalizer: | ||
| 203 | 203 | verification_observations=skip_observations, |
| 204 | 204 | ) |
| 205 | 205 | |
| 206 | + current_verification_signature = _verification_state_signature(dod) | |
| 207 | + if ( | |
| 208 | + dod.last_verification_result == "failed" | |
| 209 | + and dod.last_verification_signature | |
| 210 | + and dod.last_verification_signature == current_verification_signature | |
| 211 | + ): | |
| 212 | + summary.verification_status = "failed" | |
| 213 | + summary.definition_of_done = dod | |
| 214 | + failed_provenance = _verification_result_provenance(dod, passed=False) | |
| 215 | + if dod.retry_count >= dod.retry_budget: | |
| 216 | + dod.status = "failed" | |
| 217 | + dod.confidence = "low" | |
| 218 | + self.dod_store.save(dod) | |
| 219 | + await self.emit_dod_status(emit, dod) | |
| 220 | + exhausted_response = ( | |
| 221 | + "I couldn't verify that the task is complete within the retry budget.\n\n" | |
| 222 | + f"{build_verification_summary(dod.evidence)}" | |
| 223 | + ) | |
| 224 | + return CompletionGateResult( | |
| 225 | + should_continue=False, | |
| 226 | + reason_code="verification_retry_budget_exhausted", | |
| 227 | + reason_summary="stopped after verification retry budget was exhausted", | |
| 228 | + final_response=exhausted_response, | |
| 229 | + evidence_provenance=failed_provenance, | |
| 230 | + verification_observations=_verification_result_observations( | |
| 231 | + dod, | |
| 232 | + passed=False, | |
| 233 | + attempt_id=dod.active_verification_attempt_id, | |
| 234 | + attempt_number=dod.active_verification_attempt_number, | |
| 235 | + ), | |
| 236 | + ) | |
| 237 | + repair_prompt = ( | |
| 238 | + "[DEFINITION OF DONE CHECK STILL FAILING]\n" | |
| 239 | + f"Task: {dod.task_statement}\n" | |
| 240 | + "No new file changes were made since the last failed verification.\n\n" | |
| 241 | + f"{build_verification_summary(dod.evidence)}\n\n" | |
| 242 | + f"{_build_verification_repair_guidance(dod)}\n\n" | |
| 243 | + "Apply a concrete edit or patch before trying to finish again." | |
| 244 | + ) | |
| 245 | + self.context.session.append(Message(role=Role.USER, content=repair_prompt)) | |
| 246 | + return CompletionGateResult( | |
| 247 | + should_continue=True, | |
| 248 | + reason_code="verification_failed_no_new_changes", | |
| 249 | + reason_summary=( | |
| 250 | + "continued because verification already failed and no new " | |
| 251 | + "mutating changes were made before trying to finish again" | |
| 252 | + ), | |
| 253 | + final_response="", | |
| 254 | + evidence_provenance=failed_provenance, | |
| 255 | + verification_observations=_verification_result_observations( | |
| 256 | + dod, | |
| 257 | + passed=False, | |
| 258 | + attempt_id=dod.active_verification_attempt_id, | |
| 259 | + attempt_number=dod.active_verification_attempt_number, | |
| 260 | + ), | |
| 261 | + ) | |
| 262 | + | |
| 206 | 263 | verify_item = "Collect verification evidence" |
| 207 | 264 | if verify_item not in dod.pending_items and verify_item not in dod.completed_items: |
| 208 | 265 | dod.pending_items.append(verify_item) |
@@ -365,6 +422,7 @@ class TurnFinalizer: | ||
| 365 | 422 | f"Attempt: {dod.retry_count}/{dod.retry_budget}\n" |
| 366 | 423 | f"Pending items: {', '.join(dod.pending_items)}\n\n" |
| 367 | 424 | f"{build_verification_summary(dod.evidence)}\n\n" |
| 425 | + f"{_build_verification_repair_guidance(dod)}\n\n" | |
| 368 | 426 | "Fix the failures above, then finish the task again." |
| 369 | 427 | ) |
| 370 | 428 | self.context.session.append(Message(role=Role.USER, content=failure_prompt)) |
@@ -391,6 +449,7 @@ class TurnFinalizer: | ||
| 391 | 449 | """Collect verification evidence for one DoD.""" |
| 392 | 450 | |
| 393 | 451 | dod.status = "verifying" |
| 452 | + dod.last_verification_signature = _verification_state_signature(dod) | |
| 394 | 453 | self.dod_store.save(dod) |
| 395 | 454 | await self.emit_dod_status(emit, dod) |
| 396 | 455 | attempt = ensure_active_verification_attempt(dod) |
@@ -464,6 +523,7 @@ class TurnFinalizer: | ||
| 464 | 523 | output=outcome.result_output, |
| 465 | 524 | kind=_classify_verification_kind(command), |
| 466 | 525 | ) |
| 526 | + evidence = _maybe_mark_optional_verification_skip(evidence) | |
| 467 | 527 | dod.evidence.append(evidence) |
| 468 | 528 | observation = _verification_observation_from_evidence( |
| 469 | 529 | evidence, |
@@ -474,20 +534,12 @@ class TurnFinalizer: | ||
| 474 | 534 | append_verification_timeline_entry( |
| 475 | 535 | self.context, |
| 476 | 536 | summary, |
| 477 | - reason_code=( | |
| 478 | - "verification_command_passed" | |
| 479 | - if evidence.passed | |
| 480 | - else "verification_command_failed" | |
| 481 | - ), | |
| 482 | - reason_summary=( | |
| 483 | - f"verification passed for `{command}`" | |
| 484 | - if evidence.passed | |
| 485 | - else f"verification failed for `{command}`" | |
| 486 | - ), | |
| 537 | + reason_code=_verification_timeline_reason_code(evidence), | |
| 538 | + reason_summary=_verification_timeline_reason_summary(evidence), | |
| 487 | 539 | evidence_provenance=provenance, |
| 488 | 540 | verification_observations=[observation], |
| 489 | 541 | ) |
| 490 | - all_passed = all_passed and evidence.passed | |
| 542 | + all_passed = all_passed and (evidence.passed or evidence.skipped) | |
| 491 | 543 | summary.tool_result_messages.append(outcome.message) |
| 492 | 544 | self.context.session.append(outcome.message) |
| 493 | 545 | |
@@ -731,15 +783,13 @@ def _verification_observation_from_evidence( | ||
| 731 | 783 | command = evidence.command or "verification" |
| 732 | 784 | return VerificationObservation( |
| 733 | 785 | status=( |
| 734 | - VerificationObservationStatus.PASSED.value | |
| 786 | + VerificationObservationStatus.SKIPPED.value | |
| 787 | + if evidence.skipped | |
| 788 | + else VerificationObservationStatus.PASSED.value | |
| 735 | 789 | if evidence.passed |
| 736 | 790 | else VerificationObservationStatus.FAILED.value |
| 737 | 791 | ), |
| 738 | - summary=( | |
| 739 | - f"verification passed for `{command}`" | |
| 740 | - if evidence.passed | |
| 741 | - else f"verification failed for `{command}`" | |
| 742 | - ), | |
| 792 | + summary=_verification_timeline_reason_summary(evidence), | |
| 743 | 793 | command=evidence.command or None, |
| 744 | 794 | kind=evidence.kind, |
| 745 | 795 | exit_code=evidence.exit_code, |
@@ -757,13 +807,11 @@ def _verification_provenance_from_evidence( | ||
| 757 | 807 | EvidenceProvenance( |
| 758 | 808 | category="verification", |
| 759 | 809 | source="dod.evidence", |
| 760 | - summary=( | |
| 761 | - f"verification passed for `{command}`" | |
| 762 | - if evidence.passed | |
| 763 | - else f"verification failed for `{command}`" | |
| 764 | - ), | |
| 810 | + summary=_verification_timeline_reason_summary(evidence), | |
| 765 | 811 | status=( |
| 766 | - EvidenceProvenanceStatus.SUPPORTS.value | |
| 812 | + EvidenceProvenanceStatus.CONTEXT.value | |
| 813 | + if evidence.skipped | |
| 814 | + else EvidenceProvenanceStatus.SUPPORTS.value | |
| 767 | 815 | if evidence.passed |
| 768 | 816 | else EvidenceProvenanceStatus.CONTRADICTS.value |
| 769 | 817 | ), |
@@ -844,6 +892,113 @@ def _verification_detail(evidence: VerificationEvidence) -> str | None: | ||
| 844 | 892 | return None |
| 845 | 893 | |
| 846 | 894 | |
| 895 | +def _verification_timeline_reason_code(evidence: VerificationEvidence) -> str: | |
| 896 | + if evidence.skipped: | |
| 897 | + return "verification_command_skipped" | |
| 898 | + if evidence.passed: | |
| 899 | + return "verification_command_passed" | |
| 900 | + return "verification_command_failed" | |
| 901 | + | |
| 902 | + | |
| 903 | +def _verification_timeline_reason_summary(evidence: VerificationEvidence) -> str: | |
| 904 | + command = evidence.command or "verification" | |
| 905 | + if evidence.skipped: | |
| 906 | + return f"verification skipped for `{command}`" | |
| 907 | + if evidence.passed: | |
| 908 | + return f"verification passed for `{command}`" | |
| 909 | + return f"verification failed for `{command}`" | |
| 910 | + | |
| 911 | + | |
| 912 | +def _maybe_mark_optional_verification_skip( | |
| 913 | + evidence: VerificationEvidence, | |
| 914 | +) -> VerificationEvidence: | |
| 915 | + detail = "\n".join( | |
| 916 | + part for part in (evidence.stderr, evidence.output) if str(part).strip() | |
| 917 | + ).lower() | |
| 918 | + command = (evidence.command or "").lower() | |
| 919 | + if ( | |
| 920 | + not evidence.passed | |
| 921 | + and evidence.exit_code == 127 | |
| 922 | + and "command not found" in detail | |
| 923 | + and "html5validator" in command | |
| 924 | + ): | |
| 925 | + evidence.skipped = True | |
| 926 | + return evidence | |
| 927 | + | |
| 928 | + | |
| 929 | +def _verification_state_signature(dod: DefinitionOfDone) -> str: | |
| 930 | + touched = "|".join(sorted(set(dod.touched_files))) | |
| 931 | + commands = "|".join(sorted(set(dod.successful_commands))) | |
| 932 | + return ( | |
| 933 | + f"lines={dod.line_changes}" | |
| 934 | + f";touched={touched}" | |
| 935 | + f";actions={len(dod.mutating_actions)}" | |
| 936 | + f";commands={commands}" | |
| 937 | + ) | |
| 938 | + | |
| 939 | + | |
| 940 | +def _build_verification_repair_guidance(dod: DefinitionOfDone) -> str: | |
| 941 | + fixes = _extract_verification_repairs(dod.evidence) | |
| 942 | + if not fixes: | |
| 943 | + return ( | |
| 944 | + "Use the failed verification evidence directly, avoid rereading unrelated " | |
| 945 | + "files, and fix the target file before retrying." | |
| 946 | + ) | |
| 947 | + | |
| 948 | + return "\n".join( | |
| 949 | + [ | |
| 950 | + "Repair focus:", | |
| 951 | + *[f"- {item}" for item in fixes], | |
| 952 | + "- Reuse these exact failures instead of restarting discovery from earlier chapters.", | |
| 953 | + ] | |
| 954 | + ) | |
| 955 | + | |
| 956 | + | |
| 957 | +def _extract_verification_repairs( | |
| 958 | + evidence_items: list[VerificationEvidence], | |
| 959 | +) -> list[str]: | |
| 960 | + fixes: list[str] = [] | |
| 961 | + for evidence in evidence_items: | |
| 962 | + for candidate in (evidence.stderr, evidence.output, evidence.stdout): | |
| 963 | + missing, mismatches = _parse_verification_failures(str(candidate)) | |
| 964 | + for href in missing: | |
| 965 | + item = f"Fix the missing TOC href `{href}` in `index.html`." | |
| 966 | + if item not in fixes: | |
| 967 | + fixes.append(item) | |
| 968 | + for mismatch in mismatches: | |
| 969 | + item = f"Fix the TOC label mismatch `{mismatch}`." | |
| 970 | + if item not in fixes: | |
| 971 | + fixes.append(item) | |
| 972 | + return fixes | |
| 973 | + | |
| 974 | + | |
| 975 | +def _parse_verification_failures(text: str) -> tuple[list[str], list[str]]: | |
| 976 | + missing: list[str] = [] | |
| 977 | + mismatches: list[str] = [] | |
| 978 | + mode: str | None = None | |
| 979 | + | |
| 980 | + for raw_line in text.splitlines(): | |
| 981 | + line = raw_line.strip() | |
| 982 | + if not line: | |
| 983 | + continue | |
| 984 | + lowered = line.lower() | |
| 985 | + if lowered == "missing links:": | |
| 986 | + mode = "missing" | |
| 987 | + continue | |
| 988 | + if lowered == "title mismatches:": | |
| 989 | + mode = "mismatch" | |
| 990 | + continue | |
| 991 | + if mode == "missing" and "->" in line: | |
| 992 | + href = line.split("->", 1)[0].strip() | |
| 993 | + if href and href not in missing: | |
| 994 | + missing.append(href) | |
| 995 | + continue | |
| 996 | + if mode == "mismatch" and "!=" in line and line not in mismatches: | |
| 997 | + mismatches.append(line) | |
| 998 | + | |
| 999 | + return missing, mismatches | |
| 1000 | + | |
| 1001 | + | |
| 847 | 1002 | def _classify_verification_kind(command: str) -> str: |
| 848 | 1003 | """Classify the verification command into a summary kind.""" |
| 849 | 1004 | |
src/loader/runtime/hooks.pymodified@@ -5,6 +5,7 @@ from __future__ import annotations | ||
| 5 | 5 | from collections.abc import Iterable |
| 6 | 6 | from dataclasses import dataclass, field |
| 7 | 7 | from enum import StrEnum |
| 8 | +from pathlib import Path | |
| 8 | 9 | from typing import Any, Protocol |
| 9 | 10 | |
| 10 | 11 | from ..llm.base import ToolCall |
@@ -157,8 +158,93 @@ class SearchPathAliasHook(BaseToolHook): | ||
| 157 | 158 | updated_arguments.pop(cleanup_key, None) |
| 158 | 159 | return HookResult(updated_arguments=updated_arguments) |
| 159 | 160 | |
| 161 | + if context.tool_call.name == "glob": | |
| 162 | + normalized_arguments = self._normalize_glob_pattern_path(arguments) | |
| 163 | + if normalized_arguments is not None: | |
| 164 | + return HookResult(updated_arguments=normalized_arguments) | |
| 165 | + | |
| 160 | 166 | return HookResult() |
| 161 | 167 | |
| 168 | + def _normalize_glob_pattern_path( | |
| 169 | + self, | |
| 170 | + arguments: dict[str, Any], | |
| 171 | + ) -> dict[str, Any] | None: | |
| 172 | + pattern = str(arguments.get("pattern", "")).strip() | |
| 173 | + if not pattern or not pattern.startswith(("/", "~", "./", "../")): | |
| 174 | + return None | |
| 175 | + | |
| 176 | + pattern_path = Path(pattern) | |
| 177 | + parent = str(pattern_path.parent).strip() | |
| 178 | + basename = pattern_path.name.strip() | |
| 179 | + if not parent or not basename: | |
| 180 | + return None | |
| 181 | + if any(token in parent for token in ("*", "?", "[")): | |
| 182 | + return None | |
| 183 | + | |
| 184 | + updated_arguments = dict(arguments) | |
| 185 | + updated_arguments["path"] = parent | |
| 186 | + updated_arguments["pattern"] = basename | |
| 187 | + return updated_arguments | |
| 188 | + | |
| 189 | + | |
class RelativePathContextHook(BaseToolHook):
    """Recover relative file/search paths against recently-used external directories."""

    _FILE_TOOLS = frozenset({"read", "write", "edit", "patch"})
    _SEARCH_TOOLS = frozenset({"glob", "grep"})

    def __init__(self, action_tracker: ActionTracker, workspace_root: Path) -> None:
        self.action_tracker = action_tracker
        self.workspace_root = workspace_root.expanduser().resolve()

    async def pre_tool_use(self, context: HookContext) -> HookResult:
        """Rewrite a relative path argument that resolves outside the workspace."""

        key = self._argument_key(context.tool_call.name)
        if key is None:
            return HookResult()

        arguments = context.tool_call.arguments
        candidate_path = str(arguments.get(key, "")).strip()
        # Absolute and home-anchored paths need no context recovery.
        if not candidate_path or candidate_path.startswith(("/", "~")):
            return HookResult()

        recovered = self._resolve_recent_context_path(
            candidate_path,
            require_existing=True,
        )
        if recovered is None:
            return HookResult()

        return HookResult(updated_arguments={**arguments, key: recovered})

    def _argument_key(self, tool_name: str) -> str | None:
        """Map a tool name to the argument that carries its path, if any."""

        if tool_name in self._FILE_TOOLS:
            return "file_path"
        if tool_name in self._SEARCH_TOOLS:
            return "path"
        return None

    def _resolve_recent_context_path(
        self,
        raw_path: str,
        *,
        require_existing: bool,
    ) -> str | None:
        """Try *raw_path* against recent external directories when the workspace misses."""

        # Paths that already resolve inside the workspace are left untouched.
        if (self.workspace_root / raw_path).expanduser().exists():
            return None

        for base_dir in self.action_tracker.recent_path_contexts():
            candidate = (Path(base_dir) / raw_path).expanduser()
            if require_existing:
                if candidate.exists():
                    return str(candidate)
            elif candidate.exists() or candidate.parent.exists():
                return str(candidate)
        return None
| 247 | + | |
| 162 | 248 | |
| 163 | 249 | class HookManager: |
| 164 | 250 | """Runs tool hooks across Loader's three lifecycle events.""" |
@@ -350,6 +436,7 @@ def build_default_tool_hooks( | ||
| 350 | 436 | validator: PreActionValidator, |
| 351 | 437 | registry: ToolRegistry, |
| 352 | 438 | rollback_plan: RollbackPlan | None, |
| 439 | + workspace_root: Path, | |
| 353 | 440 | ) -> HookManager: |
| 354 | 441 | """Build Loader's default tool hook stack for one runtime turn.""" |
| 355 | 442 | |
@@ -357,6 +444,7 @@ def build_default_tool_hooks( | ||
| 357 | 444 | [ |
| 358 | 445 | FilePathAliasHook(), |
| 359 | 446 | SearchPathAliasHook(), |
| 447 | + RelativePathContextHook(action_tracker, workspace_root), | |
| 360 | 448 | DuplicateActionHook(action_tracker), |
| 361 | 449 | ActionValidationHook(validator), |
| 362 | 450 | RollbackTrackingHook(registry, rollback_plan), |
src/loader/runtime/parsing.pymodified@@ -4,6 +4,7 @@ from __future__ import annotations | ||
| 4 | 4 | |
| 5 | 5 | import json |
| 6 | 6 | import re |
| 7 | +import shlex | |
| 7 | 8 | from collections.abc import Iterable |
| 8 | 9 | from dataclasses import dataclass |
| 9 | 10 | |
@@ -19,6 +20,17 @@ class ParsedResponse: | ||
| 19 | 20 | is_final_answer: bool = False |
| 20 | 21 | |
| 21 | 22 | |
# Separator-normalized aliases that local models commonly emit for Loader
# tools, mapped to the canonical tool name. Keys are looked up with the
# output of _normalized_tool_key (casefolded, non-alphanumerics stripped).
_TOOL_NAME_ALIASES = {
    "bashcommand": "bash",
    "editfile": "edit",
    "globfile": "glob",
    "globfiles": "glob",
    "patchfile": "patch",
    "readfile": "read",
    "writefile": "write",
}
| 33 | + | |
| 22 | 34 | def _extract_arguments(data: dict) -> dict: |
| 23 | 35 | """Extract arguments from tool call data, handling various key names.""" |
| 24 | 36 | |
@@ -52,6 +64,23 @@ def _tool_name_map(allowed_tool_names: Iterable[str] | None) -> dict[str, str] | | ||
| 52 | 64 | return {name.casefold(): name for name in allowed_tool_names} |
| 53 | 65 | |
| 54 | 66 | |
| 67 | +def _normalized_tool_key(name: str) -> str: | |
| 68 | + """Collapse separators and case so near-miss tool names can still match.""" | |
| 69 | + | |
| 70 | + return re.sub(r"[^a-z0-9]+", "", name.casefold()) | |
| 71 | + | |
| 72 | + | |
| 73 | +def _normalized_allowed_tool_map(tool_names: dict[str, str] | None) -> dict[str, str] | None: | |
| 74 | + """Build a separator-insensitive tool-name map.""" | |
| 75 | + | |
| 76 | + if tool_names is None: | |
| 77 | + return None | |
| 78 | + return { | |
| 79 | + _normalized_tool_key(canonical_name): canonical_name | |
| 80 | + for canonical_name in tool_names.values() | |
| 81 | + } | |
| 82 | + | |
| 83 | + | |
| 55 | 84 | def _canonicalize_tool_name( |
| 56 | 85 | name: str, |
| 57 | 86 | tool_names: dict[str, str] | None, |
@@ -62,7 +91,43 @@ def _canonicalize_tool_name( | ||
| 62 | 91 | |
| 63 | 92 | if tool_names is None: |
| 64 | 93 | return name.lower() if lowercase_default else name |
| 65 | - return tool_names.get(name.casefold()) | |
| 94 | + | |
| 95 | + direct_match = tool_names.get(name.casefold()) | |
| 96 | + if direct_match is not None: | |
| 97 | + return direct_match | |
| 98 | + | |
| 99 | + normalized_allowed = _normalized_allowed_tool_map(tool_names) | |
| 100 | + if normalized_allowed is None: | |
| 101 | + return None | |
| 102 | + | |
| 103 | + normalized_name = _normalized_tool_key(name) | |
| 104 | + normalized_match = normalized_allowed.get(normalized_name) | |
| 105 | + if normalized_match is not None: | |
| 106 | + return normalized_match | |
| 107 | + | |
| 108 | + alias_target = _TOOL_NAME_ALIASES.get(normalized_name) | |
| 109 | + if alias_target is None: | |
| 110 | + return None | |
| 111 | + | |
| 112 | + direct_alias_match = tool_names.get(alias_target.casefold()) | |
| 113 | + if direct_alias_match is not None: | |
| 114 | + return direct_alias_match | |
| 115 | + return normalized_allowed.get(_normalized_tool_key(alias_target)) | |
| 116 | + | |
| 117 | + | |
def canonicalize_tool_name(
    name: str,
    *,
    allowed_tool_names: Iterable[str] | None = None,
    lowercase_default: bool = False,
) -> str | None:
    """Public helper for backend/native tool-call normalization.

    Thin wrapper over the private canonicalizer: builds the casefolded
    allow-list map, then delegates the actual matching to it.
    """

    tool_names = _tool_name_map(allowed_tool_names)
    return _canonicalize_tool_name(
        name,
        tool_names,
        lowercase_default=lowercase_default,
    )
| 66 | 131 | |
| 67 | 132 | |
| 68 | 133 | def _extract_json_tool_calls( |
@@ -168,6 +233,66 @@ def _extract_function_tag_tool_calls( | ||
| 168 | 233 | return tool_calls, spans |
| 169 | 234 | |
| 170 | 235 | |
| 236 | +def _parse_fenced_tool_arguments( | |
| 237 | + tool_name: str, | |
| 238 | + command_line: str, | |
| 239 | +) -> dict[str, str] | None: | |
| 240 | + """Convert one simple fenced command line into Loader tool arguments.""" | |
| 241 | + | |
| 242 | + try: | |
| 243 | + argv = shlex.split(command_line) | |
| 244 | + except ValueError: | |
| 245 | + return None | |
| 246 | + if len(argv) < 2: | |
| 247 | + return None | |
| 248 | + | |
| 249 | + payload = command_line[len(argv[0]) :].strip() | |
| 250 | + if tool_name == "read" and len(argv) == 2: | |
| 251 | + return {"file_path": argv[1]} | |
| 252 | + if tool_name == "glob" and len(argv) == 2: | |
| 253 | + return {"pattern": argv[1]} | |
| 254 | + if tool_name == "bash" and payload: | |
| 255 | + return {"command": payload} | |
| 256 | + return None | |
| 257 | + | |
| 258 | + | |
def _extract_fenced_command_tool_calls(
    text: str,
    tool_names: dict[str, str] | None = None,
) -> tuple[list[ToolCall], list[tuple[int, int]]]:
    """Recover simple one-line fenced tool commands from local-model prose.

    Returns the recovered calls plus the (start, end) spans of the fences so
    the caller can strip them from the visible content.
    """

    recovered: list[ToolCall] = []
    spans: list[tuple[int, int]] = []

    # A fence is ``` plus an optional info string, a body, then a closing ```.
    for match in re.finditer(r"```(?:[^\n`]*)\n(.*?)```", text, re.DOTALL):
        body = match.group(1).strip()
        # Only single-line bodies are treated as commands.
        if not body or "\n" in body:
            continue
        leading_token = body.split(None, 1)[0]
        canonical_name = _canonicalize_tool_name(
            leading_token,
            tool_names,
            lowercase_default=True,
        )
        if canonical_name is None:
            continue
        arguments = _parse_fenced_tool_arguments(canonical_name, body)
        if not arguments:
            continue
        recovered.append(
            ToolCall(
                id=f"call_{len(recovered)}",
                name=canonical_name,
                arguments=arguments,
            )
        )
        spans.append(match.span())

    return recovered, spans
| 294 | + | |
| 295 | + | |
| 171 | 296 | def parse_tool_calls( |
| 172 | 297 | text: str, |
| 173 | 298 | *, |
@@ -253,6 +378,15 @@ def parse_tool_calls( | ||
| 253 | 378 | if tool_calls: |
| 254 | 379 | content = re.sub(bracket_pattern, "", content, flags=re.IGNORECASE) |
| 255 | 380 | |
| 381 | + if not tool_calls: | |
| 382 | + fenced_calls, fenced_spans = _extract_fenced_command_tool_calls( | |
| 383 | + text, | |
| 384 | + tool_names, | |
| 385 | + ) | |
| 386 | + if fenced_calls: | |
| 387 | + tool_calls = fenced_calls | |
| 388 | + content = _remove_spans(content, fenced_spans) | |
| 389 | + | |
| 256 | 390 | if is_final: |
| 257 | 391 | content = final_content |
| 258 | 392 | |
src/loader/runtime/prompting.pymodified@@ -119,6 +119,8 @@ MODE_GUIDANCE = { | ||
| 119 | 119 | - For servers, watchers, preview commands, or anything else that keeps running, |
| 120 | 120 | call `bash` with `background=true`, then inspect it with `bash_wait` or |
| 121 | 121 | `bash_jobs` instead of blocking the turn in the foreground |
| 122 | +- Prefer `edit`/`patch`/`write` over shell one-liners like `sed -i`, `perl -pi`, | |
| 123 | + or heredoc rewrites when modifying text files | |
| 122 | 124 | - If the task names an external directory like `~/Loader/...`, keep operating on |
| 123 | 125 | that exact path instead of falling back to the repo cwd; file tools accept |
| 124 | 126 | absolute and `~` paths, and `glob` works best with `path="~/Loader/..."` |
src/loader/runtime/recovery.pymodified@@ -7,6 +7,8 @@ from enum import Enum, auto | ||
| 7 | 7 | from pathlib import Path |
| 8 | 8 | from typing import Any |
| 9 | 9 | |
| 10 | +from .safeguard_services import extract_shell_text_rewrite_target | |
| 11 | + | |
| 10 | 12 | |
| 11 | 13 | class ErrorCategory(Enum): |
| 12 | 14 | """Categories of errors for recovery strategies.""" |
@@ -174,6 +176,8 @@ class RecoveryContext: | ||
| 174 | 176 | |
| 175 | 177 | if tool_name == "bash": |
| 176 | 178 | command = str(args.get("command", "")) |
| 179 | + if extract_shell_text_rewrite_target(command) is not None: | |
| 180 | + return True | |
| 177 | 181 | mutating_tokens = ( |
| 178 | 182 | "git commit", |
| 179 | 183 | "git add", |
@@ -525,7 +529,11 @@ def categorize_error(error_message: str) -> ErrorCategory: | ||
| 525 | 529 | return ErrorCategory.UNKNOWN |
| 526 | 530 | |
| 527 | 531 | |
| 528 | -def get_recovery_hints(category: ErrorCategory, tool_name: str) -> str: | |
| 532 | +def get_recovery_hints( | |
| 533 | + category: ErrorCategory, | |
| 534 | + tool_name: str, | |
| 535 | + args: dict[str, Any] | None = None, | |
| 536 | +) -> str: | |
| 529 | 537 | """Get hints for recovering from a specific error category.""" |
| 530 | 538 | |
| 531 | 539 | hints = { |
@@ -672,6 +680,14 @@ def get_recovery_hints(category: ErrorCategory, tool_name: str) -> str: | ||
| 672 | 680 | if tool_name == "bash" and category == ErrorCategory.COMMAND_NOT_FOUND: |
| 673 | 681 | category_hints = ["Check if installed: bash(which <command>)"] + category_hints |
| 674 | 682 | |
| 683 | + rewrite_target = extract_shell_text_rewrite_target(str((args or {}).get("command", ""))) | |
| 684 | + if tool_name == "bash" and rewrite_target is not None: | |
| 685 | + category_hints = [ | |
| 686 | + f"Switch to edit/patch/write for `{rewrite_target}` instead of shell rewriting it", | |
| 687 | + "Reuse the evidence you already gathered and apply the file change directly", | |
| 688 | + "If the exact replacement span is unclear, read just the target file and then edit it", | |
| 689 | + ] + category_hints | |
| 690 | + | |
| 675 | 691 | return "\n".join(f"- {hint}" for hint in category_hints) |
| 676 | 692 | |
| 677 | 693 | |
@@ -713,7 +729,7 @@ def format_recovery_prompt( | ||
| 713 | 729 | """Format a prompt asking the LLM to recover from an error.""" |
| 714 | 730 | |
| 715 | 731 | category = categorize_error(error) |
| 716 | - hints = get_recovery_hints(category, tool_name) | |
| 732 | + hints = get_recovery_hints(category, tool_name, args) | |
| 717 | 733 | args_str = ", ".join(f"{key}={value!r}" for key, value in args.items()) |
| 718 | 734 | |
| 719 | 735 | return RECOVERY_PROMPT.format( |
src/loader/runtime/safeguard_services.pymodified@@ -4,10 +4,380 @@ from __future__ import annotations | ||
| 4 | 4 | |
| 5 | 5 | import re |
| 6 | 6 | import shlex |
| 7 | +from difflib import get_close_matches | |
| 7 | 8 | from dataclasses import dataclass |
| 8 | 9 | from pathlib import Path |
| 9 | 10 | |
| 10 | 11 | |
| 12 | +TEXT_REWRITE_SUFFIXES = frozenset( | |
| 13 | + { | |
| 14 | + ".c", | |
| 15 | + ".cc", | |
| 16 | + ".cpp", | |
| 17 | + ".css", | |
| 18 | + ".csv", | |
| 19 | + ".go", | |
| 20 | + ".h", | |
| 21 | + ".hpp", | |
| 22 | + ".html", | |
| 23 | + ".htm", | |
| 24 | + ".java", | |
| 25 | + ".js", | |
| 26 | + ".json", | |
| 27 | + ".jsx", | |
| 28 | + ".md", | |
| 29 | + ".py", | |
| 30 | + ".rb", | |
| 31 | + ".rs", | |
| 32 | + ".sh", | |
| 33 | + ".sql", | |
| 34 | + ".svg", | |
| 35 | + ".toml", | |
| 36 | + ".ts", | |
| 37 | + ".tsx", | |
| 38 | + ".txt", | |
| 39 | + ".xml", | |
| 40 | + ".yaml", | |
| 41 | + ".yml", | |
| 42 | + } | |
| 43 | +) | |
| 44 | +TEXT_REWRITE_FILENAMES = frozenset( | |
| 45 | + { | |
| 46 | + "dockerfile", | |
| 47 | + "index.html", | |
| 48 | + "makefile", | |
| 49 | + "package.json", | |
| 50 | + "pyproject.toml", | |
| 51 | + "readme", | |
| 52 | + "readme.md", | |
| 53 | + } | |
| 54 | +) | |
| 55 | + | |
| 56 | + | |
| 57 | +def _strip_shell_token(token: str) -> str: | |
| 58 | + return token.strip().strip("\"'").rstrip(";|&") | |
| 59 | + | |
| 60 | + | |
| 61 | +def _looks_like_text_rewrite_target(token: str) -> bool: | |
| 62 | + candidate = _strip_shell_token(token) | |
| 63 | + if not candidate or candidate in {"-", "/dev/null"}: | |
| 64 | + return False | |
| 65 | + if candidate.startswith("-"): | |
| 66 | + return False | |
| 67 | + lowered = Path(candidate).name.lower() | |
| 68 | + if lowered in TEXT_REWRITE_FILENAMES: | |
| 69 | + return True | |
| 70 | + return Path(candidate).suffix.lower() in TEXT_REWRITE_SUFFIXES | |
| 71 | + | |
| 72 | + | |
| 73 | +def _extract_redirect_target(argv: list[str]) -> str | None: | |
| 74 | + for index, token in enumerate(argv): | |
| 75 | + if token in {">", ">>"} and index + 1 < len(argv): | |
| 76 | + candidate = argv[index + 1] | |
| 77 | + if _looks_like_text_rewrite_target(candidate): | |
| 78 | + return _strip_shell_token(candidate) | |
| 79 | + if token == "tee": | |
| 80 | + for candidate in argv[index + 1 :]: | |
| 81 | + if candidate.startswith("-"): | |
| 82 | + continue | |
| 83 | + if _looks_like_text_rewrite_target(candidate): | |
| 84 | + return _strip_shell_token(candidate) | |
| 85 | + break | |
| 86 | + return None | |
| 87 | + | |
| 88 | + | |
| 89 | +def extract_shell_text_rewrite_target(command: str) -> str | None: | |
| 90 | + """Return the target file when bash is used as a brittle text editor.""" | |
| 91 | + | |
| 92 | + normalized = " ".join(str(command or "").split()) | |
| 93 | + if not normalized: | |
| 94 | + return None | |
| 95 | + | |
| 96 | + try: | |
| 97 | + argv = shlex.split(normalized) | |
| 98 | + except ValueError: | |
| 99 | + argv = [] | |
| 100 | + | |
| 101 | + if argv: | |
| 102 | + for index, token in enumerate(argv): | |
| 103 | + if token == "sed" and any(part.startswith("-i") for part in argv[index + 1 :]): | |
| 104 | + for candidate in reversed(argv[index + 1 :]): | |
| 105 | + if _looks_like_text_rewrite_target(candidate): | |
| 106 | + return _strip_shell_token(candidate) | |
| 107 | + if token == "perl" and any( | |
| 108 | + part.startswith("-p") or part.startswith("-0p") for part in argv[index + 1 :] | |
| 109 | + ): | |
| 110 | + for candidate in reversed(argv[index + 1 :]): | |
| 111 | + if _looks_like_text_rewrite_target(candidate): | |
| 112 | + return _strip_shell_token(candidate) | |
| 113 | + | |
| 114 | + redirect_target = _extract_redirect_target(argv) | |
| 115 | + if redirect_target is not None: | |
| 116 | + return redirect_target | |
| 117 | + | |
| 118 | + regex_match = re.search( | |
| 119 | + r"(?:sed\s+-i(?:\s+''|\s+\"\"|\s+'[^']*'|\s+\"[^\"]*\")?.*?|perl\s+-[0-9]*p[i0-9-]*.*?)\s+([^\s\"';|&]+(?:\.[A-Za-z0-9]+)?)", | |
| 120 | + normalized, | |
| 121 | + ) | |
| 122 | + if regex_match: | |
| 123 | + candidate = _strip_shell_token(regex_match.group(1)) | |
| 124 | + if _looks_like_text_rewrite_target(candidate): | |
| 125 | + return candidate | |
| 126 | + | |
| 127 | + redirect_match = re.search(r"(?:>>?|tee(?:\s+-a)?)\s+([^\s\"';|&]+)", normalized) | |
| 128 | + if redirect_match: | |
| 129 | + candidate = _strip_shell_token(redirect_match.group(1)) | |
| 130 | + if _looks_like_text_rewrite_target(candidate): | |
| 131 | + return candidate | |
| 132 | + | |
| 133 | + return None | |
| 134 | + | |
| 135 | + | |
def extract_html_title_from_text(payload: str) -> str | None:
    """Extract one human-readable HTML title from raw file contents.

    Prefers the first <h1>, falling back to <title>; inner tags are stripped
    and whitespace collapsed. Returns None when neither yields text.
    """

    for pattern in (r"<h1[^>]*>(.*?)</h1>", r"<title[^>]*>(.*?)</title>"):
        match = re.search(pattern, payload, re.IGNORECASE | re.DOTALL)
        if match is None:
            continue
        stripped = re.sub(r"<[^>]+>", " ", match.group(1))
        collapsed = " ".join(stripped.split()).strip()
        if collapsed:
            return collapsed
    return None


def read_html_title(path: Path) -> str:
    """Read one HTML file's title, returning "" on any read failure."""

    try:
        contents = path.read_text()
    except OSError:
        return ""
    return extract_html_title_from_text(contents) or ""


def format_html_inventory_entry(root: Path, candidate: Path) -> str:
    """Format one exact href/title pair for model-facing guidance."""

    resolved_root = root.expanduser().resolve(strict=False)
    resolved_candidate = candidate.expanduser().resolve(strict=False)
    try:
        href = str(resolved_candidate.relative_to(resolved_root))
    except ValueError:
        # Candidate lives outside root — fall back to the bare file name.
        href = resolved_candidate.name
    title = read_html_title(candidate)
    return f"{href} = {title}" if title else href
| 172 | + | |
| 173 | + | |
| 174 | +def _collect_html_inventory_entries(index_path: str | Path) -> list[tuple[str, str]]: | |
| 175 | + """Return exact href/title pairs for sibling HTML chapters.""" | |
| 176 | + | |
| 177 | + index = Path(index_path).expanduser() | |
| 178 | + if index.name != "index.html": | |
| 179 | + return [] | |
| 180 | + | |
| 181 | + chapters_dir = index.parent / "chapters" | |
| 182 | + if not chapters_dir.is_dir(): | |
| 183 | + return [] | |
| 184 | + | |
| 185 | + entries: list[tuple[str, str]] = [] | |
| 186 | + for candidate in sorted(chapters_dir.glob("*.html")): | |
| 187 | + if not candidate.is_file(): | |
| 188 | + continue | |
| 189 | + title = read_html_title(candidate) | |
| 190 | + if not title: | |
| 191 | + continue | |
| 192 | + href = format_html_inventory_entry(index.parent, candidate).split(" = ", 1)[0] | |
| 193 | + entries.append((href, title)) | |
| 194 | + return entries | |
| 195 | + | |
| 196 | + | |
def summarize_html_inventory(
    index_path: str | Path,
    *,
    limit: int | None = 12,
) -> str | None:
    """Summarize the existing sibling HTML inventory for one index page.

    Returns a "; "-joined ``href = title`` listing, truncated with "; ..."
    past *limit* entries, or None when there is nothing to report.
    """

    index = Path(index_path).expanduser()
    if index.name != "index.html":
        return None

    entries = [
        f"{href} = {title}"
        for href, title in _collect_html_inventory_entries(index)
    ]
    if not entries:
        return None

    if limit is not None and len(entries) > limit:
        return "; ".join(entries[:limit]) + "; ..."
    return "; ".join(entries)
| 215 | + | |
| 216 | + | |
def extract_html_toc_excerpt(
    index_path: str | Path,
    *,
    max_lines: int = 16,
) -> str | None:
    """Extract the current HTML table-of-contents block for recovery guidance.

    Looks for a "Table of Contents" heading followed by its list, then falls
    back to a `chapter-list` <ul>; long excerpts are truncated with "...".
    """

    index = Path(index_path).expanduser()
    if index.name != "index.html":
        return None

    try:
        text = index.read_text()
    except OSError:
        return None

    patterns = (
        r"(<h2[^>]*>\s*Table of Contents\s*</h2>.*?</ul>)",
        r"(<ul[^>]*class=\"[^\"]*chapter-list[^\"]*\"[^>]*>.*?</ul>)",
    )
    match = None
    for pattern in patterns:
        match = re.search(pattern, text, re.IGNORECASE | re.DOTALL)
        if match:
            break
    if not match:
        return None

    lines = [line.rstrip() for line in match.group(1).splitlines() if line.strip()]
    if not lines:
        return None
    if len(lines) > max_lines:
        lines = lines[:max_lines] + ["..."]
    return "\n".join(lines)
| 253 | + | |
| 254 | + | |
def build_html_toc_replacement_block(index_path: str | Path) -> str | None:
    """Build one exact replacement TOC block from the verified sibling inventory.

    Reuses the heading, <ul>, indentation, and closing tag from the current
    excerpt when available, and regenerates one <li> per verified chapter.
    Returns None when no chapter inventory exists.
    """

    entries = _collect_html_inventory_entries(index_path)
    if not entries:
        return None

    excerpt = extract_html_toc_excerpt(index_path, max_lines=64)
    excerpt_lines = excerpt.splitlines() if excerpt else []

    heading_line = next(
        (line.rstrip() for line in excerpt_lines if "<h2" in line.lower()),
        "<h2>Table of Contents</h2>",
    )
    ul_line = next(
        (
            line.rstrip()
            for line in excerpt_lines
            if "<ul" in line.lower() and "chapter-list" in line.lower()
        ),
        '    <ul class="chapter-list">',
    )
    # Hoist the indent regex out of the f-strings: a backslash inside an
    # f-string expression is a SyntaxError before Python 3.12 (PEP 701),
    # and this also avoids matching the same line twice.
    ul_indent = re.match(r"^\s*", ul_line).group(0)
    li_indent = next(
        (
            re.match(r"^\s*", line).group(0)
            for line in excerpt_lines
            if "<li><a " in line
        ),
        ul_indent + "    ",
    )
    closing_line = next(
        (line.rstrip() for line in excerpt_lines if "</ul>" in line.lower()),
        f"{ul_indent}</ul>",
    )

    lines = [heading_line, ul_line]
    lines.extend(
        f'{li_indent}<li><a href="{href}">{title}</a></li>'
        for href, title in entries
    )
    lines.append(closing_line)
    return "\n".join(lines)
| 297 | + | |
| 298 | + | |
def build_html_toc_edit_call_template(index_path: str | Path) -> str | None:
    """Build one concrete `edit(...)` template for replacing the TOC block.

    Returns None unless both the current excerpt and a regenerated
    replacement block are available for this index page.
    """

    index = Path(index_path).expanduser()
    excerpt = extract_html_toc_excerpt(index, max_lines=64)
    replacement = build_html_toc_replacement_block(index)
    if not excerpt or not replacement:
        return None

    template_lines = [
        "edit(",
        f'    file_path="{index}",',
        '    old_string="""',
        excerpt,
        '""",',
        '    new_string="""',
        replacement,
        '"""',
        ")",
    ]
    return "\n".join(template_lines)
| 321 | + | |
| 322 | + | |
@dataclass(frozen=True)
class HtmlTocValidationResult:
    """Semantic validation result for one chapter-list table of contents."""

    # Overall verdict: True only when every link resolves and every label matches.
    valid: bool
    # Number of <a> links found inside the chapter-list <ul>.
    link_count: int
    # "href -> missing" entries for links whose target file does not exist.
    missing: tuple[str, ...] = ()
    # "href -> label != title" entries where the link label disagrees with the file's title.
    mismatched: tuple[str, ...] = ()
| 332 | + | |
def validate_html_toc(index_path: str | Path) -> HtmlTocValidationResult | None:
    """Validate that one HTML index TOC points at real chapter files with matching titles.

    Returns None when the path is not an index.html or cannot be read;
    otherwise a result describing missing targets and label/title mismatches.
    """

    index = Path(index_path).expanduser()
    if index.name != "index.html":
        return None

    try:
        html = index.read_text()
    except OSError:
        return None

    toc_match = re.search(r'<ul class="chapter-list">(.*?)</ul>', html, re.S)
    if toc_match is None:
        return HtmlTocValidationResult(
            valid=False,
            link_count=0,
            missing=("Missing chapter-list table of contents",),
        )

    links = re.findall(r'<a href="([^"]+)">([^<]+)</a>', toc_match.group(1))
    if not links:
        return HtmlTocValidationResult(
            valid=False,
            link_count=0,
            missing=("No chapter links found in table of contents",),
        )

    base_dir = index.parent
    missing: list[str] = []
    mismatched: list[str] = []
    for href, label in links:
        target = (base_dir / href).expanduser().resolve(strict=False)
        if not target.exists():
            missing.append(f"{href} -> missing")
            continue
        # An unreadable/empty title is tolerated; only a conflicting one counts.
        actual_title = read_html_title(target)
        if actual_title and label.strip() != actual_title:
            mismatched.append(f"{href} -> {label.strip()} != {actual_title}")

    return HtmlTocValidationResult(
        valid=not missing and not mismatched,
        link_count=len(links),
        missing=tuple(missing),
        mismatched=tuple(mismatched),
    )
| 380 | + | |
| 11 | 381 | class ActionTracker: |
| 12 | 382 | """Tracks completed actions to prevent duplicates and detect loops.""" |
| 13 | 383 | |
@@ -19,6 +389,8 @@ class ActionTracker: | ||
| 19 | 389 | READ_REPEAT_THRESHOLD = 3 |
| 20 | 390 | SEARCH_REPEAT_THRESHOLD = 2 |
| 21 | 391 | BASH_OBSERVATION_REPEAT_THRESHOLD = 2 |
| 392 | + HTML_CHAPTER_EVIDENCE_THRESHOLD = 3 | |
| 393 | + RECENT_PATH_CONTEXT_LIMIT = 12 | |
| 22 | 394 | |
| 23 | 395 | def __init__(self) -> None: |
| 24 | 396 | self._file_writes: dict[str, list[str]] = {} |
@@ -32,6 +404,10 @@ class ActionTracker: | ||
| 32 | 404 | self._recent_reads: dict[str, tuple[int, int, int]] = {} |
| 33 | 405 | self._recent_searches: dict[str, tuple[int, int, int]] = {} |
| 34 | 406 | self._recent_bash_observations: dict[str, tuple[int, int, int]] = {} |
| 407 | + self._recent_html_directory_reads: dict[str, tuple[int, set[str]]] = {} | |
| 408 | + self._recent_path_contexts: list[str] = [] | |
| 409 | + self._validated_html_tocs: dict[str, int] = {} | |
| 410 | + self._verified_html_inventory_dirs: set[str] = set() | |
| 35 | 411 | |
| 36 | 412 | def reset(self) -> None: |
| 37 | 413 | self._file_writes.clear() |
@@ -45,6 +421,10 @@ class ActionTracker: | ||
| 45 | 421 | self._recent_reads.clear() |
| 46 | 422 | self._recent_searches.clear() |
| 47 | 423 | self._recent_bash_observations.clear() |
| 424 | + self._recent_html_directory_reads.clear() | |
| 425 | + self._recent_path_contexts.clear() | |
| 426 | + self._validated_html_tocs.clear() | |
| 427 | + self._verified_html_inventory_dirs.clear() | |
| 48 | 428 | |
| 49 | 429 | def _normalize_path(self, path: str) -> str: |
| 50 | 430 | expanded = Path(path).expanduser() |
@@ -111,6 +491,25 @@ class ActionTracker: | ||
| 111 | 491 | def record_mkdir(self, dir_path: str) -> None: |
| 112 | 492 | self._dirs_created.add(self._normalize_path(dir_path)) |
| 113 | 493 | |
| 494 | + def recent_path_contexts(self) -> list[str]: | |
| 495 | + return list(self._recent_path_contexts) | |
| 496 | + | |
| 497 | + def note_validated_html_toc(self, index_path: str) -> None: | |
| 498 | + """Record that one index currently satisfies the semantic chapter-link check.""" | |
| 499 | + | |
| 500 | + normalized = self._normalize_path(index_path) | |
| 501 | + if Path(normalized).name != "index.html": | |
| 502 | + return | |
| 503 | + self._validated_html_tocs[normalized] = self._mutation_epoch | |
| 504 | + | |
| 505 | + def note_verified_html_inventory(self, index_path: str) -> None: | |
| 506 | + """Record that one sibling chapter inventory is already known exactly.""" | |
| 507 | + | |
| 508 | + normalized = self._normalize_path(index_path) | |
| 509 | + path = Path(normalized) | |
| 510 | + chapters_dir = path if path.name == "chapters" else path.parent / "chapters" | |
| 511 | + self._verified_html_inventory_dirs.add(self._normalize_path(str(chapters_dir))) | |
| 512 | + | |
| 114 | 513 | def check_tool_call(self, tool_name: str, arguments: dict) -> tuple[bool, str]: |
| 115 | 514 | if tool_name == "write": |
| 116 | 515 | file_path = arguments.get("file_path", "") |
@@ -136,8 +535,28 @@ class ActionTracker: | ||
| 136 | 535 | return True, f"Same patch already applied to: {file_path}" |
| 137 | 536 | |
| 138 | 537 | elif tool_name == "read": |
| 538 | + inventory_duplicate, inventory_reason = self._check_verified_html_inventory_observation( | |
| 539 | + tool_name, | |
| 540 | + arguments, | |
| 541 | + ) | |
| 542 | + if inventory_duplicate: | |
| 543 | + return True, inventory_reason | |
| 544 | + validated_duplicate, validated_reason = self._check_validated_html_toc_observation( | |
| 545 | + tool_name, | |
| 546 | + arguments, | |
| 547 | + ) | |
| 548 | + if validated_duplicate: | |
| 549 | + return True, validated_reason | |
| 139 | 550 | read_key = self._make_read_key(arguments) |
| 140 | 551 | if read_key: |
| 552 | + sufficiency_duplicate, sufficiency_reason = ( | |
| 553 | + self._check_html_observation_sufficiency( | |
| 554 | + tool_name, | |
| 555 | + arguments, | |
| 556 | + ) | |
| 557 | + ) | |
| 558 | + if sufficiency_duplicate: | |
| 559 | + return True, sufficiency_reason | |
| 141 | 560 | duplicate, reason = self._check_recent_observation( |
| 142 | 561 | self._recent_reads, |
| 143 | 562 | read_key, |
@@ -153,8 +572,28 @@ class ActionTracker: | ||
| 153 | 572 | return True, reason |
| 154 | 573 | |
| 155 | 574 | elif tool_name in {"glob", "grep"}: |
| 575 | + inventory_duplicate, inventory_reason = self._check_verified_html_inventory_observation( | |
| 576 | + tool_name, | |
| 577 | + arguments, | |
| 578 | + ) | |
| 579 | + if inventory_duplicate: | |
| 580 | + return True, inventory_reason | |
| 581 | + validated_duplicate, validated_reason = self._check_validated_html_toc_observation( | |
| 582 | + tool_name, | |
| 583 | + arguments, | |
| 584 | + ) | |
| 585 | + if validated_duplicate: | |
| 586 | + return True, validated_reason | |
| 156 | 587 | observation_key = self._make_search_key(tool_name, arguments) |
| 157 | 588 | if observation_key: |
| 589 | + sufficiency_duplicate, sufficiency_reason = ( | |
| 590 | + self._check_html_observation_sufficiency( | |
| 591 | + tool_name, | |
| 592 | + arguments, | |
| 593 | + ) | |
| 594 | + ) | |
| 595 | + if sufficiency_duplicate: | |
| 596 | + return True, sufficiency_reason | |
| 158 | 597 | duplicate, reason = self._check_recent_observation( |
| 159 | 598 | self._recent_searches, |
| 160 | 599 | observation_key, |
@@ -170,6 +609,18 @@ class ActionTracker: | ||
| 170 | 609 | elif tool_name == "bash": |
| 171 | 610 | command = str(arguments.get("command", "")).strip() |
| 172 | 611 | if self._is_observational_bash(command): |
| 612 | + inventory_duplicate, inventory_reason = self._check_verified_html_inventory_observation( | |
| 613 | + tool_name, | |
| 614 | + arguments, | |
| 615 | + ) | |
| 616 | + if inventory_duplicate: | |
| 617 | + return True, inventory_reason | |
| 618 | + validated_duplicate, validated_reason = self._check_validated_html_toc_observation( | |
| 619 | + tool_name, | |
| 620 | + arguments, | |
| 621 | + ) | |
| 622 | + if validated_duplicate: | |
| 623 | + return True, validated_reason | |
| 173 | 624 | duplicate, reason = self._check_recent_observation( |
| 174 | 625 | self._recent_bash_observations, |
| 175 | 626 | self._normalize_command(command), |
@@ -198,6 +649,8 @@ class ActionTracker: | ||
| 198 | 649 | content = arguments.get("content", "") |
| 199 | 650 | if file_path: |
| 200 | 651 | self.record_file_create(file_path, content) |
| 652 | + self._record_path_context(file_path) | |
| 653 | + self._clear_verified_html_inventory_for_path(file_path) | |
| 201 | 654 | self._note_mutation() |
| 202 | 655 | |
| 203 | 656 | elif tool_name == "edit": |
@@ -206,6 +659,8 @@ class ActionTracker: | ||
| 206 | 659 | new_string = arguments.get("new_string", "") |
| 207 | 660 | if file_path: |
| 208 | 661 | self.record_edit(file_path, old_string, new_string) |
| 662 | + self._record_path_context(file_path) | |
| 663 | + self._clear_verified_html_inventory_for_path(file_path) | |
| 209 | 664 | self._note_mutation() |
| 210 | 665 | |
| 211 | 666 | elif tool_name == "patch": |
@@ -217,6 +672,8 @@ class ActionTracker: | ||
| 217 | 672 | self.record_edit(file_path, str(hunks), "structured_patch") |
| 218 | 673 | elif isinstance(raw_patch, str) and raw_patch.strip(): |
| 219 | 674 | self.record_edit(file_path, raw_patch, "raw_patch") |
| 675 | + self._record_path_context(file_path) | |
| 676 | + self._clear_verified_html_inventory_for_path(file_path) | |
| 220 | 677 | self._note_mutation() |
| 221 | 678 | |
| 222 | 679 | elif tool_name == "read": |
@@ -226,6 +683,10 @@ class ActionTracker: | ||
| 226 | 683 | self._recent_reads, |
| 227 | 684 | read_key, |
| 228 | 685 | ) |
| 686 | + file_path = str(arguments.get("file_path", "")).strip() | |
| 687 | + if file_path: | |
| 688 | + self._record_path_context(file_path) | |
| 689 | + self._record_html_directory_read(arguments) | |
| 229 | 690 | |
| 230 | 691 | elif tool_name in {"glob", "grep"}: |
| 231 | 692 | observation_key = self._make_search_key(tool_name, arguments) |
@@ -234,12 +695,18 @@ class ActionTracker: | ||
| 234 | 695 | self._recent_searches, |
| 235 | 696 | observation_key, |
| 236 | 697 | ) |
| 698 | + search_path = str(arguments.get("path", "")).strip() | |
| 699 | + if search_path: | |
| 700 | + self._record_path_context(search_path, is_directory_hint=True) | |
| 237 | 701 | |
| 238 | 702 | elif tool_name == "bash": |
| 239 | 703 | command = arguments.get("command", "") |
| 240 | 704 | if command: |
| 241 | 705 | self.record_command(command) |
| 242 | 706 | if self._is_mutating_bash(command): |
| 707 | + target = extract_shell_text_rewrite_target(command) | |
| 708 | + if target: | |
| 709 | + self._clear_verified_html_inventory_for_path(target) | |
| 243 | 710 | self._note_mutation() |
| 244 | 711 | elif self._is_observational_bash(command): |
| 245 | 712 | self._record_observation( |
@@ -411,6 +878,8 @@ class ActionTracker: | ||
| 411 | 878 | norm_cmd = self._normalize_command(command) |
| 412 | 879 | if not norm_cmd: |
| 413 | 880 | return False |
| 881 | + if extract_shell_text_rewrite_target(norm_cmd) is not None: | |
| 882 | + return True | |
| 414 | 883 | mutating_fragments = ( |
| 415 | 884 | " >", |
| 416 | 885 | ">>", |
@@ -436,6 +905,248 @@ class ActionTracker: | ||
| 436 | 905 | return False |
| 437 | 906 | return argv[0] in {"touch", "mkdir", "rm", "mv", "cp", "chmod", "chown"} |
| 438 | 907 | |
| 908 | + def _record_path_context(self, path_value: str, *, is_directory_hint: bool = False) -> None: | |
| 909 | + normalized = self._normalize_path(path_value) | |
| 910 | + path = Path(normalized) | |
| 911 | + primary_dir = path if is_directory_hint or path.is_dir() else path.parent | |
| 912 | + candidate_dirs = [primary_dir] | |
| 913 | + if primary_dir.parent != primary_dir: | |
| 914 | + candidate_dirs.append(primary_dir.parent) | |
| 915 | + | |
| 916 | + for candidate_dir in candidate_dirs: | |
| 917 | + normalized_dir = self._normalize_path(str(candidate_dir)) | |
| 918 | + if normalized_dir in self._recent_path_contexts: | |
| 919 | + self._recent_path_contexts.remove(normalized_dir) | |
| 920 | + self._recent_path_contexts.insert(0, normalized_dir) | |
| 921 | + | |
| 922 | + if len(self._recent_path_contexts) > self.RECENT_PATH_CONTEXT_LIMIT: | |
| 923 | + del self._recent_path_contexts[self.RECENT_PATH_CONTEXT_LIMIT :] | |
| 924 | + | |
| 925 | + def _record_html_directory_read(self, arguments: dict) -> None: | |
| 926 | + file_path = str(arguments.get("file_path", "")).strip() | |
| 927 | + if not file_path: | |
| 928 | + return | |
| 929 | + normalized_path = self._normalize_path(file_path) | |
| 930 | + path = Path(normalized_path) | |
| 931 | + if path.suffix != ".html" or path.name == "index.html" or path.parent.name != "chapters": | |
| 932 | + return | |
| 933 | + | |
| 934 | + directory = str(path.parent) | |
| 935 | + last_seen = self._recent_html_directory_reads.get(directory) | |
| 936 | + if last_seen is None or last_seen[0] != self._mutation_epoch: | |
| 937 | + self._recent_html_directory_reads[directory] = ( | |
| 938 | + self._mutation_epoch, | |
| 939 | + {path.name}, | |
| 940 | + ) | |
| 941 | + return | |
| 942 | + | |
| 943 | + _, seen_files = last_seen | |
| 944 | + updated = set(seen_files) | |
| 945 | + updated.add(path.name) | |
| 946 | + self._recent_html_directory_reads[directory] = ( | |
| 947 | + self._mutation_epoch, | |
| 948 | + updated, | |
| 949 | + ) | |
| 950 | + | |
| 951 | + def _check_html_observation_sufficiency( | |
| 952 | + self, | |
| 953 | + tool_name: str, | |
| 954 | + arguments: dict, | |
| 955 | + ) -> tuple[bool, str]: | |
| 956 | + if tool_name == "read": | |
| 957 | + file_path = str(arguments.get("file_path", "")).strip() | |
| 958 | + if not file_path: | |
| 959 | + return False, "" | |
| 960 | + normalized_path = self._normalize_path(file_path) | |
| 961 | + path = Path(normalized_path) | |
| 962 | + if path.name != "index.html": | |
| 963 | + return False, "" | |
| 964 | + chapters_dir = str(path.parent / "chapters") | |
| 965 | + chapter_count = self._chapter_evidence_count(chapters_dir) | |
| 966 | + if chapter_count < self.HTML_CHAPTER_EVIDENCE_THRESHOLD: | |
| 967 | + return False, "" | |
| 968 | + read_key = self._make_read_key(arguments) | |
| 969 | + if read_key is None: | |
| 970 | + return False, "" | |
| 971 | + last_seen = self._recent_reads.get(read_key) | |
| 972 | + if last_seen is None: | |
| 973 | + return False, "" | |
| 974 | + _, _, repeat_count = last_seen | |
| 975 | + if repeat_count < 2: | |
| 976 | + return False, "" | |
| 977 | + return ( | |
| 978 | + True, | |
| 979 | + "Already confirmed multiple chapter files in the sibling chapters " | |
| 980 | + "directory; reuse the known file/title evidence and update index.html " | |
| 981 | + "instead of rereading it", | |
| 982 | + ) | |
| 983 | + | |
| 984 | + if tool_name in {"glob", "grep"}: | |
| 985 | + search_path = str(arguments.get("path", "")).strip() | |
| 986 | + if not search_path: | |
| 987 | + return False, "" | |
| 988 | + normalized_path = self._normalize_path(search_path) | |
| 989 | + path = Path(normalized_path) | |
| 990 | + if path.name != "chapters": | |
| 991 | + return False, "" | |
| 992 | + chapter_count = self._chapter_evidence_count(str(path)) | |
| 993 | + if chapter_count < self.HTML_CHAPTER_EVIDENCE_THRESHOLD: | |
| 994 | + return False, "" | |
| 995 | + observation_key = self._make_search_key(tool_name, arguments) | |
| 996 | + if observation_key is None or observation_key not in self._recent_searches: | |
| 997 | + return False, "" | |
| 998 | + return ( | |
| 999 | + True, | |
| 1000 | + "Already confirmed multiple chapter files in this directory; reuse " | |
| 1001 | + "the known filename/title evidence and update the target index instead " | |
| 1002 | + "of rerunning the directory search", | |
| 1003 | + ) | |
| 1004 | + | |
| 1005 | + return False, "" | |
| 1006 | + | |
| 1007 | + def _chapter_evidence_count(self, directory: str) -> int: | |
| 1008 | + last_seen = self._recent_html_directory_reads.get(directory) | |
| 1009 | + if last_seen is None: | |
| 1010 | + return 0 | |
| 1011 | + last_epoch, seen_files = last_seen | |
| 1012 | + if last_epoch != self._mutation_epoch: | |
| 1013 | + return 0 | |
| 1014 | + return len(seen_files) | |
| 1015 | + | |
| 1016 | + def _check_validated_html_toc_observation( | |
| 1017 | + self, | |
| 1018 | + tool_name: str, | |
| 1019 | + arguments: dict, | |
| 1020 | + ) -> tuple[bool, str]: | |
| 1021 | + related_paths = self._validated_html_related_paths(tool_name, arguments) | |
| 1022 | + if not related_paths: | |
| 1023 | + return False, "" | |
| 1024 | + | |
| 1025 | + for path in related_paths: | |
| 1026 | + if self._matches_validated_html_toc(path): | |
| 1027 | + return ( | |
| 1028 | + True, | |
| 1029 | + "The current index.html already passes the validated chapter-link " | |
| 1030 | + "check; stop rereading index.html or chapters/ and finish the task " | |
| 1031 | + "unless a specific href or title is still unresolved", | |
| 1032 | + ) | |
| 1033 | + return False, "" | |
| 1034 | + | |
| 1035 | + def _check_verified_html_inventory_observation( | |
| 1036 | + self, | |
| 1037 | + tool_name: str, | |
| 1038 | + arguments: dict, | |
| 1039 | + ) -> tuple[bool, str]: | |
| 1040 | + related_paths = self._verified_inventory_related_paths(tool_name, arguments) | |
| 1041 | + if not related_paths: | |
| 1042 | + return False, "" | |
| 1043 | + | |
| 1044 | + for path in related_paths: | |
| 1045 | + if self._matches_verified_html_inventory(path): | |
| 1046 | + return ( | |
| 1047 | + True, | |
| 1048 | + "The verified chapter inventory already lists the exact href/title " | |
| 1049 | + "pairs for this directory; update index.html from that inventory " | |
| 1050 | + "instead of rereading chapter files", | |
| 1051 | + ) | |
| 1052 | + return False, "" | |
| 1053 | + | |
| 1054 | + def _validated_html_related_paths( | |
| 1055 | + self, | |
| 1056 | + tool_name: str, | |
| 1057 | + arguments: dict, | |
| 1058 | + ) -> list[str]: | |
| 1059 | + if tool_name == "read": | |
| 1060 | + file_path = str(arguments.get("file_path", "")).strip() | |
| 1061 | + return [self._normalize_path(file_path)] if file_path else [] | |
| 1062 | + | |
| 1063 | + if tool_name in {"glob", "grep"}: | |
| 1064 | + search_path = str(arguments.get("path", "")).strip() | |
| 1065 | + return [self._normalize_path(search_path)] if search_path else [] | |
| 1066 | + | |
| 1067 | + if tool_name == "bash": | |
| 1068 | + command = str(arguments.get("command", "")).strip() | |
| 1069 | + if not command: | |
| 1070 | + return [] | |
| 1071 | + return self._extract_observational_bash_paths(command) | |
| 1072 | + | |
| 1073 | + return [] | |
| 1074 | + | |
| 1075 | + def _verified_inventory_related_paths( | |
| 1076 | + self, | |
| 1077 | + tool_name: str, | |
| 1078 | + arguments: dict, | |
| 1079 | + ) -> list[str]: | |
| 1080 | + if tool_name == "read": | |
| 1081 | + file_path = str(arguments.get("file_path", "")).strip() | |
| 1082 | + return [self._normalize_path(file_path)] if file_path else [] | |
| 1083 | + | |
| 1084 | + if tool_name in {"glob", "grep"}: | |
| 1085 | + search_path = str(arguments.get("path", "")).strip() | |
| 1086 | + return [self._normalize_path(search_path)] if search_path else [] | |
| 1087 | + | |
| 1088 | + if tool_name == "bash": | |
| 1089 | + command = str(arguments.get("command", "")).strip() | |
| 1090 | + if not command: | |
| 1091 | + return [] | |
| 1092 | + return self._extract_observational_bash_paths(command) | |
| 1093 | + | |
| 1094 | + return [] | |
| 1095 | + | |
| 1096 | + def _matches_validated_html_toc(self, path: str) -> bool: | |
| 1097 | + normalized = self._normalize_path(path) | |
| 1098 | + candidate = Path(normalized) | |
| 1099 | + for index_path, epoch in self._validated_html_tocs.items(): | |
| 1100 | + if epoch != self._mutation_epoch: | |
| 1101 | + continue | |
| 1102 | + index = Path(index_path) | |
| 1103 | + chapters = Path(self._normalize_path(str(index.parent / "chapters"))) | |
| 1104 | + if candidate == index or candidate == chapters: | |
| 1105 | + return True | |
| 1106 | + if candidate.parent == chapters: | |
| 1107 | + return True | |
| 1108 | + return False | |
| 1109 | + | |
| 1110 | + def _matches_verified_html_inventory(self, path: str) -> bool: | |
| 1111 | + normalized = self._normalize_path(path) | |
| 1112 | + candidate = Path(normalized) | |
| 1113 | + for directory in self._verified_html_inventory_dirs: | |
| 1114 | + chapters = Path(directory) | |
| 1115 | + if candidate == chapters or candidate.parent == chapters: | |
| 1116 | + return True | |
| 1117 | + return False | |
| 1118 | + | |
| 1119 | + def _clear_verified_html_inventory_for_path(self, path_value: str) -> None: | |
| 1120 | + normalized = self._normalize_path(path_value) | |
| 1121 | + candidate = Path(normalized) | |
| 1122 | + stale: set[str] = set() | |
| 1123 | + for directory in self._verified_html_inventory_dirs: | |
| 1124 | + chapters = Path(directory) | |
| 1125 | + if candidate == chapters or candidate.parent == chapters: | |
| 1126 | + stale.add(directory) | |
| 1127 | + self._verified_html_inventory_dirs.difference_update(stale) | |
| 1128 | + | |
| 1129 | + def _extract_observational_bash_paths(self, command: str) -> list[str]: | |
| 1130 | + norm_cmd = self._normalize_command(command) | |
| 1131 | + try: | |
| 1132 | + argv = shlex.split(norm_cmd) | |
| 1133 | + except ValueError: | |
| 1134 | + return [] | |
| 1135 | + if not argv: | |
| 1136 | + return [] | |
| 1137 | + | |
| 1138 | + paths: list[str] = [] | |
| 1139 | + for token in argv[1:]: | |
| 1140 | + candidate = _strip_shell_token(token) | |
| 1141 | + if not candidate or candidate.startswith("-"): | |
| 1142 | + continue | |
| 1143 | + if any(marker in candidate for marker in ("/", "~")) or Path(candidate).suffix == ".html": | |
| 1144 | + paths.append(self._normalize_path(candidate)) | |
| 1145 | + continue | |
| 1146 | + if candidate.rstrip("/").endswith("chapters"): | |
| 1147 | + paths.append(self._normalize_path(candidate)) | |
| 1148 | + return paths | |
| 1149 | + | |
| 439 | 1150 | |
| 440 | 1151 | @dataclass |
| 441 | 1152 | class ValidationResult: |
@@ -500,9 +1211,20 @@ class PreActionValidator: | ||
| 500 | 1211 | return ValidationResult( |
| 501 | 1212 | valid=False, |
| 502 | 1213 | reason=reason, |
| 503 | - suggestion="This command is too dangerous to execute", | |
| 504 | - severity="block", | |
| 505 | - ) | |
| 1214 | + suggestion="This command is too dangerous to execute", | |
| 1215 | + severity="block", | |
| 1216 | + ) | |
| 1217 | + | |
| 1218 | + rewrite_target = extract_shell_text_rewrite_target(str(command)) | |
| 1219 | + if rewrite_target is not None: | |
| 1220 | + return ValidationResult( | |
| 1221 | + valid=False, | |
| 1222 | + reason="Shell-based text rewrites are brittle and bypass Loader's safer file tools", | |
| 1223 | + suggestion=( | |
| 1224 | + f"Use edit/patch/write for `{rewrite_target}` instead of rewriting it with bash" | |
| 1225 | + ), | |
| 1226 | + severity="error", | |
| 1227 | + ) | |
| 506 | 1228 | |
| 507 | 1229 | for pattern, reason in self.SUSPICIOUS_PATTERNS: |
| 508 | 1230 | if re.search(pattern, command): |
@@ -607,6 +1329,10 @@ class PreActionValidator: | ||
| 607 | 1329 | severity="error", |
| 608 | 1330 | ) |
| 609 | 1331 | |
| 1332 | + html_index_result = self._validate_html_index_links(str(file_path), str(new_string)) | |
| 1333 | + if not html_index_result.valid: | |
| 1334 | + return html_index_result | |
| 1335 | + | |
| 610 | 1336 | return ValidationResult(valid=True) |
| 611 | 1337 | |
| 612 | 1338 | def _validate_patch(self, arguments: dict) -> ValidationResult: |
@@ -664,6 +1390,130 @@ class PreActionValidator: | ||
| 664 | 1390 | |
| 665 | 1391 | return ValidationResult(valid=True) |
| 666 | 1392 | |
| 1393 | + def _validate_html_index_links( | |
| 1394 | + self, | |
| 1395 | + file_path: str, | |
| 1396 | + content: str, | |
| 1397 | + ) -> ValidationResult: | |
| 1398 | + normalized = Path(file_path).expanduser() | |
| 1399 | + if normalized.name != "index.html" or "<a " not in content: | |
| 1400 | + return ValidationResult(valid=True) | |
| 1401 | + | |
| 1402 | + link_pairs = re.findall(r'<a\s+href="([^"]+)">([^<]+)</a>', content) | |
| 1403 | + if not link_pairs: | |
| 1404 | + return ValidationResult(valid=True) | |
| 1405 | + | |
| 1406 | + root = normalized.parent | |
| 1407 | + missing: list[str] = [] | |
| 1408 | + mismatched: list[str] = [] | |
| 1409 | + for href, label in link_pairs: | |
| 1410 | + target = (root / href).resolve(strict=False) | |
| 1411 | + if not target.exists(): | |
| 1412 | + if href not in missing: | |
| 1413 | + missing.append(href) | |
| 1414 | + continue | |
| 1415 | + | |
| 1416 | + title = read_html_title(target) | |
| 1417 | + if title and label.strip() != title: | |
| 1418 | + if href not in mismatched: | |
| 1419 | + mismatched.append(href) | |
| 1420 | + | |
| 1421 | + if missing: | |
| 1422 | + suggestions = self._suggest_existing_html_targets(root, missing) | |
| 1423 | + preview_items = [ | |
| 1424 | + format_html_inventory_entry(root, root / suggestion) | |
| 1425 | + for suggestion in suggestions | |
| 1426 | + ] | |
| 1427 | + if not preview_items: | |
| 1428 | + preview_items = missing | |
| 1429 | + preview = ", ".join(preview_items[:3]) | |
| 1430 | + if len(preview_items) > 3: | |
| 1431 | + preview += ", ..." | |
| 1432 | + return ValidationResult( | |
| 1433 | + valid=False, | |
| 1434 | + reason="Edited TOC references chapter files that do not exist", | |
| 1435 | + suggestion=( | |
| 1436 | + "Use only existing chapter href/title pairs from beside index.html, for example: " | |
| 1437 | + f"{preview}" | |
| 1438 | + ), | |
| 1439 | + severity="error", | |
| 1440 | + ) | |
| 1441 | + | |
| 1442 | + if mismatched: | |
| 1443 | + exact_entries = [ | |
| 1444 | + format_html_inventory_entry(root, (root / href).resolve(strict=False)) | |
| 1445 | + for href in mismatched | |
| 1446 | + if (root / href).resolve(strict=False).exists() | |
| 1447 | + ] | |
| 1448 | + if not exact_entries: | |
| 1449 | + exact_entries = mismatched | |
| 1450 | + preview = "; ".join(exact_entries[:2]) | |
| 1451 | + if len(exact_entries) > 2: | |
| 1452 | + preview += "; ..." | |
| 1453 | + return ValidationResult( | |
| 1454 | + valid=False, | |
| 1455 | + reason="Edited TOC labels do not match the linked chapter titles", | |
| 1456 | + suggestion=( | |
| 1457 | + "Copy the exact href/title pair from the linked HTML file, for example: " | |
| 1458 | + f"{preview}" | |
| 1459 | + ), | |
| 1460 | + severity="error", | |
| 1461 | + ) | |
| 1462 | + | |
| 1463 | + return ValidationResult(valid=True) | |
| 1464 | + | |
| 1465 | + def _suggest_existing_html_targets(self, root: Path, missing: list[str]) -> list[str]: | |
| 1466 | + available_by_directory: dict[Path, list[str]] = {} | |
| 1467 | + suggestions: list[str] = [] | |
| 1468 | + | |
| 1469 | + for href in missing: | |
| 1470 | + href_path = Path(href) | |
| 1471 | + directory = (root / href_path).parent | |
| 1472 | + if directory not in available_by_directory: | |
| 1473 | + available_by_directory[directory] = sorted( | |
| 1474 | + str(path.relative_to(root)) | |
| 1475 | + for path in directory.glob("*.html") | |
| 1476 | + if path.is_file() | |
| 1477 | + ) | |
| 1478 | + | |
| 1479 | + available = available_by_directory[directory] | |
| 1480 | + if not available: | |
| 1481 | + continue | |
| 1482 | + | |
| 1483 | + missing_name = href_path.name | |
| 1484 | + chapter_match = re.match(r"(\d+)-", missing_name) | |
| 1485 | + preferred = available | |
| 1486 | + if chapter_match is not None: | |
| 1487 | + prefix = f"{chapter_match.group(1)}-" | |
| 1488 | + same_prefix = [ | |
| 1489 | + candidate | |
| 1490 | + for candidate in available | |
| 1491 | + if Path(candidate).name.startswith(prefix) | |
| 1492 | + ] | |
| 1493 | + if same_prefix: | |
| 1494 | + preferred = same_prefix | |
| 1495 | + | |
| 1496 | + matched_names = get_close_matches( | |
| 1497 | + missing_name, | |
| 1498 | + [Path(candidate).name for candidate in preferred], | |
| 1499 | + n=1, | |
| 1500 | + cutoff=0.0, | |
| 1501 | + ) | |
| 1502 | + if matched_names: | |
| 1503 | + matched_name = matched_names[0] | |
| 1504 | + candidate = next( | |
| 1505 | + ( | |
| 1506 | + candidate | |
| 1507 | + for candidate in preferred | |
| 1508 | + if Path(candidate).name == matched_name | |
| 1509 | + ), | |
| 1510 | + None, | |
| 1511 | + ) | |
| 1512 | + if candidate is not None and candidate not in suggestions: | |
| 1513 | + suggestions.append(candidate) | |
| 1514 | + | |
| 1515 | + return suggestions | |
| 1516 | + | |
| 667 | 1517 | def _validate_path(self, file_path: str) -> ValidationResult: |
| 668 | 1518 | if '\x00' in file_path: |
| 669 | 1519 | return ValidationResult( |
src/loader/runtime/tool_batch_recovery.pymodified@@ -13,6 +13,13 @@ from .context import RuntimeContext | ||
| 13 | 13 | from .events import AgentEvent |
| 14 | 14 | from .executor import ToolExecutionOutcome |
| 15 | 15 | from .recovery import RecoveryContext, format_failure_message, format_recovery_prompt |
| 16 | +from .safeguard_services import ( | |
| 17 | + build_html_toc_edit_call_template, | |
| 18 | + build_html_toc_replacement_block, | |
| 19 | + extract_html_toc_excerpt, | |
| 20 | + read_html_title, | |
| 21 | + summarize_html_inventory, | |
| 22 | +) | |
| 16 | 23 | |
| 17 | 24 | EventSink = Callable[[AgentEvent], Awaitable[None]] |
| 18 | 25 | |
@@ -130,34 +137,36 @@ class ToolBatchRecoveryController: | ||
| 130 | 137 | current_task=current_task, |
| 131 | 138 | ) |
| 132 | 139 | actionable_known_state = bool(confirmed_facts and preferred_next_step) |
| 133 | - if not confirmed_facts and not preferred_next_step and not current_task: | |
| 134 | - return prompt | |
| 135 | - | |
| 136 | - lines = [prompt, "", "## CONTINUE FROM KNOWN STATE"] | |
| 137 | - if current_task: | |
| 138 | - lines.append(f"- Current task: {current_task}") | |
| 139 | - if confirmed_facts: | |
| 140 | - lines.append(f"- Confirmed facts: {confirmed_facts}") | |
| 141 | - if preferred_next_step: | |
| 142 | - lines.append(f"- Preferred next step: {preferred_next_step}") | |
| 143 | - lines.append( | |
| 144 | - "- Preserve progress: do not restart by rereading already-confirmed files " | |
| 145 | - "unless you need genuinely new evidence." | |
| 146 | - ) | |
| 147 | - if actionable_known_state: | |
| 148 | - lines.extend( | |
| 149 | - [ | |
| 150 | - "", | |
| 151 | - "## ACTION BIAS FOR THIS RECOVERY", | |
| 152 | - "- The confirmed findings above are already enough to keep moving.", | |
| 153 | - "- Prefer edit/write/patch on the target file over rereading the same files.", | |
| 154 | - "- Only inspect one more file if a specific filename, href, or title is still unknown.", | |
| 155 | - "- Treat the preferred next step as the default path forward.", | |
| 156 | - ] | |
| 140 | + lines = [prompt] | |
| 141 | + if confirmed_facts or preferred_next_step or current_task: | |
| 142 | + lines.extend(["", "## CONTINUE FROM KNOWN STATE"]) | |
| 143 | + if current_task: | |
| 144 | + lines.append(f"- Current task: {current_task}") | |
| 145 | + if confirmed_facts: | |
| 146 | + lines.append(f"- Confirmed facts: {confirmed_facts}") | |
| 147 | + if preferred_next_step: | |
| 148 | + lines.append(f"- Preferred next step: {preferred_next_step}") | |
| 149 | + lines.append( | |
| 150 | + "- Preserve progress: do not restart by rereading already-confirmed files " | |
| 151 | + "unless you need genuinely new evidence." | |
| 157 | 152 | ) |
| 153 | + if actionable_known_state: | |
| 154 | + lines.extend( | |
| 155 | + [ | |
| 156 | + "", | |
| 157 | + "## ACTION BIAS FOR THIS RECOVERY", | |
| 158 | + "- The confirmed findings above are already enough to keep moving.", | |
| 159 | + "- Prefer edit/write/patch on the target file over rereading the same files.", | |
| 160 | + "- Only inspect one more file if a specific filename, href, or title is still unknown.", | |
| 161 | + "- Treat the preferred next step as the default path forward.", | |
| 162 | + ] | |
| 163 | + ) | |
| 158 | 164 | candidate_lines = self._file_not_found_candidate_lines(tool_call, outcome) |
| 159 | 165 | if candidate_lines: |
| 160 | 166 | lines.extend(["", "## LIKELY FILE CANDIDATES", *candidate_lines]) |
| 167 | + target_excerpt_lines = self._target_excerpt_lines(tool_call) | |
| 168 | + if target_excerpt_lines: | |
| 169 | + lines.extend(["", "## CURRENT TARGET EXCERPT", *target_excerpt_lines]) | |
| 161 | 170 | return "\n".join(lines) |
| 162 | 171 | |
| 163 | 172 | def _file_not_found_candidate_lines( |
@@ -184,7 +193,7 @@ class ToolBatchRecoveryController: | ||
| 184 | 193 | if not candidates: |
| 185 | 194 | return [] |
| 186 | 195 | |
| 187 | - names = ", ".join(f"`{Path(candidate).name}`" for candidate in candidates[:3]) | |
| 196 | + names = ", ".join(self._describe_candidate(candidate) for candidate in candidates[:3]) | |
| 188 | 197 | return [ |
| 189 | 198 | f"- Requested file does not exist: `{missing_path}`", |
| 190 | 199 | f"- Closest known files in the same directory: {names}", |
@@ -198,7 +207,7 @@ class ToolBatchRecoveryController: | ||
| 198 | 207 | |
| 199 | 208 | ranked: list[tuple[float, str]] = [] |
| 200 | 209 | seen: set[str] = set() |
| 201 | - for candidate in self._known_file_paths(): | |
| 210 | + for candidate in self._known_file_paths(missing_path): | |
| 202 | 211 | if candidate == missing_path: |
| 203 | 212 | continue |
| 204 | 213 | if str(Path(candidate).parent) != missing_parent: |
@@ -216,7 +225,7 @@ class ToolBatchRecoveryController: | ||
| 216 | 225 | ranked.sort(key=lambda item: (-item[0], item[1])) |
| 217 | 226 | return [candidate for _, candidate in ranked] |
| 218 | 227 | |
| 219 | - def _known_file_paths(self) -> list[str]: | |
| 228 | + def _known_file_paths(self, missing_path: str | None = None) -> list[str]: | |
| 220 | 229 | pattern = re.compile(r"(?:~|/)[^\s`\"']+\.html") |
| 221 | 230 | discovered: list[str] = [] |
| 222 | 231 | seen: set[str] = set() |
@@ -227,8 +236,73 @@ class ToolBatchRecoveryController: | ||
| 227 | 236 | continue |
| 228 | 237 | seen.add(candidate) |
| 229 | 238 | discovered.append(candidate) |
| 239 | + if missing_path: | |
| 240 | + missing = Path(missing_path) | |
| 241 | + parent = missing.parent | |
| 242 | + if parent.is_dir(): | |
| 243 | + sibling_candidates = sorted( | |
| 244 | + child.resolve(strict=False) | |
| 245 | + for child in parent.iterdir() | |
| 246 | + if child.is_file() | |
| 247 | + and child.name != missing.name | |
| 248 | + and ( | |
| 249 | + not missing.suffix | |
| 250 | + or child.suffix == missing.suffix | |
| 251 | + ) | |
| 252 | + ) | |
| 253 | + for child in sibling_candidates: | |
| 254 | + candidate = str(child) | |
| 255 | + if candidate in seen: | |
| 256 | + continue | |
| 257 | + seen.add(candidate) | |
| 258 | + discovered.append(candidate) | |
| 230 | 259 | return discovered |
| 231 | 260 | |
| 261 | + def _describe_candidate(self, candidate: str) -> str: | |
| 262 | + path = Path(candidate) | |
| 263 | + label = f"`{path.name}`" | |
| 264 | + if path.suffix == ".html": | |
| 265 | + title = read_html_title(path) | |
| 266 | + if title: | |
| 267 | + return f"{label} = {title}" | |
| 268 | + return label | |
| 269 | + | |
| 270 | + def _target_excerpt_lines(self, tool_call: ToolCall) -> list[str]: | |
| 271 | + file_path = str( | |
| 272 | + tool_call.arguments.get("file_path") | |
| 273 | + or tool_call.arguments.get("path") | |
| 274 | + or "" | |
| 275 | + ).strip() | |
| 276 | + if not file_path: | |
| 277 | + return [] | |
| 278 | + | |
| 279 | + inventory = summarize_html_inventory(file_path, limit=12) | |
| 280 | + excerpt = extract_html_toc_excerpt(file_path) | |
| 281 | + if not inventory and not excerpt: | |
| 282 | + return [] | |
| 283 | + | |
| 284 | + lines: list[str] = [] | |
| 285 | + if inventory: | |
| 286 | + lines.append(f"- Verified chapter inventory: {inventory}") | |
| 287 | + if excerpt: | |
| 288 | + lines.append("- Current TOC block:") | |
| 289 | + lines.extend(f" {line}" for line in excerpt.splitlines()) | |
| 290 | + replacement = build_html_toc_replacement_block(file_path) | |
| 291 | + if replacement: | |
| 292 | + lines.append("- Suggested replacement block:") | |
| 293 | + lines.extend(f" {line}" for line in replacement.splitlines()) | |
| 294 | + if excerpt and replacement: | |
| 295 | + lines.append("- Exact edit guidance:") | |
| 296 | + lines.append(f" file_path: {file_path}") | |
| 297 | + lines.append(" old_string: use the Current TOC block above exactly") | |
| 298 | + lines.append(" new_string: use the Suggested replacement block above exactly") | |
| 299 | + lines.append(" Do not rewrite the whole file.") | |
| 300 | + edit_template = build_html_toc_edit_call_template(file_path) | |
| 301 | + if edit_template: | |
| 302 | + lines.append("- Suggested edit call:") | |
| 303 | + lines.extend(f" {line}" for line in edit_template.splitlines()) | |
| 304 | + return lines | |
| 305 | + | |
| 232 | 306 | def _canonicalize_path(self, raw_path: str) -> str: |
| 233 | 307 | if not raw_path: |
| 234 | 308 | return "" |
src/loader/runtime/tool_batches.pymodified@@ -32,7 +32,15 @@ from .verification_observations import ( | ||
| 32 | 32 | ) |
| 33 | 33 | from .workflow import sync_todos_to_definition_of_done |
| 34 | 34 | from .workflow import advance_todos_from_tool_call |
| 35 | -from .compaction import infer_preferred_next_step | |
| 35 | +from .compaction import infer_preferred_next_step, summarize_confirmed_facts | |
| 36 | +from .safeguard_services import ( | |
| 37 | + build_html_toc_edit_call_template, | |
| 38 | + build_html_toc_replacement_block, | |
| 39 | + extract_html_toc_excerpt, | |
| 40 | + extract_shell_text_rewrite_target, | |
| 41 | + summarize_html_inventory, | |
| 42 | + validate_html_toc, | |
| 43 | +) | |
| 36 | 44 | |
| 37 | 45 | EventSink = Callable[[AgentEvent], Awaitable[None]] |
| 38 | 46 | ConfirmationHandler = ( |
@@ -70,6 +78,7 @@ class ToolBatchRunner: | ||
| 70 | 78 | self.confidence_gate = confidence_gate or ToolBatchConfidenceGate(context) |
| 71 | 79 | self.recovery_controller = recovery_controller or ToolBatchRecoveryController(context) |
| 72 | 80 | self.verification_gate = verification_gate or ToolBatchVerificationGate(context) |
| 81 | + self._inventory_hint_targets: set[str] = set() | |
| 73 | 82 | |
| 74 | 83 | async def execute_batch( |
| 75 | 84 | self, |
@@ -143,6 +152,7 @@ class ToolBatchRunner: | ||
| 143 | 152 | emit_confirmation=emit_confirmation, |
| 144 | 153 | source=tool_source, |
| 145 | 154 | ) |
| 155 | + executed_tool_call = outcome.tool_call | |
| 146 | 156 | if ( |
| 147 | 157 | outcome.rollback_action is not None |
| 148 | 158 | and self.context.config.reasoning.show_rollback_plan |
@@ -163,7 +173,7 @@ class ToolBatchRunner: | ||
| 163 | 173 | and self.context.config.auto_recover |
| 164 | 174 | ): |
| 165 | 175 | recovery_result = await self.recovery_controller.build_follow_up( |
| 166 | - tool_call=tool_call, | |
| 176 | + tool_call=executed_tool_call, | |
| 167 | 177 | outcome=outcome, |
| 168 | 178 | emit=emit, |
| 169 | 179 | ) |
@@ -174,17 +184,21 @@ class ToolBatchRunner: | ||
| 174 | 184 | |
| 175 | 185 | if outcome.state == ToolExecutionState.EXECUTED and not outcome.is_error: |
| 176 | 186 | loop_response = await self._record_successful_execution( |
| 177 | - tool_call=tool_call, | |
| 187 | + tool_call=executed_tool_call, | |
| 178 | 188 | outcome=outcome, |
| 179 | 189 | dod=dod, |
| 180 | 190 | emit=emit, |
| 181 | 191 | summary=summary, |
| 182 | 192 | ) |
| 183 | 193 | # Mark this tool's label as completed and emit live progress |
| 184 | - label = _tool_call_label(tool_call) | |
| 194 | + label = _tool_call_label(executed_tool_call) | |
| 185 | 195 | if label: |
| 186 | 196 | completed_labels.append(label) |
| 187 | 197 | await _emit_batch_todos() |
| 198 | + self._annotate_verified_html_inventory(executed_tool_call, outcome) | |
| 199 | + self._queue_verified_html_inventory_nudge(executed_tool_call) | |
| 200 | + self._annotate_validated_html_toc_completion(executed_tool_call, outcome) | |
| 201 | + self._queue_validated_html_toc_completion_nudge(executed_tool_call) | |
| 188 | 202 | if loop_response is not None: |
| 189 | 203 | result.halted = True |
| 190 | 204 | result.final_response = loop_response |
@@ -199,7 +213,7 @@ class ToolBatchRunner: | ||
| 199 | 213 | AgentEvent( |
| 200 | 214 | type="tool_result", |
| 201 | 215 | content=outcome.event_content, |
| 202 | - tool_name=tool_call.name, | |
| 216 | + tool_name=executed_tool_call.name, | |
| 203 | 217 | tool_call_id=outcome.tool_call.id, |
| 204 | 218 | tool_metadata=( |
| 205 | 219 | outcome.registry_result.metadata |
@@ -219,6 +233,9 @@ class ToolBatchRunner: | ||
| 219 | 233 | summary.tool_result_messages.append(outcome.message) |
| 220 | 234 | if outcome.state == ToolExecutionState.DUPLICATE: |
| 221 | 235 | self._queue_duplicate_observation_nudge(tool_call) |
| 236 | + elif outcome.state == ToolExecutionState.BLOCKED: | |
| 237 | + self._queue_blocked_shell_rewrite_nudge(tool_call) | |
| 238 | + self._queue_blocked_html_edit_nudge(tool_call, outcome.event_content) | |
| 222 | 239 | |
| 223 | 240 | should_continue = await self.verification_gate.should_continue( |
| 224 | 241 | tool_call=tool_call, |
@@ -258,10 +275,23 @@ class ToolBatchRunner: | ||
| 258 | 275 | return |
| 259 | 276 | |
| 260 | 277 | current_task = getattr(self.context.session, "current_task", None) |
| 278 | + confirmed_facts = summarize_confirmed_facts( | |
| 279 | + self.context.session.messages, | |
| 280 | + max_items=2, | |
| 281 | + ) | |
| 261 | 282 | preferred_next_step = infer_preferred_next_step( |
| 262 | 283 | self.context.session.messages, |
| 263 | 284 | current_task=current_task, |
| 264 | 285 | ) |
| 286 | + if preferred_next_step and confirmed_facts: | |
| 287 | + self.context.queue_steering_message( | |
| 288 | + "Reuse the earlier observation instead of repeating it. " | |
| 289 | + f"Confirmed facts: {confirmed_facts}. " | |
| 290 | + f"{preferred_next_step} " | |
| 291 | + "Only gather more evidence if a specific filename, href, or title is still unknown." | |
| 292 | + ) | |
| 293 | + return | |
| 294 | + | |
| 265 | 295 | if preferred_next_step: |
| 266 | 296 | self.context.queue_steering_message( |
| 267 | 297 | "Reuse the earlier observation instead of repeating it. " |
@@ -288,6 +318,259 @@ class ToolBatchRunner: | ||
| 288 | 318 | "Choose a different next step that makes progress." |
| 289 | 319 | ) |
| 290 | 320 | |
| 321 | + def _queue_blocked_shell_rewrite_nudge(self, tool_call: ToolCall) -> None: | |
| 322 | + """Steer the model back to file tools after a blocked shell text rewrite.""" | |
| 323 | + | |
| 324 | + if tool_call.name != "bash": | |
| 325 | + return | |
| 326 | + | |
| 327 | + target = extract_shell_text_rewrite_target( | |
| 328 | + str(tool_call.arguments.get("command", "")) | |
| 329 | + ) | |
| 330 | + if target is None: | |
| 331 | + return | |
| 332 | + | |
| 333 | + current_task = getattr(self.context.session, "current_task", None) | |
| 334 | + confirmed_facts = summarize_confirmed_facts( | |
| 335 | + self.context.session.messages, | |
| 336 | + max_items=2, | |
| 337 | + ) | |
| 338 | + preferred_next_step = infer_preferred_next_step( | |
| 339 | + self.context.session.messages, | |
| 340 | + current_task=current_task, | |
| 341 | + ) | |
| 342 | + | |
| 343 | + if preferred_next_step and confirmed_facts: | |
| 344 | + self.context.queue_steering_message( | |
| 345 | + "Use Loader's file tools for this text edit instead of a shell rewrite. " | |
| 346 | + f"Confirmed facts: {confirmed_facts}. " | |
| 347 | + f"{preferred_next_step} " | |
| 348 | + f"Target `{target}` with edit/patch/write rather than `bash`." | |
| 349 | + ) | |
| 350 | + return | |
| 351 | + | |
| 352 | + self.context.queue_steering_message( | |
| 353 | + "Use Loader's file tools for this text edit instead of a shell rewrite. " | |
| 354 | + f"Apply the change to `{target}` with edit/patch/write." | |
| 355 | + ) | |
| 356 | + | |
| 357 | + def _queue_blocked_html_edit_nudge(self, tool_call: ToolCall, event_content: str) -> None: | |
| 358 | + """Steer blocked TOC edits back to the confirmed chapter inventory.""" | |
| 359 | + | |
| 360 | + if tool_call.name not in {"edit", "patch"}: | |
| 361 | + return | |
| 362 | + | |
| 363 | + target_path = str(tool_call.arguments.get("file_path", "")).strip() | |
| 364 | + if not target_path.endswith("index.html"): | |
| 365 | + return | |
| 366 | + | |
| 367 | + current_task = getattr(self.context.session, "current_task", None) | |
| 368 | + confirmed_facts = summarize_confirmed_facts( | |
| 369 | + self.context.session.messages, | |
| 370 | + max_items=2, | |
| 371 | + ) | |
| 372 | + preferred_next_step = infer_preferred_next_step( | |
| 373 | + self.context.session.messages, | |
| 374 | + current_task=current_task, | |
| 375 | + ) | |
| 376 | + verified_inventory = summarize_html_inventory(target_path, limit=12) | |
| 377 | + current_excerpt = extract_html_toc_excerpt(target_path) | |
| 378 | + suggested_replacement = build_html_toc_replacement_block(target_path) | |
| 379 | + suggested_call = build_html_toc_edit_call_template(target_path) | |
| 380 | + excerpt_suffix = ( | |
| 381 | + f"\nCurrent TOC block:\n{current_excerpt}" | |
| 382 | + if current_excerpt | |
| 383 | + else "" | |
| 384 | + ) | |
| 385 | + replacement_suffix = ( | |
| 386 | + f"\nSuggested replacement block:\n{suggested_replacement}" | |
| 387 | + if suggested_replacement | |
| 388 | + else "" | |
| 389 | + ) | |
| 390 | + call_suffix = ( | |
| 391 | + f"\nSuggested edit call:\n{suggested_call}" | |
| 392 | + if suggested_call | |
| 393 | + else "" | |
| 394 | + ) | |
| 395 | + | |
| 396 | + if preferred_next_step and confirmed_facts and verified_inventory: | |
| 397 | + self.context.queue_steering_message( | |
| 398 | + "Use the current target contents plus the verified sibling inventory instead of guessing. " | |
| 399 | + f"Confirmed facts: {confirmed_facts}. " | |
| 400 | + f"Known chapter inventory: {verified_inventory}. " | |
| 401 | + f"{preferred_next_step} " | |
| 402 | + "Apply those exact href/title pairs in `index.html`. " | |
| 403 | + "Do not rewrite the whole document. For `edit`, set `old_string` to the " | |
| 404 | + "current TOC block below exactly and set `new_string` to the suggested " | |
| 405 | + "replacement block below exactly." | |
| 406 | + f"{excerpt_suffix}" | |
| 407 | + f"{replacement_suffix}" | |
| 408 | + f"{call_suffix}" | |
| 409 | + ) | |
| 410 | + return | |
| 411 | + | |
| 412 | + if verified_inventory: | |
| 413 | + self.context.queue_steering_message( | |
| 414 | + "Use the current target contents plus the verified sibling inventory instead of guessing. " | |
| 415 | + f"Known chapter inventory: {verified_inventory}. " | |
| 416 | + "Apply those exact href/title pairs in `index.html`. " | |
| 417 | + "Do not rewrite the whole document. For `edit`, set `old_string` to the " | |
| 418 | + "current TOC block below exactly and set `new_string` to the suggested " | |
| 419 | + "replacement block below exactly." | |
| 420 | + f"{excerpt_suffix}" | |
| 421 | + f"{replacement_suffix}" | |
| 422 | + f"{call_suffix}" | |
| 423 | + ) | |
| 424 | + return | |
| 425 | + | |
| 426 | + self.context.queue_steering_message( | |
| 427 | + "Use the current target contents when retrying this `index.html` edit instead of guessing. " | |
| 428 | + f"{excerpt_suffix}".strip() | |
| 429 | + ) | |
| 430 | + | |
| 431 | + def _queue_verified_html_inventory_nudge(self, tool_call: ToolCall) -> None: | |
| 432 | + """Proactively hand off verified chapter inventory after sibling discovery.""" | |
| 433 | + | |
| 434 | + if tool_call.name != "glob": | |
| 435 | + return | |
| 436 | + | |
| 437 | + chapters_path = str(tool_call.arguments.get("path", "")).strip() | |
| 438 | + if not chapters_path.endswith("chapters"): | |
| 439 | + return | |
| 440 | + | |
| 441 | + index_path = str(Path(chapters_path).expanduser().parent / "index.html") | |
| 442 | + if index_path in self._inventory_hint_targets: | |
| 443 | + return | |
| 444 | + | |
| 445 | + current_task = str(getattr(self.context.session, "current_task", "") or "").lower() | |
| 446 | + if not any( | |
| 447 | + hint in current_task | |
| 448 | + for hint in ("href", "link", "links", "table of contents", "chapter", "index.html") | |
| 449 | + ): | |
| 450 | + return | |
| 451 | + | |
| 452 | + verified_inventory = summarize_html_inventory(index_path, limit=12) | |
| 453 | + if not verified_inventory: | |
| 454 | + return | |
| 455 | + | |
| 456 | + self._inventory_hint_targets.add(index_path) | |
| 457 | + self.context.queue_steering_message( | |
| 458 | + "You already have the verified sibling inventory needed for this edit. " | |
| 459 | + f"Known chapter inventory: {verified_inventory}. " | |
| 460 | + f"Update `{index_path}` using those exact href/title pairs instead of rereading files " | |
| 461 | + "unless one specific title is still unknown." | |
| 462 | + ) | |
| 463 | + | |
| 464 | + def _annotate_verified_html_inventory(self, tool_call: ToolCall, outcome) -> None: | |
| 465 | + """Attach verified chapter inventory directly to a successful discovery result.""" | |
| 466 | + | |
| 467 | + if tool_call.name != "glob": | |
| 468 | + return | |
| 469 | + | |
| 470 | + chapters_path = str(tool_call.arguments.get("path", "")).strip() | |
| 471 | + if not chapters_path.endswith("chapters"): | |
| 472 | + return | |
| 473 | + | |
| 474 | + current_task = str(getattr(self.context.session, "current_task", "") or "").lower() | |
| 475 | + if not any( | |
| 476 | + hint in current_task | |
| 477 | + for hint in ("href", "link", "links", "table of contents", "chapter", "index.html") | |
| 478 | + ): | |
| 479 | + return | |
| 480 | + | |
| 481 | + index_path = str(Path(chapters_path).expanduser().parent / "index.html") | |
| 482 | + verified_inventory = summarize_html_inventory(index_path, limit=12) | |
| 483 | + if not verified_inventory: | |
| 484 | + return | |
| 485 | + | |
| 486 | + action_tracker = getattr(self.context.safeguards, "action_tracker", None) | |
| 487 | + note_inventory = getattr(action_tracker, "note_verified_html_inventory", None) | |
| 488 | + if callable(note_inventory): | |
| 489 | + note_inventory(index_path) | |
| 490 | + | |
| 491 | + note = ( | |
| 492 | + "Verified chapter inventory: " | |
| 493 | + f"{verified_inventory}" | |
| 494 | + ) | |
| 495 | + merged_event = outcome.event_content | |
| 496 | + if note not in merged_event: | |
| 497 | + merged_event = f"{note}\n{merged_event}".strip() | |
| 498 | + outcome.event_content = merged_event | |
| 499 | + outcome.result_output = merged_event | |
| 500 | + outcome.message.content = f"{note}\n{outcome.message.content}".strip() | |
| 501 | + if outcome.message.tool_results: | |
| 502 | + outcome.message.tool_results[0].content = merged_event | |
| 503 | + | |
| 504 | + def _annotate_validated_html_toc_completion(self, tool_call: ToolCall, outcome) -> None: | |
| 505 | + """Attach semantic TOC validation evidence to a successful mutating result.""" | |
| 506 | + | |
| 507 | + target_path = self._validated_html_toc_target(tool_call) | |
| 508 | + if target_path is None: | |
| 509 | + return | |
| 510 | + | |
| 511 | + validation = validate_html_toc(target_path) | |
| 512 | + if validation is None or not validation.valid: | |
| 513 | + return | |
| 514 | + | |
| 515 | + action_tracker = getattr(self.context.safeguards, "action_tracker", None) | |
| 516 | + note_validated = getattr(action_tracker, "note_validated_html_toc", None) | |
| 517 | + if callable(note_validated): | |
| 518 | + note_validated(target_path) | |
| 519 | + | |
| 520 | + note = ( | |
| 521 | + "Semantic verification preview: " | |
| 522 | + f"validated {validation.link_count} toc links in {Path(target_path).name}" | |
| 523 | + ) | |
| 524 | + merged_event = outcome.event_content | |
| 525 | + if note not in merged_event: | |
| 526 | + merged_event = f"{merged_event}\n{note}".strip() | |
| 527 | + outcome.event_content = merged_event | |
| 528 | + outcome.result_output = merged_event | |
| 529 | + outcome.message.content = f"{outcome.message.content}\n{note}".strip() | |
| 530 | + if outcome.message.tool_results: | |
| 531 | + outcome.message.tool_results[0].content = merged_event | |
| 532 | + | |
| 533 | + def _queue_validated_html_toc_completion_nudge(self, tool_call: ToolCall) -> None: | |
| 534 | + """Push the next model turn toward finishing once the TOC already validates.""" | |
| 535 | + | |
| 536 | + target_path = self._validated_html_toc_target(tool_call) | |
| 537 | + if target_path is None: | |
| 538 | + return | |
| 539 | + | |
| 540 | + validation = validate_html_toc(target_path) | |
| 541 | + if validation is None or not validation.valid: | |
| 542 | + return | |
| 543 | + | |
| 544 | + self.context.queue_steering_message( | |
| 545 | + "The current `index.html` already satisfies the verified chapter-link constraints. " | |
| 546 | + f"Semantic verification preview: validated {validation.link_count} toc links in " | |
| 547 | + f"`{Path(target_path).name}`. " | |
| 548 | + "Do not reread `index.html` or files in `chapters/` unless a specific href or " | |
| 549 | + "title is still unresolved. Briefly state that the table of contents has been " | |
| 550 | + "updated so Loader can run the verification gate." | |
| 551 | + ) | |
| 552 | + | |
| 553 | + @staticmethod | |
| 554 | + def _validated_html_toc_target(tool_call: ToolCall) -> str | None: | |
| 555 | + """Return the index target for a successful HTML TOC mutation.""" | |
| 556 | + | |
| 557 | + target_path = "" | |
| 558 | + if tool_call.name in {"write", "edit", "patch"}: | |
| 559 | + target_path = str(tool_call.arguments.get("file_path", "")).strip() | |
| 560 | + elif tool_call.name == "bash": | |
| 561 | + target_path = ( | |
| 562 | + extract_shell_text_rewrite_target( | |
| 563 | + str(tool_call.arguments.get("command", "")) | |
| 564 | + ) | |
| 565 | + or "" | |
| 566 | + ).strip() | |
| 567 | + | |
| 568 | + if not target_path: | |
| 569 | + return None | |
| 570 | + if not target_path.endswith("index.html"): | |
| 571 | + return None | |
| 572 | + return str(Path(target_path).expanduser()) | |
| 573 | + | |
| 291 | 574 | async def _record_successful_execution( |
| 292 | 575 | self, |
| 293 | 576 | *, |
src/loader/runtime/turn_preparation.pymodified@@ -104,6 +104,7 @@ class TurnPreparationController: | ||
| 104 | 104 | dod = self.dod_store.create_or_resume( |
| 105 | 105 | effective_task, |
| 106 | 106 | retry_budget=self.context.config.verification_retry_budget, |
| 107 | + resume_path=self.context.session.active_dod_path, | |
| 107 | 108 | ) |
| 108 | 109 | summary.definition_of_done = dod |
| 109 | 110 | |
@@ -158,6 +159,7 @@ class TurnPreparationController: | ||
| 158 | 159 | validator=self.context.safeguards.validator, |
| 159 | 160 | registry=self.context.registry, |
| 160 | 161 | rollback_plan=rollback_plan, |
| 162 | + workspace_root=self.context.project_root, | |
| 161 | 163 | ), |
| 162 | 164 | ) |
| 163 | 165 | return executor, rollback_plan |
src/loader/runtime/workflow.pymodified@@ -124,7 +124,7 @@ _VERIFY_STEP_HINTS = ( | ||
| 124 | 124 | ) |
| 125 | 125 | _SHELL_COMMAND_START = re.compile( |
| 126 | 126 | r"(?<![\w/.-])(" |
| 127 | - r"ls|grep|pytest|uv|python3?|html5validator|cargo|npm|node|mypy|ruff|find|git|cat|sed|head|tail" | |
| 127 | + r"ls|grep|pytest|uv|python3?|html5validator|cargo|npm|node|mypy|ruff|find|git|cat|sed|head|tail|test|diff|cmp|bash|sh|make" | |
| 128 | 128 | r")\b" |
| 129 | 129 | ) |
| 130 | 130 | |
@@ -875,13 +875,11 @@ def _extract_commands(items: list[str]) -> list[str]: | ||
| 875 | 875 | candidate = re.sub(r"^-\s+", "", candidate) |
| 876 | 876 | match = re.match(r"^`(.+)`$", candidate) |
| 877 | 877 | candidate = (match.group(1) if match else candidate).strip() |
| 878 | - if candidate.startswith("#"): | |
| 879 | - candidate = _extract_shell_command_from_text(candidate) | |
| 880 | - if not candidate: | |
| 881 | - continue | |
| 878 | + candidate = _extract_shell_command_from_text(candidate) | |
| 879 | + candidate = candidate.strip().strip("`") | |
| 882 | 880 | if candidate: |
| 883 | 881 | commands.append(candidate) |
| 884 | - return [command for command in commands if command] | |
| 882 | + return _merge_continued_shell_commands([command for command in commands if command]) | |
| 885 | 883 | |
| 886 | 884 | |
| 887 | 885 | def _extract_collapsed_shell_commands(text: str) -> list[str]: |
@@ -911,6 +909,40 @@ def _extract_shell_command_from_text(text: str) -> str: | ||
| 911 | 909 | return text[match.start():].strip() |
| 912 | 910 | |
| 913 | 911 | |
| 912 | +def _merge_continued_shell_commands(commands: list[str]) -> list[str]: | |
| 913 | + merged: list[str] = [] | |
| 914 | + pending: str | None = None | |
| 915 | + | |
| 916 | + for command in commands: | |
| 917 | + stripped = command.strip() | |
| 918 | + if not stripped: | |
| 919 | + continue | |
| 920 | + | |
| 921 | + if pending is not None: | |
| 922 | + combined = f"{pending} {stripped}".strip() | |
| 923 | + if _has_dangling_shell_continuation(combined): | |
| 924 | + pending = combined | |
| 925 | + continue | |
| 926 | + merged.append(combined) | |
| 927 | + pending = None | |
| 928 | + continue | |
| 929 | + | |
| 930 | + if _has_dangling_shell_continuation(stripped): | |
| 931 | + pending = stripped | |
| 932 | + continue | |
| 933 | + merged.append(stripped) | |
| 934 | + | |
| 935 | + if pending is not None: | |
| 936 | + merged.append(pending.rstrip("|& ").strip()) | |
| 937 | + | |
| 938 | + return [command for command in merged if command] | |
| 939 | + | |
| 940 | + | |
| 941 | +def _has_dangling_shell_continuation(command: str) -> bool: | |
| 942 | + stripped = command.rstrip() | |
| 943 | + return stripped.endswith("|") or stripped.endswith("&&") or stripped.endswith("||") | |
| 944 | + | |
| 945 | + | |
| 914 | 946 | def _has_concrete_anchor(task: str) -> bool: |
| 915 | 947 | return any( |
| 916 | 948 | re.search(pattern, task) |
tests/test_compaction.pymodified@@ -8,7 +8,9 @@ from loader.runtime.compaction import ( | ||
| 8 | 8 | build_session_summary, |
| 9 | 9 | compact_session_messages, |
| 10 | 10 | compress_summary, |
| 11 | + infer_preferred_next_step, | |
| 11 | 12 | resolve_auto_compaction_input_tokens_threshold, |
| 13 | + summarize_confirmed_facts, | |
| 12 | 14 | ) |
| 13 | 15 | |
| 14 | 16 | |
@@ -100,6 +102,32 @@ def test_build_session_summary_preserves_confirmed_facts_and_next_step() -> None | ||
| 100 | 102 | ) |
| 101 | 103 | ], |
| 102 | 104 | ), |
| 105 | + Message( | |
| 106 | + role=Role.ASSISTANT, | |
| 107 | + content="Inspecting the setup chapter title.", | |
| 108 | + tool_calls=[ | |
| 109 | + ToolCall( | |
| 110 | + id="read-2", | |
| 111 | + name="read", | |
| 112 | + arguments={"file_path": "~/Loader/guides/fortran/chapters/02-setup.html"}, | |
| 113 | + ) | |
| 114 | + ], | |
| 115 | + ), | |
| 116 | + Message.tool_result_message( | |
| 117 | + tool_call_id="read-2", | |
| 118 | + display_content=( | |
| 119 | + " 1\t<!DOCTYPE html>\n" | |
| 120 | + " 2\t<html>\n" | |
| 121 | + " 61\t<h1>Chapter 2: Setting Up Fortran</h1>\n" | |
| 122 | + " 62\t</html>\n" | |
| 123 | + ), | |
| 124 | + result_content=( | |
| 125 | + " 1\t<!DOCTYPE html>\n" | |
| 126 | + " 2\t<html>\n" | |
| 127 | + " 61\t<h1>Chapter 2: Setting Up Fortran</h1>\n" | |
| 128 | + " 62\t</html>\n" | |
| 129 | + ), | |
| 130 | + ), | |
| 103 | 131 | Message( |
| 104 | 132 | role=Role.TOOL, |
| 105 | 133 | content=( |
@@ -121,11 +149,144 @@ def test_build_session_summary_preserves_confirmed_facts_and_next_step() -> None | ||
| 121 | 149 | |
| 122 | 150 | assert "Confirmed facts:" in summary |
| 123 | 151 | assert "02-basic-syntax.html -> 02-setup.html" in summary |
| 124 | - assert "Existing files include 01-introduction.html" in summary | |
| 152 | + assert "02-setup.html = Chapter 2: Setting Up Fortran" in summary | |
| 125 | 153 | assert "Preferred next step:" in summary |
| 126 | 154 | assert "`~/Loader/guides/fortran/index.html`" in summary |
| 127 | 155 | |
| 128 | 156 | |
| 157 | +def test_summarize_confirmed_facts_extracts_chapter_titles_from_read_results() -> None: | |
| 158 | + messages = [ | |
| 159 | + Message( | |
| 160 | + role=Role.ASSISTANT, | |
| 161 | + content="I will inspect the chapter files.", | |
| 162 | + tool_calls=[ | |
| 163 | + ToolCall( | |
| 164 | + id="read-1", | |
| 165 | + name="read", | |
| 166 | + arguments={"file_path": "/tmp/fortran/chapters/01-introduction.html"}, | |
| 167 | + ), | |
| 168 | + ToolCall( | |
| 169 | + id="read-2", | |
| 170 | + name="read", | |
| 171 | + arguments={"file_path": "/tmp/fortran/chapters/02-setup.html"}, | |
| 172 | + ), | |
| 173 | + ], | |
| 174 | + ), | |
| 175 | + Message.tool_result_message( | |
| 176 | + tool_call_id="read-1", | |
| 177 | + display_content="<h1>Chapter 1: Introduction to Fortran</h1>\n", | |
| 178 | + result_content="<h1>Chapter 1: Introduction to Fortran</h1>\n", | |
| 179 | + ), | |
| 180 | + Message.tool_result_message( | |
| 181 | + tool_call_id="read-2", | |
| 182 | + display_content="<title>Chapter 2: Setting Up Fortran</title>\n", | |
| 183 | + result_content="<title>Chapter 2: Setting Up Fortran</title>\n", | |
| 184 | + ), | |
| 185 | + ] | |
| 186 | + | |
| 187 | + confirmed_facts = summarize_confirmed_facts(messages, max_items=2) | |
| 188 | + | |
| 189 | + assert confirmed_facts is not None | |
| 190 | + assert "Chapter titles confirmed:" in confirmed_facts | |
| 191 | + assert "01-introduction.html = Chapter 1: Introduction to Fortran" in confirmed_facts | |
| 192 | + assert "02-setup.html = Chapter 2: Setting Up Fortran" in confirmed_facts | |
| 193 | + | |
| 194 | + | |
| 195 | +def test_infer_preferred_next_step_uses_confirmed_chapter_pairs() -> None: | |
| 196 | + messages = [ | |
| 197 | + Message( | |
| 198 | + role=Role.ASSISTANT, | |
| 199 | + content="I should inspect the chapter and then update the index.", | |
| 200 | + tool_calls=[ | |
| 201 | + ToolCall( | |
| 202 | + id="read-index", | |
| 203 | + name="read", | |
| 204 | + arguments={"file_path": "/tmp/fortran/index.html"}, | |
| 205 | + ), | |
| 206 | + ToolCall( | |
| 207 | + id="read-1", | |
| 208 | + name="read", | |
| 209 | + arguments={"file_path": "/tmp/fortran/chapters/01-introduction.html"}, | |
| 210 | + ), | |
| 211 | + ], | |
| 212 | + ), | |
| 213 | + Message.tool_result_message( | |
| 214 | + tool_call_id="read-1", | |
| 215 | + display_content="<h1>Chapter 1: Introduction to Fortran</h1>\n", | |
| 216 | + result_content="<h1>Chapter 1: Introduction to Fortran</h1>\n", | |
| 217 | + ), | |
| 218 | + ] | |
| 219 | + | |
| 220 | + next_step = infer_preferred_next_step( | |
| 221 | + messages, | |
| 222 | + current_task="Update /tmp/fortran/index.html so the chapter list matches the real files.", | |
| 223 | + ) | |
| 224 | + | |
| 225 | + assert next_step == ( | |
| 226 | + "Update `/tmp/fortran/index.html` using the confirmed chapter file/title pairs " | |
| 227 | + "instead of rereading files." | |
| 228 | + ) | |
| 229 | + | |
| 230 | + | |
| 231 | +def test_infer_preferred_next_step_uses_latest_verification_gap() -> None: | |
| 232 | + messages = [ | |
| 233 | + Message( | |
| 234 | + role=Role.ASSISTANT, | |
| 235 | + content="I should inspect the chapter and then update the index.", | |
| 236 | + tool_calls=[ | |
| 237 | + ToolCall( | |
| 238 | + id="read-index", | |
| 239 | + name="read", | |
| 240 | + arguments={"file_path": "/tmp/fortran/index.html"}, | |
| 241 | + ), | |
| 242 | + ToolCall( | |
| 243 | + id="read-1", | |
| 244 | + name="read", | |
| 245 | + arguments={"file_path": "/tmp/fortran/chapters/01-introduction.html"}, | |
| 246 | + ), | |
| 247 | + ToolCall( | |
| 248 | + id="verify-1", | |
| 249 | + name="bash", | |
| 250 | + arguments={"command": "python3 - <<'PY'\n...\nPY"}, | |
| 251 | + ), | |
| 252 | + ], | |
| 253 | + ), | |
| 254 | + Message.tool_result_message( | |
| 255 | + tool_call_id="read-1", | |
| 256 | + display_content="<h1>Chapter 1: Introduction to Fortran</h1>\n", | |
| 257 | + result_content="<h1>Chapter 1: Introduction to Fortran</h1>\n", | |
| 258 | + ), | |
| 259 | + Message.tool_result_message( | |
| 260 | + tool_call_id="verify-1", | |
| 261 | + display_content=( | |
| 262 | + "Missing links:\n" | |
| 263 | + "chapters/05-control-structures.html -> missing\n" | |
| 264 | + "chapters/06-input-output.html -> missing\n" | |
| 265 | + ), | |
| 266 | + result_content=( | |
| 267 | + "Missing links:\n" | |
| 268 | + "chapters/05-control-structures.html -> missing\n" | |
| 269 | + "chapters/06-input-output.html -> missing\n" | |
| 270 | + ), | |
| 271 | + is_error=True, | |
| 272 | + ), | |
| 273 | + ] | |
| 274 | + | |
| 275 | + confirmed_facts = summarize_confirmed_facts(messages, max_items=2) | |
| 276 | + next_step = infer_preferred_next_step( | |
| 277 | + messages, | |
| 278 | + current_task="Update /tmp/fortran/index.html so the chapter list matches the real files.", | |
| 279 | + ) | |
| 280 | + | |
| 281 | + assert confirmed_facts is not None | |
| 282 | + assert "Verification gaps: missing TOC links chapters/05-control-structures.html" in confirmed_facts | |
| 283 | + assert next_step == ( | |
| 284 | + "Update `/tmp/fortran/index.html` to fix the specific verification failures " | |
| 285 | + "(missing TOC links chapters/05-control-structures.html, " | |
| 286 | + "chapters/06-input-output.html) instead of restarting discovery." | |
| 287 | + ) | |
| 288 | + | |
| 289 | + | |
| 129 | 290 | def test_compact_session_messages_uses_single_continuation_instruction_block() -> None: |
| 130 | 291 | messages = [ |
| 131 | 292 | Message(role=Role.USER, content="Task framing"), |
tests/test_dod.pymodified@@ -5,7 +5,9 @@ from pathlib import Path | ||
| 5 | 5 | from loader.llm.base import ToolCall |
| 6 | 6 | from loader.runtime.dod import ( |
| 7 | 7 | DefinitionOfDoneStore, |
| 8 | + VerificationEvidence, | |
| 8 | 9 | begin_new_verification_attempt, |
| 10 | + build_verification_summary, | |
| 9 | 11 | create_definition_of_done, |
| 10 | 12 | derive_verification_commands, |
| 11 | 13 | determine_task_size, |
@@ -137,5 +139,25 @@ def test_derive_verification_commands_adds_semantic_html_toc_check(tmp_path: Pat | ||
| 137 | 139 | task_statement=dod.task_statement, |
| 138 | 140 | ) |
| 139 | 141 | |
| 140 | - assert any(command.startswith("/usr/bin/python3 - <<'PY'") for command in commands) | |
| 142 | + assert any(command.startswith("python3 - <<'PY'") for command in commands) | |
| 141 | 143 | assert not any(command == f"test -f {index}" for command in commands) |
| 144 | + | |
| 145 | + | |
| 146 | +def test_build_verification_summary_keeps_concrete_missing_link_details() -> None: | |
| 147 | + summary = build_verification_summary( | |
| 148 | + [ | |
| 149 | + VerificationEvidence( | |
| 150 | + command="python3 - <<'PY' ... PY", | |
| 151 | + passed=False, | |
| 152 | + stderr=( | |
| 153 | + "Missing links:\n" | |
| 154 | + "chapters/05-control-structures.html -> missing\n" | |
| 155 | + "chapters/06-input-output.html -> missing\n" | |
| 156 | + ), | |
| 157 | + ) | |
| 158 | + ] | |
| 159 | + ) | |
| 160 | + | |
| 161 | + assert "Missing links:" in summary | |
| 162 | + assert "chapters/05-control-structures.html -> missing" in summary | |
| 163 | + assert "chapters/06-input-output.html -> missing" in summary | |
tests/test_finalization.pymodified@@ -427,7 +427,7 @@ async def test_turn_finalizer_appends_runtime_semantic_verifier_to_planned_comma | ||
| 427 | 427 | |
| 428 | 428 | assert result.should_continue is False |
| 429 | 429 | assert any(command == 'grep -n "href=" index.html' for command in executor.commands) |
| 430 | - assert any(command.startswith("/usr/bin/python3 - <<'PY'") for command in executor.commands) | |
| 430 | + assert any(command.startswith("python3 - <<'PY'") for command in executor.commands) | |
| 431 | 431 | assert ( |
| 432 | 432 | session.workflow_timeline[-1].verification_observations[0].attempt_id |
| 433 | 433 | == "verification-attempt-1" |
@@ -483,3 +483,130 @@ async def test_turn_finalizer_records_missing_verification_observation( | ||
| 483 | 483 | ) |
| 484 | 484 | assert session.messages[-1].role == Role.USER |
| 485 | 485 | assert session.messages[-1].content.startswith("[DEFINITION OF DONE CHECK FAILED]") |
| 486 | + | |
| 487 | + | |
| 488 | +@pytest.mark.asyncio | |
| 489 | +async def test_turn_finalizer_does_not_reverify_without_new_changes( | |
| 490 | + temp_dir: Path, | |
| 491 | +) -> None: | |
| 492 | + session = FakeSession() | |
| 493 | + context = build_context(temp_dir, session) | |
| 494 | + finalizer = TurnFinalizer( | |
| 495 | + context, | |
| 496 | + RuntimeTracer(), | |
| 497 | + DefinitionOfDoneStore(temp_dir), | |
| 498 | + set_workflow_mode=_noop_set_workflow_mode, | |
| 499 | + ) | |
| 500 | + index = temp_dir / "index.html" | |
| 501 | + index.write_text("<ul></ul>\n") | |
| 502 | + dod = create_definition_of_done("Fix the chapter list in index.html.") | |
| 503 | + dod.mutating_actions.append("edit") | |
| 504 | + dod.touched_files.append(str(index)) | |
| 505 | + dod.line_changes = 12 | |
| 506 | + dod.last_verification_result = "failed" | |
| 507 | + dod.last_verification_signature = ( | |
| 508 | + f"lines={dod.line_changes};touched={index};actions=1;commands=" | |
| 509 | + ) | |
| 510 | + dod.evidence = [] | |
| 511 | + summary = TurnSummary(final_response="") | |
| 512 | + executor = RecordingExecutor() | |
| 513 | + | |
| 514 | + async def capture(event) -> None: | |
| 515 | + return None | |
| 516 | + | |
| 517 | + result = await finalizer.run_definition_of_done_gate( | |
| 518 | + dod=dod, | |
| 519 | + candidate_response="I checked the file again.", | |
| 520 | + emit=capture, | |
| 521 | + summary=summary, | |
| 522 | + executor=executor, # type: ignore[arg-type] | |
| 523 | + ) | |
| 524 | + | |
| 525 | + assert result.should_continue is True | |
| 526 | + assert result.reason_code == "verification_failed_no_new_changes" | |
| 527 | + assert executor.commands == [] | |
| 528 | + assert summary.verification_status == "failed" | |
| 529 | + assert session.messages[-1].content.startswith("[DEFINITION OF DONE CHECK STILL FAILING]") | |
| 530 | + | |
| 531 | + | |
| 532 | +@pytest.mark.asyncio | |
| 533 | +async def test_turn_finalizer_accepts_missing_optional_html5validator_when_semantic_check_passes( | |
| 534 | + temp_dir: Path, | |
| 535 | + monkeypatch: pytest.MonkeyPatch, | |
| 536 | +) -> None: | |
| 537 | + session = FakeSession() | |
| 538 | + context = build_context(temp_dir, session) | |
| 539 | + finalizer = TurnFinalizer( | |
| 540 | + context, | |
| 541 | + RuntimeTracer(), | |
| 542 | + DefinitionOfDoneStore(temp_dir), | |
| 543 | + set_workflow_mode=_noop_set_workflow_mode, | |
| 544 | + ) | |
| 545 | + dod = create_definition_of_done( | |
| 546 | + "Update index.html so the table of contents links and chapter titles are correct." | |
| 547 | + ) | |
| 548 | + dod.mutating_actions.append("edit") | |
| 549 | + dod.touched_files.append(str(temp_dir / "index.html")) | |
| 550 | + dod.verification_commands = [ | |
| 551 | + "python3 - <<'PY'\nprint('semantic ok')\nPY", | |
| 552 | + "html5validator --root /tmp/fortran-qwen-recovery-check/", | |
| 553 | + ] | |
| 554 | + summary = TurnSummary(final_response="") | |
| 555 | + semantic_call = ToolCall( | |
| 556 | + id="verify-1-1", | |
| 557 | + name="bash", | |
| 558 | + arguments={"command": dod.verification_commands[0], "cwd": str(temp_dir)}, | |
| 559 | + ) | |
| 560 | + html5validator_call = ToolCall( | |
| 561 | + id="verify-1-2", | |
| 562 | + name="bash", | |
| 563 | + arguments={"command": dod.verification_commands[1], "cwd": str(temp_dir)}, | |
| 564 | + ) | |
| 565 | + | |
| 566 | + async def capture(event) -> None: | |
| 567 | + return None | |
| 568 | + | |
| 569 | + monkeypatch.setattr( | |
| 570 | + "loader.runtime.finalization.derive_verification_commands", | |
| 571 | + lambda *args, **kwargs: [], | |
| 572 | + ) | |
| 573 | + | |
| 574 | + result = await finalizer.run_definition_of_done_gate( | |
| 575 | + dod=dod, | |
| 576 | + candidate_response="Updated the chapter links and titles.", | |
| 577 | + emit=capture, | |
| 578 | + summary=summary, | |
| 579 | + executor=FakeExecutor( | |
| 580 | + [ | |
| 581 | + tool_outcome( | |
| 582 | + tool_call=semantic_call, | |
| 583 | + output="semantic ok", | |
| 584 | + is_error=False, | |
| 585 | + exit_code=0, | |
| 586 | + stdout="semantic ok", | |
| 587 | + ), | |
| 588 | + tool_outcome( | |
| 589 | + tool_call=html5validator_call, | |
| 590 | + output="/bin/sh: html5validator: command not found", | |
| 591 | + is_error=True, | |
| 592 | + exit_code=127, | |
| 593 | + stderr="/bin/sh: html5validator: command not found", | |
| 594 | + ), | |
| 595 | + ] | |
| 596 | + ), # type: ignore[arg-type] | |
| 597 | + ) | |
| 598 | + | |
| 599 | + assert result.should_continue is False | |
| 600 | + assert result.reason_code == "verification_passed" | |
| 601 | + assert summary.verification_status == "passed" | |
| 602 | + assert dod.status == "done" | |
| 603 | + assert dod.last_verification_result == "passed" | |
| 604 | + assert [item.passed for item in dod.evidence] == [True, False] | |
| 605 | + assert [item.skipped for item in dod.evidence] == [False, True] | |
| 606 | + assert "SKIP" in result.final_response | |
| 607 | + assert "html5validator" in result.final_response | |
| 608 | + assert session.workflow_timeline[-2].reason_code == "verification_command_passed" | |
| 609 | + assert session.workflow_timeline[-1].reason_code == "verification_command_skipped" | |
| 610 | + assert [item.status for item in session.workflow_timeline[-1].verification_observations] == [ | |
| 611 | + VerificationObservationStatus.SKIPPED.value | |
| 612 | + ] | |
tests/test_ollama_backend.pymodified@@ -103,6 +103,47 @@ async def test_ollama_complete_uses_shared_parser_with_allowed_tool_names() -> N | ||
| 103 | 103 | await backend.close() |
| 104 | 104 | |
| 105 | 105 | |
| 106 | +@pytest.mark.asyncio | |
| 107 | +async def test_ollama_complete_canonicalizes_native_tool_aliases() -> None: | |
| 108 | + backend = OllamaBackend() | |
| 109 | + | |
| 110 | + async def fake_describe_model() -> None: | |
| 111 | + return None | |
| 112 | + | |
| 113 | + backend.describe_model = fake_describe_model # type: ignore[method-assign] | |
| 114 | + backend._client = FakeClient( | |
| 115 | + [ | |
| 116 | + FakeResponse( | |
| 117 | + { | |
| 118 | + "message": { | |
| 119 | + "content": "", | |
| 120 | + "tool_calls": [ | |
| 121 | + { | |
| 122 | + "id": "call_read", | |
| 123 | + "function": { | |
| 124 | + "name": "read_file", | |
| 125 | + "arguments": {"file_path": "/tmp/test.txt"}, | |
| 126 | + }, | |
| 127 | + } | |
| 128 | + ], | |
| 129 | + }, | |
| 130 | + "prompt_eval_count": 4, | |
| 131 | + "eval_count": 2, | |
| 132 | + } | |
| 133 | + ) | |
| 134 | + ] | |
| 135 | + ) | |
| 136 | + | |
| 137 | + response = await backend.complete( | |
| 138 | + messages=[], | |
| 139 | + tools=[{"name": "read"}, {"name": "write"}, {"name": "patch"}], | |
| 140 | + ) | |
| 141 | + | |
| 142 | + assert response.tool_calls[0].name == "read" | |
| 143 | + assert response.tool_calls[0].arguments == {"file_path": "/tmp/test.txt"} | |
| 144 | + await backend.close() | |
| 145 | + | |
| 146 | + | |
| 106 | 147 | @pytest.mark.asyncio |
| 107 | 148 | async def test_ollama_stream_response_uses_shared_parser_for_text_tool_calls() -> None: |
| 108 | 149 | backend = OllamaBackend() |
@@ -142,6 +183,80 @@ async def test_ollama_stream_response_uses_shared_parser_for_text_tool_calls() - | ||
| 142 | 183 | await backend.close() |
| 143 | 184 | |
| 144 | 185 | |
| 186 | +@pytest.mark.asyncio | |
| 187 | +async def test_ollama_stream_response_canonicalizes_native_tool_aliases() -> None: | |
| 188 | + backend = OllamaBackend() | |
| 189 | + | |
| 190 | + chunks = [ | |
| 191 | + chunk | |
| 192 | + async for chunk in backend._stream_response( | |
| 193 | + FakeStreamResponse( | |
| 194 | + [ | |
| 195 | + { | |
| 196 | + "message": { | |
| 197 | + "content": "", | |
| 198 | + "tool_calls": [ | |
| 199 | + { | |
| 200 | + "id": "call_read", | |
| 201 | + "function": { | |
| 202 | + "name": "read_file", | |
| 203 | + "arguments": {"file_path": "/tmp/test.txt"}, | |
| 204 | + }, | |
| 205 | + } | |
| 206 | + ], | |
| 207 | + }, | |
| 208 | + "done": True, | |
| 209 | + "prompt_eval_count": 4, | |
| 210 | + "eval_count": 2, | |
| 211 | + } | |
| 212 | + ] | |
| 213 | + ), | |
| 214 | + tools=[{"name": "read"}, {"name": "write"}, {"name": "patch"}], | |
| 215 | + ) | |
| 216 | + ] | |
| 217 | + | |
| 218 | + final_chunk = chunks[-1] | |
| 219 | + assert final_chunk.tool_calls[0].name == "read" | |
| 220 | + assert final_chunk.tool_calls[0].arguments == {"file_path": "/tmp/test.txt"} | |
| 221 | + await backend.close() | |
| 222 | + | |
| 223 | + | |
| 224 | +@pytest.mark.asyncio | |
| 225 | +async def test_ollama_stream_response_parses_fenced_read_command() -> None: | |
| 226 | + backend = OllamaBackend() | |
| 227 | + | |
| 228 | + chunks = [ | |
| 229 | + chunk | |
| 230 | + async for chunk in backend._stream_response( | |
| 231 | + FakeStreamResponse( | |
| 232 | + [ | |
| 233 | + { | |
| 234 | + "message": { | |
| 235 | + "content": ( | |
| 236 | + "I need to inspect the file first.\n" | |
| 237 | + "```bash\nread /tmp/test.txt\n```" | |
| 238 | + ) | |
| 239 | + }, | |
| 240 | + "done": False, | |
| 241 | + }, | |
| 242 | + { | |
| 243 | + "message": {"content": ""}, | |
| 244 | + "done": True, | |
| 245 | + "prompt_eval_count": 4, | |
| 246 | + "eval_count": 2, | |
| 247 | + }, | |
| 248 | + ] | |
| 249 | + ), | |
| 250 | + tools=[{"name": "read"}, {"name": "glob"}, {"name": "bash"}], | |
| 251 | + ) | |
| 252 | + ] | |
| 253 | + | |
| 254 | + final_chunk = chunks[-1] | |
| 255 | + assert final_chunk.tool_calls[0].name == "read" | |
| 256 | + assert final_chunk.tool_calls[0].arguments == {"file_path": "/tmp/test.txt"} | |
| 257 | + await backend.close() | |
| 258 | + | |
| 259 | + | |
| 145 | 260 | @pytest.mark.asyncio |
| 146 | 261 | async def test_ollama_stream_response_defers_raw_json_detection_to_final_parse() -> None: |
| 147 | 262 | backend = OllamaBackend() |
tests/test_parsing.pymodified@@ -225,6 +225,38 @@ Created the file.''' | ||
| 225 | 225 | assert result.tool_calls == [] |
| 226 | 226 | assert "TotallyUnknownTool" in result.content |
| 227 | 227 | |
| 228 | + def test_parse_bare_json_maps_read_file_alias_to_read(self): | |
| 229 | + text = '{"name": "read_file", "arguments": {"file_path": "/tmp/test.txt"}}' | |
| 230 | + result = parse_tool_calls( | |
| 231 | + text, | |
| 232 | + allowed_tool_names=["read", "write", "patch"], | |
| 233 | + ) | |
| 234 | + assert len(result.tool_calls) == 1 | |
| 235 | + assert result.tool_calls[0].name == "read" | |
| 236 | + assert result.tool_calls[0].arguments == {"file_path": "/tmp/test.txt"} | |
| 237 | + | |
| 238 | + def test_parse_fenced_read_command_into_tool_call(self): | |
| 239 | + text = "Let me inspect the file first.\n```bash\nread /tmp/test.txt\n```" | |
| 240 | + result = parse_tool_calls( | |
| 241 | + text, | |
| 242 | + allowed_tool_names=["read", "glob", "bash"], | |
| 243 | + ) | |
| 244 | + assert len(result.tool_calls) == 1 | |
| 245 | + assert result.tool_calls[0].name == "read" | |
| 246 | + assert result.tool_calls[0].arguments == {"file_path": "/tmp/test.txt"} | |
| 247 | + | |
| 248 | + def test_parse_fenced_glob_command_into_tool_call(self): | |
| 249 | + text = "```bash\nglob /tmp/guide/chapters/*.html\n```" | |
| 250 | + result = parse_tool_calls( | |
| 251 | + text, | |
| 252 | + allowed_tool_names=["read", "glob", "bash"], | |
| 253 | + ) | |
| 254 | + assert len(result.tool_calls) == 1 | |
| 255 | + assert result.tool_calls[0].name == "glob" | |
| 256 | + assert result.tool_calls[0].arguments == { | |
| 257 | + "pattern": "/tmp/guide/chapters/*.html" | |
| 258 | + } | |
| 259 | + | |
| 228 | 260 | |
| 229 | 261 | class TestFormatToolResult: |
| 230 | 262 | """Tests for format_tool_result function.""" |
tests/test_permissions.pymodified@@ -381,3 +381,35 @@ async def test_search_path_alias_hook_canonicalizes_common_aliases( | ||
| 381 | 381 | assert result.updated_arguments["path"] == expected_path |
| 382 | 382 | for alias in ("directory", "dir", "folder"): |
| 383 | 383 | assert alias not in result.updated_arguments |
| 384 | + | |
| 385 | + | |
| 386 | +@pytest.mark.asyncio | |
| 387 | +async def test_search_path_alias_hook_splits_full_glob_pattern( | |
| 388 | + temp_dir: Path, | |
| 389 | +) -> None: | |
| 390 | + registry = create_default_registry(temp_dir) | |
| 391 | + policy = build_permission_policy( | |
| 392 | + active_mode=PermissionMode.WORKSPACE_WRITE, | |
| 393 | + workspace_root=temp_dir, | |
| 394 | + tool_requirements=registry.get_tool_requirements(), | |
| 395 | + ) | |
| 396 | + hook = SearchPathAliasHook() | |
| 397 | + chapters = temp_dir / "chapters" | |
| 398 | + | |
| 399 | + result = await hook.pre_tool_use( | |
| 400 | + HookContext( | |
| 401 | + tool_call=ToolCall( | |
| 402 | + id="glob-1", | |
| 403 | + name="glob", | |
| 404 | + arguments={"pattern": f"{chapters}/*.html"}, | |
| 405 | + ), | |
| 406 | + tool=registry.get("glob"), | |
| 407 | + registry=registry, | |
| 408 | + permission_policy=policy, | |
| 409 | + source="native", | |
| 410 | + ) | |
| 411 | + ) | |
| 412 | + | |
| 413 | + assert result.updated_arguments is not None | |
| 414 | + assert result.updated_arguments["path"] == str(chapters) | |
| 415 | + assert result.updated_arguments["pattern"] == "*.html" | |
tests/test_prompt_builder.pymodified@@ -85,3 +85,15 @@ def test_prompt_builder_keeps_sections_stable_across_formats(temp_dir: Path) -> | ||
| 85 | 85 | assert "`react`" in react.content |
| 86 | 86 | assert "<tool_call>" in react.content |
| 87 | 87 | assert "call tools" in native.content.lower() |
| 88 | + | |
| 89 | + | |
| 90 | +def test_execute_mode_guidance_prefers_file_tools_for_text_edits(temp_dir: Path) -> None: | |
| 91 | + result = build_system_prompt_result( | |
| 92 | + tools=[_tool_schema("edit")], | |
| 93 | + use_react=False, | |
| 94 | + workflow_mode="execute", | |
| 95 | + permission_mode="workspace-write", | |
| 96 | + cwd=temp_dir, | |
| 97 | + ) | |
| 98 | + | |
| 99 | + assert "Prefer `edit`/`patch`/`write` over shell one-liners" in result.content | |
tests/test_recovery.pymodified@@ -122,6 +122,15 @@ class TestGetRecoveryHints: | ||
| 122 | 122 | hints = get_recovery_hints(ErrorCategory.COMMAND_NOT_FOUND, "bash") |
| 123 | 123 | assert "which" in hints.lower() |
| 124 | 124 | |
| 125 | + def test_bash_text_rewrite_hint_prefers_file_tools(self): | |
| 126 | + hints = get_recovery_hints( | |
| 127 | + ErrorCategory.UNKNOWN, | |
| 128 | + "bash", | |
| 129 | + {"command": "sed -i '1,3c\\updated' index.html"}, | |
| 130 | + ) | |
| 131 | + assert "edit/patch/write" in hints.lower() | |
| 132 | + assert "index.html" in hints | |
| 133 | + | |
| 125 | 134 | |
| 126 | 135 | class TestFormatRecoveryPrompt: |
| 127 | 136 | """Tests for recovery prompt formatting.""" |
@@ -140,6 +149,27 @@ class TestFormatRecoveryPrompt: | ||
| 140 | 149 | assert "1/3" in prompt |
| 141 | 150 | assert "retry the same command with slight variations" in prompt |
| 142 | 151 | |
| 152 | + def test_format_recovery_prompt_for_failed_shell_rewrite_points_to_file_tools(self): | |
| 153 | + ctx = RecoveryContext( | |
| 154 | + original_tool="bash", | |
| 155 | + original_args={"command": "sed -i '1,3c\\updated' index.html"}, | |
| 156 | + ) | |
| 157 | + ctx.add_attempt( | |
| 158 | + "bash", | |
| 159 | + {"command": "sed -i '1,3c\\updated' index.html"}, | |
| 160 | + "Exit code 1", | |
| 161 | + ) | |
| 162 | + | |
| 163 | + prompt = format_recovery_prompt( | |
| 164 | + ctx, | |
| 165 | + "bash", | |
| 166 | + {"command": "sed -i '1,3c\\updated' index.html"}, | |
| 167 | + "Exit code 1", | |
| 168 | + ) | |
| 169 | + | |
| 170 | + assert "edit/patch/write" in prompt.lower() | |
| 171 | + assert "index.html" in prompt | |
| 172 | + | |
| 143 | 173 | |
| 144 | 174 | class TestFormatFailureMessage: |
| 145 | 175 | """Tests for failure message formatting.""" |
tests/test_runtime_harness.pymodified@@ -1866,6 +1866,394 @@ async def test_duplicate_observation_queues_steering_to_reuse_prior_evidence( | ||
| 1866 | 1866 | assert any("index.html" in message for message in steering_messages) |
| 1867 | 1867 | |
| 1868 | 1868 | |
| 1869 | +@pytest.mark.asyncio | |
| 1870 | +async def test_relative_file_read_stays_on_recent_external_context( | |
| 1871 | + temp_dir: Path, | |
| 1872 | +) -> None: | |
| 1873 | + external_dir = temp_dir.parent / f"{temp_dir.name}-external-guide" | |
| 1874 | + external_dir.mkdir(exist_ok=True) | |
| 1875 | + external_index = external_dir / "index.html" | |
| 1876 | + external_index.write_text("external guide index\n") | |
| 1877 | + | |
| 1878 | + backend = ScriptedBackend( | |
| 1879 | + completions=[ | |
| 1880 | + native_tool_response( | |
| 1881 | + ToolCall( | |
| 1882 | + id="read-1", | |
| 1883 | + name="read", | |
| 1884 | + arguments={"file_path": str(external_index)}, | |
| 1885 | + ), | |
| 1886 | + content="I'll inspect the external index first.", | |
| 1887 | + ), | |
| 1888 | + native_tool_response( | |
| 1889 | + ToolCall( | |
| 1890 | + id="read-2", | |
| 1891 | + name="read", | |
| 1892 | + arguments={"file_path": "index.html"}, | |
| 1893 | + ), | |
| 1894 | + content="I'll reopen index.html in the same guide.", | |
| 1895 | + ), | |
| 1896 | + final_response("I stayed on the external guide instead of snapping back to the repo."), | |
| 1897 | + ] | |
| 1898 | + ) | |
| 1899 | + | |
| 1900 | + run = await run_scenario( | |
| 1901 | + "Inspect the external guide index twice.", | |
| 1902 | + backend, | |
| 1903 | + config=non_streaming_config(), | |
| 1904 | + project_root=temp_dir, | |
| 1905 | + ) | |
| 1906 | + | |
| 1907 | + assert tool_event_names(run) == ["read", "read"] | |
| 1908 | + messages = tool_result_messages(run) | |
| 1909 | + assert any("external guide index" in message for message in messages) | |
| 1910 | + assert not any("File not found: index.html" in message for message in messages) | |
| 1911 | + assert any( | |
| 1912 | + "Skipped - duplicate action" in message or "external guide index" in message | |
| 1913 | + for message in messages[1:] | |
| 1914 | + ) | |
| 1915 | + | |
| 1916 | + | |
| 1917 | +@pytest.mark.asyncio | |
| 1918 | +async def test_blocked_shell_text_rewrite_queues_file_tool_steering( | |
| 1919 | + temp_dir: Path, | |
| 1920 | +) -> None: | |
| 1921 | + target = temp_dir / "notes.txt" | |
| 1922 | + target.write_text("old value\n") | |
| 1923 | + | |
| 1924 | + backend = ScriptedBackend( | |
| 1925 | + completions=[ | |
| 1926 | + native_tool_response( | |
| 1927 | + ToolCall( | |
| 1928 | + id="bash-1", | |
| 1929 | + name="bash", | |
| 1930 | + arguments={"command": "sed -i '1s/old/new/' notes.txt"}, | |
| 1931 | + ), | |
| 1932 | + content="I'll update the file with sed.", | |
| 1933 | + ), | |
| 1934 | + native_tool_response( | |
| 1935 | + ToolCall( | |
| 1936 | + id="edit-1", | |
| 1937 | + name="edit", | |
| 1938 | + arguments={ | |
| 1939 | + "file_path": str(target), | |
| 1940 | + "old_string": "old value", | |
| 1941 | + "new_string": "new value", | |
| 1942 | + }, | |
| 1943 | + ), | |
| 1944 | + content="I'll switch to the edit tool instead.", | |
| 1945 | + ), | |
| 1946 | + final_response("Updated the file with Loader's file tools."), | |
| 1947 | + ] | |
| 1948 | + ) | |
| 1949 | + | |
| 1950 | + run = await run_scenario( | |
| 1951 | + "Update notes.txt from old value to new value.", | |
| 1952 | + backend, | |
| 1953 | + config=non_streaming_config(), | |
| 1954 | + project_root=temp_dir, | |
| 1955 | + ) | |
| 1956 | + | |
| 1957 | + assert tool_event_names(run) == ["bash", "edit"] | |
| 1958 | + assert target.read_text() == "new value\n" | |
| 1959 | + messages = tool_result_messages(run) | |
| 1960 | + assert any("Shell-based text rewrites are brittle" in message for message in messages) | |
| 1961 | + steering_messages = [ | |
| 1962 | + event.content | |
| 1963 | + for event in run.events | |
| 1964 | + if event.type == "steering" and event.content | |
| 1965 | + ] | |
| 1966 | + assert any("Use Loader's file tools for this text edit" in message for message in steering_messages) | |
| 1967 | + | |
| 1968 | + | |
| 1969 | +@pytest.mark.asyncio | |
| 1970 | +async def test_blocked_html_index_edit_queues_inventory_reuse_steering( | |
| 1971 | + temp_dir: Path, | |
| 1972 | +) -> None: | |
| 1973 | + chapters = temp_dir / "chapters" | |
| 1974 | + chapters.mkdir() | |
| 1975 | + (chapters / "05-input-output.html").write_text("<h1>Chapter 5: Input and Output</h1>\n") | |
| 1976 | + index_file = temp_dir / "index.html" | |
| 1977 | + index_file.write_text( | |
| 1978 | + '<ul class="chapter-list">\n' | |
| 1979 | + ' <li><a href="chapters/05-input-output.html">Chapter 5: Input and Output</a></li>\n' | |
| 1980 | + '</ul>\n' | |
| 1981 | + ) | |
| 1982 | + | |
| 1983 | + backend = ScriptedBackend( | |
| 1984 | + completions=[ | |
| 1985 | + native_tool_response( | |
| 1986 | + ToolCall( | |
| 1987 | + id="glob-1", | |
| 1988 | + name="glob", | |
| 1989 | + arguments={"path": str(chapters), "pattern": "*.html"}, | |
| 1990 | + ), | |
| 1991 | + content="I'll check which chapter files exist first.", | |
| 1992 | + ), | |
| 1993 | + native_tool_response( | |
| 1994 | + ToolCall( | |
| 1995 | + id="edit-1", | |
| 1996 | + name="edit", | |
| 1997 | + arguments={ | |
| 1998 | + "file_path": str(index_file), | |
| 1999 | + "old_string": '<li><a href="chapters/05-input-output.html">Chapter 5: Input and Output</a></li>', | |
| 2000 | + "new_string": '<li><a href="chapters/05-control-structures.html">Chapter 5: Control Structures</a></li>', | |
| 2001 | + }, | |
| 2002 | + ), | |
| 2003 | + content="I'll update the TOC entry.", | |
| 2004 | + ), | |
| 2005 | + final_response("I'll reuse the known chapter inventory and correct the TOC."), | |
| 2006 | + ] | |
| 2007 | + ) | |
| 2008 | + | |
| 2009 | + run = await run_scenario( | |
| 2010 | + "Fix the index table of contents so it matches the chapters directory.", | |
| 2011 | + backend, | |
| 2012 | + config=non_streaming_config(), | |
| 2013 | + project_root=temp_dir, | |
| 2014 | + ) | |
| 2015 | + | |
| 2016 | + messages = tool_result_messages(run) | |
| 2017 | + steering_messages = [ | |
| 2018 | + event.content | |
| 2019 | + for event in run.events | |
| 2020 | + if event.type == "steering" and event.content | |
| 2021 | + ] | |
| 2022 | + | |
| 2023 | + assert any("TOC references chapter files that do not exist" in message for message in messages) | |
| 2024 | + assert any( | |
| 2025 | + "Use the current target contents plus the verified sibling inventory instead of guessing." in message | |
| 2026 | + for message in steering_messages | |
| 2027 | + ) | |
| 2028 | + assert any( | |
| 2029 | + "chapters/05-input-output.html = Chapter 5: Input and Output" in message | |
| 2030 | + for message in steering_messages | |
| 2031 | + ) | |
| 2032 | + assert any("<ul class=\"chapter-list\">" in message for message in steering_messages) | |
| 2033 | + assert any("Suggested replacement block:" in message for message in steering_messages) | |
| 2034 | + assert any("Do not rewrite the whole document." in message for message in steering_messages) | |
| 2035 | + assert any("set `old_string` to the current TOC block above exactly" in message for message in steering_messages) | |
| 2036 | + assert any("Suggested edit call:" in message for message in steering_messages) | |
| 2037 | + assert any('old_string="""' in message for message in steering_messages) | |
| 2038 | + assert any( | |
| 2039 | + '<li><a href="chapters/05-input-output.html">Chapter 5: Input and Output</a></li>' in message | |
| 2040 | + for message in steering_messages | |
| 2041 | + ) | |
| 2042 | + | |
| 2043 | + | |
| 2044 | +@pytest.mark.asyncio | |
| 2045 | +async def test_full_path_glob_pattern_still_injects_verified_html_inventory( | |
| 2046 | + temp_dir: Path, | |
| 2047 | +) -> None: | |
| 2048 | + chapters = temp_dir / "chapters" | |
| 2049 | + chapters.mkdir() | |
| 2050 | + (chapters / "01-introduction.html").write_text( | |
| 2051 | + "<h1>Chapter 1: Introduction to Fortran</h1>\n" | |
| 2052 | + ) | |
| 2053 | + (chapters / "02-setup.html").write_text( | |
| 2054 | + "<h1>Chapter 2: Setting Up Fortran</h1>\n" | |
| 2055 | + ) | |
| 2056 | + index_file = temp_dir / "index.html" | |
| 2057 | + index_file.write_text("broken table of contents\n") | |
| 2058 | + | |
| 2059 | + backend = ScriptedBackend( | |
| 2060 | + completions=[ | |
| 2061 | + native_tool_response( | |
| 2062 | + ToolCall( | |
| 2063 | + id="glob-1", | |
| 2064 | + name="glob", | |
| 2065 | + arguments={"pattern": f"{chapters}/*.html"}, | |
| 2066 | + ), | |
| 2067 | + content="I'll inspect the chapter inventory first.", | |
| 2068 | + ), | |
| 2069 | + final_response("I'll update index.html using the verified inventory."), | |
| 2070 | + ] | |
| 2071 | + ) | |
| 2072 | + | |
| 2073 | + run = await run_scenario( | |
| 2074 | + "Fix index.html so the chapter links match the real chapter files.", | |
| 2075 | + backend, | |
| 2076 | + config=non_streaming_config(), | |
| 2077 | + project_root=temp_dir, | |
| 2078 | + ) | |
| 2079 | + | |
| 2080 | + assert tool_event_names(run) == ["glob"] | |
| 2081 | + messages = tool_result_messages(run) | |
| 2082 | + assert any( | |
| 2083 | + "Verified chapter inventory: chapters/01-introduction.html = Chapter 1: Introduction to Fortran" | |
| 2084 | + in message | |
| 2085 | + for message in messages | |
| 2086 | + ) | |
| 2087 | + assert any( | |
| 2088 | + "chapters/02-setup.html = Chapter 2: Setting Up Fortran" in message | |
| 2089 | + for message in messages | |
| 2090 | + ) | |
| 2091 | + | |
| 2092 | + | |
| 2093 | +@pytest.mark.asyncio | |
| 2094 | +async def test_verified_html_inventory_blocks_redundant_chapter_reread( | |
| 2095 | + temp_dir: Path, | |
| 2096 | +) -> None: | |
| 2097 | + chapters = temp_dir / "chapters" | |
| 2098 | + chapters.mkdir() | |
| 2099 | + (chapters / "01-introduction.html").write_text( | |
| 2100 | + "<h1>Chapter 1: Introduction to Fortran</h1>\n" | |
| 2101 | + ) | |
| 2102 | + (chapters / "02-setup.html").write_text( | |
| 2103 | + "<h1>Chapter 2: Setting Up Your Environment</h1>\n" | |
| 2104 | + ) | |
| 2105 | + index_file = temp_dir / "index.html" | |
| 2106 | + index_file.write_text("broken table of contents\n") | |
| 2107 | + | |
| 2108 | + backend = ScriptedBackend( | |
| 2109 | + completions=[ | |
| 2110 | + native_tool_response( | |
| 2111 | + ToolCall( | |
| 2112 | + id="glob-1", | |
| 2113 | + name="glob", | |
| 2114 | + arguments={"path": str(chapters), "pattern": "*.html"}, | |
| 2115 | + ), | |
| 2116 | + content="I'll inspect the chapter inventory first.", | |
| 2117 | + ), | |
| 2118 | + native_tool_response( | |
| 2119 | + ToolCall( | |
| 2120 | + id="read-1", | |
| 2121 | + name="read", | |
| 2122 | + arguments={"file_path": str(chapters / '01-introduction.html')}, | |
| 2123 | + ), | |
| 2124 | + content="I'll open the first chapter file to extract its title.", | |
| 2125 | + ), | |
| 2126 | + final_response("I'll update index.html using the verified chapter inventory."), | |
| 2127 | + ] | |
| 2128 | + ) | |
| 2129 | + | |
| 2130 | + run = await run_scenario( | |
| 2131 | + "Fix index.html so the chapter links and titles match the real chapter files.", | |
| 2132 | + backend, | |
| 2133 | + config=non_streaming_config(), | |
| 2134 | + project_root=temp_dir, | |
| 2135 | + ) | |
| 2136 | + | |
| 2137 | + messages = tool_result_messages(run) | |
| 2138 | + assert any( | |
| 2139 | + "Verified chapter inventory: chapters/01-introduction.html = Chapter 1: Introduction to Fortran" | |
| 2140 | + in message | |
| 2141 | + for message in messages | |
| 2142 | + ) | |
| 2143 | + assert any( | |
| 2144 | + "The verified chapter inventory already lists the exact href/title pairs for this directory" | |
| 2145 | + in message | |
| 2146 | + for message in messages | |
| 2147 | + ) | |
| 2148 | + | |
| 2149 | + | |
| 2150 | +@pytest.mark.asyncio | |
| 2151 | +async def test_successful_html_toc_edit_blocks_post_success_reread_and_steers_to_finish( | |
| 2152 | + temp_dir: Path, | |
| 2153 | +) -> None: | |
| 2154 | + chapters = temp_dir / "chapters" | |
| 2155 | + chapters.mkdir() | |
| 2156 | + (chapters / "01-introduction.html").write_text( | |
| 2157 | + "<h1>Chapter 1: Introduction to Fortran</h1>\n" | |
| 2158 | + ) | |
| 2159 | + (chapters / "02-setup.html").write_text( | |
| 2160 | + "<h1>Chapter 2: Setting Up Your Environment</h1>\n" | |
| 2161 | + ) | |
| 2162 | + index_file = temp_dir / "index.html" | |
| 2163 | + old_block = ( | |
| 2164 | + '<h2>Table of Contents</h2>\n' | |
| 2165 | + '<ul class="chapter-list">\n' | |
| 2166 | + ' <li><a href="chapters/01-old.html">Chapter 1: Old</a></li>\n' | |
| 2167 | + ' <li><a href="chapters/02-old.html">Chapter 2: Old</a></li>\n' | |
| 2168 | + '</ul>\n' | |
| 2169 | + ) | |
| 2170 | + new_block = ( | |
| 2171 | + '<h2>Table of Contents</h2>\n' | |
| 2172 | + '<ul class="chapter-list">\n' | |
| 2173 | + ' <li><a href="chapters/01-introduction.html">Chapter 1: Introduction to Fortran</a></li>\n' | |
| 2174 | + ' <li><a href="chapters/02-setup.html">Chapter 2: Setting Up Your Environment</a></li>\n' | |
| 2175 | + '</ul>\n' | |
| 2176 | + ) | |
| 2177 | + index_file.write_text(new_block.replace("01-introduction.html", "01-old.html").replace("02-setup.html", "02-old.html").replace("Introduction to Fortran", "Old").replace("Setting Up Your Environment", "Old")) | |
| 2178 | + | |
| 2179 | + backend = ScriptedBackend( | |
| 2180 | + completions=[ | |
| 2181 | + native_tool_response( | |
| 2182 | + ToolCall( | |
| 2183 | + id="glob-1", | |
| 2184 | + name="glob", | |
| 2185 | + arguments={"path": str(chapters), "pattern": "*.html"}, | |
| 2186 | + ), | |
| 2187 | + content="I'll inspect the chapter inventory first.", | |
| 2188 | + ), | |
| 2189 | + native_tool_response( | |
| 2190 | + ToolCall( | |
| 2191 | + id="read-1", | |
| 2192 | + name="read", | |
| 2193 | + arguments={"file_path": str(index_file)}, | |
| 2194 | + ), | |
| 2195 | + content="I'll inspect index.html next.", | |
| 2196 | + ), | |
| 2197 | + native_tool_response( | |
| 2198 | + ToolCall( | |
| 2199 | + id="edit-1", | |
| 2200 | + name="edit", | |
| 2201 | + arguments={ | |
| 2202 | + "file_path": str(index_file), | |
| 2203 | + "old_string": old_block, | |
| 2204 | + "new_string": new_block, | |
| 2205 | + }, | |
| 2206 | + ), | |
| 2207 | + content="I'll fix the TOC now.", | |
| 2208 | + ), | |
| 2209 | + native_tool_response( | |
| 2210 | + ToolCall( | |
| 2211 | + id="read-2", | |
| 2212 | + name="read", | |
| 2213 | + arguments={"file_path": str(index_file)}, | |
| 2214 | + ), | |
| 2215 | + content="I'll reread index.html to confirm the change.", | |
| 2216 | + ), | |
| 2217 | + final_response( | |
| 2218 | + "I updated index.html so the table of contents matches the real chapter files." | |
| 2219 | + ), | |
| 2220 | + ] | |
| 2221 | + ) | |
| 2222 | + | |
| 2223 | + run = await run_scenario( | |
| 2224 | + "Update index.html so every chapter link and title matches the real HTML files in chapters/.", | |
| 2225 | + backend, | |
| 2226 | + config=non_streaming_config(), | |
| 2227 | + project_root=temp_dir, | |
| 2228 | + ) | |
| 2229 | + | |
| 2230 | + messages = tool_result_messages(run) | |
| 2231 | + steering_messages = [ | |
| 2232 | + event.content | |
| 2233 | + for event in run.events | |
| 2234 | + if event.type == "steering" and event.content | |
| 2235 | + ] | |
| 2236 | + | |
| 2237 | + assert any( | |
| 2238 | + "Semantic verification preview: validated 2 toc links in index.html" | |
| 2239 | + in message | |
| 2240 | + for message in messages | |
| 2241 | + ) | |
| 2242 | + assert any( | |
| 2243 | + "already passes the validated chapter-link check" in message | |
| 2244 | + for message in messages | |
| 2245 | + ) | |
| 2246 | + assert any( | |
| 2247 | + "already satisfies the verified chapter-link constraints" in message | |
| 2248 | + for message in steering_messages | |
| 2249 | + ) | |
| 2250 | + assert any( | |
| 2251 | + "Do not reread `index.html` or files in `chapters/`" in message | |
| 2252 | + for message in steering_messages | |
| 2253 | + ) | |
| 2254 | + assert "validated 2 toc links in index.html" in run.response | |
| 2255 | + | |
| 2256 | + | |
| 1869 | 2257 | @pytest.mark.asyncio |
| 1870 | 2258 | async def test_interleaved_reread_is_allowed_once_without_intervening_mutation( |
| 1871 | 2259 | temp_dir: Path, |
tests/test_safeguard_services.pymodified@@ -2,12 +2,19 @@ | ||
| 2 | 2 | |
| 3 | 3 | from __future__ import annotations |
| 4 | 4 | |
| 5 | +import tempfile | |
| 6 | +from pathlib import Path | |
| 7 | + | |
| 5 | 8 | import loader.agent.safeguards as agent_safeguards |
| 6 | 9 | from loader.agent.safeguards import RuntimeSafeguards as AgentRuntimeSafeguards |
| 7 | 10 | from loader.runtime.safeguard_services import ( |
| 8 | 11 | ActionTracker, |
| 9 | 12 | PreActionValidator, |
| 10 | 13 | ValidationResult, |
| 14 | + build_html_toc_edit_call_template, | |
| 15 | + build_html_toc_replacement_block, | |
| 16 | + format_html_inventory_entry, | |
| 17 | + validate_html_toc, | |
| 11 | 18 | ) |
| 12 | 19 | from loader.runtime.safeguards import RuntimeSafeguards |
| 13 | 20 | |
@@ -27,6 +34,109 @@ def test_action_tracker_detects_duplicate_write_after_recording(tmp_path) -> Non | ||
| 27 | 34 | assert str(file_path) in reason |
| 28 | 35 | |
| 29 | 36 | |
| 37 | +def test_build_html_toc_replacement_block_uses_verified_inventory(tmp_path) -> None: | |
| 38 | + chapters = tmp_path / "chapters" | |
| 39 | + chapters.mkdir() | |
| 40 | + (chapters / "01-introduction.html").write_text( | |
| 41 | + "<h1>Chapter 1: Introduction to Fortran</h1>\n" | |
| 42 | + ) | |
| 43 | + (chapters / "02-setup.html").write_text( | |
| 44 | + "<h1>Chapter 2: Setting Up Your Environment</h1>\n" | |
| 45 | + ) | |
| 46 | + index_path = tmp_path / "index.html" | |
| 47 | + index_path.write_text( | |
| 48 | + "<h2>Table of Contents</h2>\n" | |
| 49 | + "<ul class=\"chapter-list\">\n" | |
| 50 | + " <li><a href=\"chapters/01-old.html\">Chapter 1: Old</a></li>\n" | |
| 51 | + "</ul>\n" | |
| 52 | + ) | |
| 53 | + | |
| 54 | + replacement = build_html_toc_replacement_block(index_path) | |
| 55 | + | |
| 56 | + assert replacement is not None | |
| 57 | + assert "<h2>Table of Contents</h2>" in replacement | |
| 58 | + assert '<li><a href="chapters/01-introduction.html">Chapter 1: Introduction to Fortran</a></li>' in replacement | |
| 59 | + assert '<li><a href="chapters/02-setup.html">Chapter 2: Setting Up Your Environment</a></li>' in replacement | |
| 60 | + | |
| 61 | + | |
| 62 | +def test_build_html_toc_edit_call_template_uses_current_and_replacement_blocks(tmp_path) -> None: | |
| 63 | + chapters = tmp_path / "chapters" | |
| 64 | + chapters.mkdir() | |
| 65 | + (chapters / "01-introduction.html").write_text( | |
| 66 | + "<h1>Chapter 1: Introduction to Fortran</h1>\n" | |
| 67 | + ) | |
| 68 | + index_path = tmp_path / "index.html" | |
| 69 | + index_path.write_text( | |
| 70 | + "<h2>Table of Contents</h2>\n" | |
| 71 | + '<ul class="chapter-list">\n' | |
| 72 | + ' <li><a href="chapters/01-old.html">Chapter 1: Old</a></li>\n' | |
| 73 | + "</ul>\n" | |
| 74 | + ) | |
| 75 | + | |
| 76 | + template = build_html_toc_edit_call_template(index_path) | |
| 77 | + | |
| 78 | + assert template is not None | |
| 79 | + assert template.startswith("edit(") | |
| 80 | + assert f'file_path="{index_path}"' in template | |
| 81 | + assert 'old_string="""' in template | |
| 82 | + assert 'new_string="""' in template | |
| 83 | + assert '<li><a href="chapters/01-introduction.html">Chapter 1: Introduction to Fortran</a></li>' in template | |
| 84 | + | |
| 85 | + | |
| 86 | +def test_validate_html_toc_reports_missing_and_mismatched_links(tmp_path) -> None: | |
| 87 | + chapters = tmp_path / "chapters" | |
| 88 | + chapters.mkdir() | |
| 89 | + (chapters / "01-introduction.html").write_text( | |
| 90 | + "<h1>Chapter 1: Introduction to Fortran</h1>\n" | |
| 91 | + ) | |
| 92 | + index_path = tmp_path / "index.html" | |
| 93 | + index_path.write_text( | |
| 94 | + '<ul class="chapter-list">\n' | |
| 95 | + ' <li><a href="chapters/01-introduction.html">Chapter 1: Wrong Title</a></li>\n' | |
| 96 | + ' <li><a href="chapters/02-missing.html">Chapter 2: Missing</a></li>\n' | |
| 97 | + "</ul>\n" | |
| 98 | + ) | |
| 99 | + | |
| 100 | + result = validate_html_toc(index_path) | |
| 101 | + | |
| 102 | + assert result is not None | |
| 103 | + assert result.valid is False | |
| 104 | + assert result.link_count == 2 | |
| 105 | + assert result.missing == ("chapters/02-missing.html -> missing",) | |
| 106 | + assert ( | |
| 107 | + result.mismatched | |
| 108 | + == ( | |
| 109 | + "chapters/01-introduction.html -> Chapter 1: Wrong Title != Chapter 1: Introduction to Fortran", | |
| 110 | + ) | |
| 111 | + ) | |
| 112 | + | |
| 113 | + | |
| 114 | +def test_validate_html_toc_reports_success(tmp_path) -> None: | |
| 115 | + chapters = tmp_path / "chapters" | |
| 116 | + chapters.mkdir() | |
| 117 | + (chapters / "01-introduction.html").write_text( | |
| 118 | + "<h1>Chapter 1: Introduction to Fortran</h1>\n" | |
| 119 | + ) | |
| 120 | + (chapters / "02-setup.html").write_text( | |
| 121 | + "<h1>Chapter 2: Setting Up Your Environment</h1>\n" | |
| 122 | + ) | |
| 123 | + index_path = tmp_path / "index.html" | |
| 124 | + index_path.write_text( | |
| 125 | + '<ul class="chapter-list">\n' | |
| 126 | + ' <li><a href="chapters/01-introduction.html">Chapter 1: Introduction to Fortran</a></li>\n' | |
| 127 | + ' <li><a href="chapters/02-setup.html">Chapter 2: Setting Up Your Environment</a></li>\n' | |
| 128 | + "</ul>\n" | |
| 129 | + ) | |
| 130 | + | |
| 131 | + result = validate_html_toc(index_path) | |
| 132 | + | |
| 133 | + assert result is not None | |
| 134 | + assert result.valid is True | |
| 135 | + assert result.link_count == 2 | |
| 136 | + assert result.missing == () | |
| 137 | + assert result.mismatched == () | |
| 138 | + | |
| 139 | + | |
| 30 | 140 | def test_action_tracker_preserves_loop_description_format() -> None: |
| 31 | 141 | tracker = ActionTracker() |
| 32 | 142 | |
@@ -88,6 +198,88 @@ def test_action_tracker_blocks_repeated_read_without_changes(tmp_path) -> None: | ||
| 88 | 198 | assert str(file_path) in reason |
| 89 | 199 | |
| 90 | 200 | |
| 201 | +def test_action_tracker_blocks_post_validation_html_rereads_until_new_mutation(tmp_path) -> None: | |
| 202 | + tracker = ActionTracker() | |
| 203 | + chapters = tmp_path / "chapters" | |
| 204 | + chapters.mkdir() | |
| 205 | + chapter_path = chapters / "01-introduction.html" | |
| 206 | + chapter_path.write_text("<h1>Chapter 1: Introduction to Fortran</h1>\n") | |
| 207 | + index_path = tmp_path / "index.html" | |
| 208 | + index_path.write_text( | |
| 209 | + '<ul class="chapter-list">\n' | |
| 210 | + ' <li><a href="chapters/01-introduction.html">Chapter 1: Introduction to Fortran</a></li>\n' | |
| 211 | + "</ul>\n" | |
| 212 | + ) | |
| 213 | + | |
| 214 | + tracker.note_validated_html_toc(str(index_path)) | |
| 215 | + | |
| 216 | + assert tracker.check_tool_call("read", {"file_path": str(index_path)}) == ( | |
| 217 | + True, | |
| 218 | + "The current index.html already passes the validated chapter-link check; stop rereading index.html or chapters/ and finish the task unless a specific href or title is still unresolved", | |
| 219 | + ) | |
| 220 | + assert tracker.check_tool_call("read", {"file_path": str(chapter_path)}) == ( | |
| 221 | + True, | |
| 222 | + "The current index.html already passes the validated chapter-link check; stop rereading index.html or chapters/ and finish the task unless a specific href or title is still unresolved", | |
| 223 | + ) | |
| 224 | + assert tracker.check_tool_call( | |
| 225 | + "glob", | |
| 226 | + {"path": str(chapters), "pattern": "*.html"}, | |
| 227 | + ) == ( | |
| 228 | + True, | |
| 229 | + "The current index.html already passes the validated chapter-link check; stop rereading index.html or chapters/ and finish the task unless a specific href or title is still unresolved", | |
| 230 | + ) | |
| 231 | + assert tracker.check_tool_call( | |
| 232 | + "bash", | |
| 233 | + {"command": f"cat {index_path}"}, | |
| 234 | + ) == ( | |
| 235 | + True, | |
| 236 | + "The current index.html already passes the validated chapter-link check; stop rereading index.html or chapters/ and finish the task unless a specific href or title is still unresolved", | |
| 237 | + ) | |
| 238 | + | |
| 239 | + tracker.record_tool_call( | |
| 240 | + "edit", | |
| 241 | + { | |
| 242 | + "file_path": str(index_path), | |
| 243 | + "old_string": "Chapter 1", | |
| 244 | + "new_string": "Chapter One", | |
| 245 | + }, | |
| 246 | + ) | |
| 247 | + | |
| 248 | + assert tracker.check_tool_call("read", {"file_path": str(index_path)}) == (False, "") | |
| 249 | + | |
| 250 | + | |
| 251 | +def test_action_tracker_blocks_chapter_rereads_after_verified_inventory(tmp_path) -> None: | |
| 252 | + tracker = ActionTracker() | |
| 253 | + chapters = tmp_path / "chapters" | |
| 254 | + chapters.mkdir() | |
| 255 | + chapter_path = chapters / "01-introduction.html" | |
| 256 | + chapter_path.write_text("<h1>Chapter 1: Introduction to Fortran</h1>\n") | |
| 257 | + index_path = tmp_path / "index.html" | |
| 258 | + index_path.write_text("<ul></ul>\n") | |
| 259 | + | |
| 260 | + tracker.note_verified_html_inventory(str(index_path)) | |
| 261 | + | |
| 262 | + assert tracker.check_tool_call("read", {"file_path": str(index_path)}) == (False, "") | |
| 263 | + assert tracker.check_tool_call("read", {"file_path": str(chapter_path)}) == ( | |
| 264 | + True, | |
| 265 | + "The verified chapter inventory already lists the exact href/title pairs for this directory; update index.html from that inventory instead of rereading chapter files", | |
| 266 | + ) | |
| 267 | + assert tracker.check_tool_call( | |
| 268 | + "glob", | |
| 269 | + {"path": str(chapters), "pattern": "*.html"}, | |
| 270 | + ) == ( | |
| 271 | + True, | |
| 272 | + "The verified chapter inventory already lists the exact href/title pairs for this directory; update index.html from that inventory instead of rereading chapter files", | |
| 273 | + ) | |
| 274 | + assert tracker.check_tool_call( | |
| 275 | + "bash", | |
| 276 | + {"command": f"head -20 {chapter_path}"}, | |
| 277 | + ) == ( | |
| 278 | + True, | |
| 279 | + "The verified chapter inventory already lists the exact href/title pairs for this directory; update index.html from that inventory instead of rereading chapter files", | |
| 280 | + ) | |
| 281 | + | |
| 282 | + | |
| 91 | 283 | def test_action_tracker_allows_one_interleaved_reread_without_changes(tmp_path) -> None: |
| 92 | 284 | tracker = ActionTracker() |
| 93 | 285 | index_path = tmp_path / "index.html" |
@@ -131,6 +323,60 @@ def test_action_tracker_blocks_fourth_interleaved_reread_without_changes(tmp_pat | ||
| 131 | 323 | assert str(index_path) in reason |
| 132 | 324 | |
| 133 | 325 | |
| 326 | +def test_action_tracker_allows_one_target_index_reread_after_chapter_discovery(tmp_path) -> None: | |
| 327 | + tracker = ActionTracker() | |
| 328 | + index_path = tmp_path / "index.html" | |
| 329 | + chapters = tmp_path / "chapters" | |
| 330 | + chapter_a = chapters / "01-introduction.html" | |
| 331 | + chapter_b = chapters / "02-setup.html" | |
| 332 | + chapter_c = chapters / "03-basics.html" | |
| 333 | + | |
| 334 | + tracker.record_tool_call("read", {"file_path": str(index_path)}) | |
| 335 | + tracker.record_tool_call("read", {"file_path": str(chapter_a)}) | |
| 336 | + tracker.record_tool_call("read", {"file_path": str(chapter_b)}) | |
| 337 | + tracker.record_tool_call("read", {"file_path": str(chapter_c)}) | |
| 338 | + | |
| 339 | + is_duplicate, reason = tracker.check_tool_call("read", {"file_path": str(index_path)}) | |
| 340 | + | |
| 341 | + assert is_duplicate is False | |
| 342 | + assert reason == "" | |
| 343 | + | |
| 344 | + | |
| 345 | +def test_action_tracker_blocks_second_target_index_reread_after_chapter_discovery(tmp_path) -> None: | |
| 346 | + tracker = ActionTracker() | |
| 347 | + index_path = tmp_path / "index.html" | |
| 348 | + chapters = tmp_path / "chapters" | |
| 349 | + | |
| 350 | + tracker.record_tool_call("read", {"file_path": str(index_path)}) | |
| 351 | + tracker.record_tool_call("read", {"file_path": str(chapters / "01-introduction.html")}) | |
| 352 | + tracker.record_tool_call("read", {"file_path": str(chapters / "02-setup.html")}) | |
| 353 | + tracker.record_tool_call("read", {"file_path": str(chapters / "03-basics.html")}) | |
| 354 | + tracker.record_tool_call("read", {"file_path": str(index_path)}) | |
| 355 | + | |
| 356 | + is_duplicate, reason = tracker.check_tool_call("read", {"file_path": str(index_path)}) | |
| 357 | + | |
| 358 | + assert is_duplicate is True | |
| 359 | + assert "known file/title evidence" in reason | |
| 360 | + | |
| 361 | + | |
| 362 | +def test_action_tracker_blocks_repeated_chapter_directory_search_once_titles_are_known( | |
| 363 | + tmp_path, | |
| 364 | +) -> None: | |
| 365 | + tracker = ActionTracker() | |
| 366 | + chapters = tmp_path / "chapters" | |
| 367 | + search_args = {"pattern": "*.html", "path": str(chapters)} | |
| 368 | + | |
| 369 | + tracker.record_tool_call("glob", search_args) | |
| 370 | + tracker.record_tool_call("read", {"file_path": str(chapters / "01-introduction.html")}) | |
| 371 | + tracker.record_tool_call("read", {"file_path": str(chapters / "02-setup.html")}) | |
| 372 | + tracker.record_tool_call("read", {"file_path": str(chapters / "03-basics.html")}) | |
| 373 | + | |
| 374 | + is_duplicate, reason = tracker.check_tool_call("glob", search_args) | |
| 375 | + | |
| 376 | + assert is_duplicate is True | |
| 377 | + assert "known filename/title evidence" in reason | |
| 378 | + | |
| 379 | + | |
| 134 | 380 | def test_action_tracker_allows_repeated_read_after_mutation(tmp_path) -> None: |
| 135 | 381 | tracker = ActionTracker() |
| 136 | 382 | file_path = tmp_path / "index.html" |
@@ -177,6 +423,99 @@ def test_pre_action_validator_allows_patch_string_without_hunks() -> None: | ||
| 177 | 423 | assert result == ValidationResult(valid=True) |
| 178 | 424 | |
| 179 | 425 | |
| 426 | +def test_pre_action_validator_blocks_shell_text_rewrite_for_html_target() -> None: | |
| 427 | + validator = PreActionValidator() | |
| 428 | + | |
| 429 | + result = validator.validate( | |
| 430 | + "bash", | |
| 431 | + { | |
| 432 | + "command": ( | |
| 433 | + "cd /tmp/fortran-qwen-recovery-check && " | |
| 434 | + "sed -i '1,3c\\<li>updated</li>' index.html" | |
| 435 | + ) | |
| 436 | + }, | |
| 437 | + ) | |
| 438 | + | |
| 439 | + assert result.valid is False | |
| 440 | + assert result.reason == ( | |
| 441 | + "Shell-based text rewrites are brittle and bypass Loader's safer file tools" | |
| 442 | + ) | |
| 443 | + assert "edit/patch/write" in result.suggestion | |
| 444 | + assert "index.html" in result.suggestion | |
| 445 | + | |
| 446 | + | |
| 447 | +def test_pre_action_validator_allows_non_mutating_sed_probe() -> None: | |
| 448 | + validator = PreActionValidator() | |
| 449 | + | |
| 450 | + result = validator.validate( | |
| 451 | + "bash", | |
| 452 | + {"command": "sed -n '1,20p' index.html"}, | |
| 453 | + ) | |
| 454 | + | |
| 455 | + assert result == ValidationResult(valid=True) | |
| 456 | + | |
| 457 | + | |
| 458 | +def test_pre_action_validator_blocks_index_edit_with_missing_chapter_href(tmp_path) -> None: | |
| 459 | + validator = PreActionValidator() | |
| 460 | + index = tmp_path / "index.html" | |
| 461 | + chapters = tmp_path / "chapters" | |
| 462 | + chapters.mkdir() | |
| 463 | + (chapters / "05-input-output.html").write_text( | |
| 464 | + "<h1>Chapter 5: Input and Output</h1>\n" | |
| 465 | + ) | |
| 466 | + | |
| 467 | + result = validator.validate( | |
| 468 | + "edit", | |
| 469 | + { | |
| 470 | + "file_path": str(index), | |
| 471 | + "old_string": '<li><a href="chapters/05-input-output.html">Chapter 5: Input and Output</a></li>', | |
| 472 | + "new_string": '<li><a href="chapters/05-control-structures.html">Chapter 5: Control Structures</a></li>', | |
| 473 | + }, | |
| 474 | + ) | |
| 475 | + | |
| 476 | + assert result.valid is False | |
| 477 | + assert result.reason == "Edited TOC references chapter files that do not exist" | |
| 478 | + assert "chapters/05-input-output.html = Chapter 5: Input and Output" in result.suggestion | |
| 479 | + | |
| 480 | + | |
| 481 | +def test_pre_action_validator_blocks_index_edit_with_title_mismatch(tmp_path) -> None: | |
| 482 | + validator = PreActionValidator() | |
| 483 | + index = tmp_path / "index.html" | |
| 484 | + chapters = tmp_path / "chapters" | |
| 485 | + chapters.mkdir() | |
| 486 | + (chapters / "12-troubleshooting-tips.html").write_text( | |
| 487 | + "<h1>Chapter 12: Troubleshooting and Tips</h1>\n" | |
| 488 | + ) | |
| 489 | + | |
| 490 | + result = validator.validate( | |
| 491 | + "edit", | |
| 492 | + { | |
| 493 | + "file_path": str(index), | |
| 494 | + "old_string": '<li><a href="chapters/12-troubleshooting-tips.html">Chapter 12: Troubleshooting and Tips</a></li>', | |
| 495 | + "new_string": '<li><a href="chapters/12-troubleshooting-tips.html">Chapter 12: Troubleshooting Tips</a></li>', | |
| 496 | + }, | |
| 497 | + ) | |
| 498 | + | |
| 499 | + assert result.valid is False | |
| 500 | + assert result.reason == "Edited TOC labels do not match the linked chapter titles" | |
| 501 | + assert ( | |
| 502 | + "chapters/12-troubleshooting-tips.html = Chapter 12: Troubleshooting and Tips" | |
| 503 | + in result.suggestion | |
| 504 | + ) | |
| 505 | + | |
| 506 | + | |
| 507 | +def test_format_html_inventory_entry_handles_tmp_alias_paths() -> None: | |
| 508 | + root = Path(tempfile.mkdtemp(dir="/tmp")) | |
| 509 | + chapters = root / "chapters" | |
| 510 | + chapters.mkdir() | |
| 511 | + candidate = chapters / "05-input-output.html" | |
| 512 | + candidate.write_text("<h1>Chapter 5: Input and Output</h1>\n") | |
| 513 | + | |
| 514 | + entry = format_html_inventory_entry(root, candidate.resolve(strict=False)) | |
| 515 | + | |
| 516 | + assert entry == "chapters/05-input-output.html = Chapter 5: Input and Output" | |
| 517 | + | |
| 518 | + | |
| 180 | 519 | def test_runtime_safeguards_wrap_runtime_owned_services() -> None: |
| 181 | 520 | safeguards = RuntimeSafeguards() |
| 182 | 521 | |
tests/test_tool_batch_policies.pymodified@@ -293,6 +293,22 @@ async def test_tool_batch_recovery_controller_includes_known_state_for_missing_f | ||
| 293 | 293 | ), |
| 294 | 294 | tool_results=[], |
| 295 | 295 | ), |
| 296 | + Message( | |
| 297 | + role=Role.ASSISTANT, | |
| 298 | + content="I already inspected the setup chapter.", | |
| 299 | + tool_calls=[ | |
| 300 | + ToolCall( | |
| 301 | + id="read-setup", | |
| 302 | + name="read", | |
| 303 | + arguments={"file_path": "~/Loader/guides/fortran/chapters/02-setup.html"}, | |
| 304 | + ) | |
| 305 | + ], | |
| 306 | + ), | |
| 307 | + Message.tool_result_message( | |
| 308 | + tool_call_id="read-setup", | |
| 309 | + display_content="<h1>Chapter 2: Setting Up Fortran</h1>\n", | |
| 310 | + result_content="<h1>Chapter 2: Setting Up Fortran</h1>\n", | |
| 311 | + ), | |
| 296 | 312 | Message( |
| 297 | 313 | role=Role.TOOL, |
| 298 | 314 | content=( |
@@ -354,6 +370,7 @@ async def test_tool_batch_recovery_controller_includes_known_state_for_missing_f | ||
| 354 | 370 | assert "Prefer edit/write/patch on the target file" in follow_up.content |
| 355 | 371 | assert "04-variables.html" in follow_up.content |
| 356 | 372 | assert "02-basic-syntax.html -> 02-setup.html" in follow_up.content |
| 373 | + assert "02-setup.html = Chapter 2: Setting Up Fortran" in follow_up.content | |
| 357 | 374 | assert "`~/Loader/guides/fortran/index.html`" in follow_up.content |
| 358 | 375 | assert any(event.type == "recovery" for event in events) |
| 359 | 376 | |
@@ -368,20 +385,16 @@ async def test_tool_batch_recovery_controller_suggests_known_sibling_files( | ||
| 368 | 385 | async def verify_action(tool_name: str, tool_args: dict, result: str, expected: str = "") -> ActionVerification: |
| 369 | 386 | raise AssertionError("Verification should not run here") |
| 370 | 387 | |
| 371 | - messages = [ | |
| 372 | - Message( | |
| 373 | - role=Role.TOOL, | |
| 374 | - content=( | |
| 375 | - "Observation [glob]: Result: " | |
| 376 | - "/private/tmp/fortran-qwen-recovery-check/chapters/01-introduction.html\n" | |
| 377 | - "/private/tmp/fortran-qwen-recovery-check/chapters/02-setup.html\n" | |
| 378 | - "/private/tmp/fortran-qwen-recovery-check/chapters/03-basics.html\n" | |
| 379 | - "/private/tmp/fortran-qwen-recovery-check/chapters/04-variables.html\n" | |
| 380 | - "/private/tmp/fortran-qwen-recovery-check/chapters/05-input-output.html" | |
| 381 | - ), | |
| 382 | - tool_results=[], | |
| 383 | - ), | |
| 384 | - ] | |
| 388 | + chapters = temp_dir / "chapters" | |
| 389 | + chapters.mkdir() | |
| 390 | + (chapters / "04-variables.html").write_text( | |
| 391 | + "<h1>Chapter 4: Variables and Data Types</h1>\n" | |
| 392 | + ) | |
| 393 | + (chapters / "05-input-output.html").write_text( | |
| 394 | + "<h1>Chapter 5: Input and Output</h1>\n" | |
| 395 | + ) | |
| 396 | + | |
| 397 | + messages: list[Message] = [] | |
| 385 | 398 | context = build_context( |
| 386 | 399 | temp_dir=temp_dir, |
| 387 | 400 | messages=messages, |
@@ -392,11 +405,11 @@ async def test_tool_batch_recovery_controller_suggests_known_sibling_files( | ||
| 392 | 405 | tool_call = ToolCall( |
| 393 | 406 | id="read-missing", |
| 394 | 407 | name="read", |
| 395 | - arguments={"file_path": "/tmp/fortran-qwen-recovery-check/chapters/04-data-types.html"}, | |
| 408 | + arguments={"file_path": str(chapters / "04-data-types.html")}, | |
| 396 | 409 | ) |
| 397 | 410 | outcome = tool_outcome( |
| 398 | 411 | tool_call=tool_call, |
| 399 | - output="File not found: /tmp/fortran-qwen-recovery-check/chapters/04-data-types.html", | |
| 412 | + output=f"File not found: {chapters / '04-data-types.html'}", | |
| 400 | 413 | is_error=True, |
| 401 | 414 | ) |
| 402 | 415 | |
@@ -414,9 +427,92 @@ async def test_tool_batch_recovery_controller_suggests_known_sibling_files( | ||
| 414 | 427 | assert follow_up is not None |
| 415 | 428 | assert "## LIKELY FILE CANDIDATES" in follow_up.content |
| 416 | 429 | assert "`04-variables.html`" in follow_up.content |
| 430 | + assert "Chapter 4: Variables and Data Types" in follow_up.content | |
| 417 | 431 | assert "instead of retrying the missing path" in follow_up.content |
| 418 | 432 | |
| 419 | 433 | |
| 434 | +@pytest.mark.asyncio | |
| 435 | +async def test_tool_batch_recovery_controller_includes_current_html_target_excerpt( | |
| 436 | + temp_dir: Path, | |
| 437 | +) -> None: | |
| 438 | + async def assess_confidence(tool_name: str, tool_args: dict, context: str) -> ConfidenceAssessment: | |
| 439 | + raise AssertionError("Confidence should not run here") | |
| 440 | + | |
| 441 | + async def verify_action(tool_name: str, tool_args: dict, result: str, expected: str = "") -> ActionVerification: | |
| 442 | + raise AssertionError("Verification should not run here") | |
| 443 | + | |
| 444 | + chapters = temp_dir / "chapters" | |
| 445 | + chapters.mkdir() | |
| 446 | + (chapters / "01-introduction.html").write_text( | |
| 447 | + "<h1>Chapter 1: Introduction to Fortran</h1>\n" | |
| 448 | + ) | |
| 449 | + (chapters / "02-setup.html").write_text( | |
| 450 | + "<h1>Chapter 2: Setting Up Your Environment</h1>\n" | |
| 451 | + ) | |
| 452 | + index = temp_dir / "index.html" | |
| 453 | + index.write_text( | |
| 454 | + "<h2>Table of Contents</h2>\n" | |
| 455 | + "<ul class=\"chapter-list\">\n" | |
| 456 | + " <li><a href=\"chapters/01-introduction.html\">Chapter 1: Introduction to Fortran</a></li>\n" | |
| 457 | + " <li><a href=\"chapters/02-basic-syntax.html\">Chapter 2: Basic Syntax</a></li>\n" | |
| 458 | + "</ul>\n" | |
| 459 | + ) | |
| 460 | + | |
| 461 | + context = build_context( | |
| 462 | + temp_dir=temp_dir, | |
| 463 | + messages=[], | |
| 464 | + assess_confidence=assess_confidence, | |
| 465 | + verify_action=verify_action, | |
| 466 | + ) | |
| 467 | + controller = ToolBatchRecoveryController(context) | |
| 468 | + tool_call = ToolCall( | |
| 469 | + id="patch-index", | |
| 470 | + name="patch", | |
| 471 | + arguments={ | |
| 472 | + "file_path": str(index), | |
| 473 | + "hunks": [ | |
| 474 | + { | |
| 475 | + "old_start": 1, | |
| 476 | + "old_lines": 1, | |
| 477 | + "new_start": 1, | |
| 478 | + "new_lines": 1, | |
| 479 | + "lines": ["-bad", "+good"], | |
| 480 | + } | |
| 481 | + ], | |
| 482 | + }, | |
| 483 | + ) | |
| 484 | + outcome = tool_outcome( | |
| 485 | + tool_call=tool_call, | |
| 486 | + output="Patch failed: hunk did not apply cleanly", | |
| 487 | + is_error=True, | |
| 488 | + ) | |
| 489 | + | |
| 490 | + events: list[AgentEvent] = [] | |
| 491 | + | |
| 492 | + async def emit(event: AgentEvent) -> None: | |
| 493 | + events.append(event) | |
| 494 | + | |
| 495 | + follow_up = await controller.build_follow_up( | |
| 496 | + tool_call=tool_call, | |
| 497 | + outcome=outcome, | |
| 498 | + emit=emit, | |
| 499 | + ) | |
| 500 | + | |
| 501 | + assert follow_up is not None | |
| 502 | + assert "## CURRENT TARGET EXCERPT" in follow_up.content | |
| 503 | + assert "Verified chapter inventory:" in follow_up.content | |
| 504 | + assert "<ul class=\"chapter-list\">" in follow_up.content | |
| 505 | + assert "chapters/02-setup.html = Chapter 2: Setting Up Your Environment" in follow_up.content | |
| 506 | + assert "Suggested replacement block:" in follow_up.content | |
| 507 | + assert '<li><a href="chapters/02-setup.html">Chapter 2: Setting Up Your Environment</a></li>' in follow_up.content | |
| 508 | + assert "Exact edit guidance:" in follow_up.content | |
| 509 | + assert "old_string: use the Current TOC block above exactly" in follow_up.content | |
| 510 | + assert "new_string: use the Suggested replacement block above exactly" in follow_up.content | |
| 511 | + assert "Do not rewrite the whole file." in follow_up.content | |
| 512 | + assert "Suggested edit call:" in follow_up.content | |
| 513 | + assert 'old_string="""' in follow_up.content | |
| 514 | + | |
| 515 | + | |
| 420 | 516 | @pytest.mark.asyncio |
| 421 | 517 | async def test_tool_batch_recovery_controller_reuses_context_for_related_missing_files( |
| 422 | 518 | temp_dir: Path, |
tests/test_tool_batches.pymodified@@ -453,12 +453,13 @@ async def test_tool_batch_runner_preserves_recovery_context_across_diagnostic_su | ||
| 453 | 453 | [tool_outcome(tool_call=tool_call, output="01-introduction.html", is_error=False)] |
| 454 | 454 | ) |
| 455 | 455 | |
| 456 | + summary = TurnSummary(final_response="") | |
| 456 | 457 | await runner.execute_batch( |
| 457 | 458 | tool_calls=[tool_call], |
| 458 | 459 | tool_source="assistant", |
| 459 | 460 | pending_tool_calls_seen=set(), |
| 460 | 461 | emit=_noop_emit, |
| 461 | - summary=TurnSummary(final_response=""), | |
| 462 | + summary=summary, | |
| 462 | 463 | dod=create_definition_of_done("Fix the chapter links"), |
| 463 | 464 | executor=executor, # type: ignore[arg-type] |
| 464 | 465 | on_confirmation=None, |
@@ -523,12 +524,13 @@ async def test_tool_batch_runner_clears_recovery_context_after_successful_mutati | ||
| 523 | 524 | [tool_outcome(tool_call=tool_call, output="Patched index.html", is_error=False)] |
| 524 | 525 | ) |
| 525 | 526 | |
| 527 | + summary = TurnSummary(final_response="") | |
| 526 | 528 | await runner.execute_batch( |
| 527 | 529 | tool_calls=[tool_call], |
| 528 | 530 | tool_source="assistant", |
| 529 | 531 | pending_tool_calls_seen=set(), |
| 530 | 532 | emit=_noop_emit, |
| 531 | - summary=TurnSummary(final_response=""), | |
| 533 | + summary=summary, | |
| 532 | 534 | dod=create_definition_of_done("Fix the chapter links"), |
| 533 | 535 | executor=executor, # type: ignore[arg-type] |
| 534 | 536 | on_confirmation=None, |
@@ -570,6 +572,22 @@ async def test_tool_batch_runner_queues_duplicate_observation_nudge( | ||
| 570 | 572 | ), |
| 571 | 573 | tool_results=[], |
| 572 | 574 | ), |
| 575 | + Message( | |
| 576 | + role=Role.ASSISTANT, | |
| 577 | + content="I already inspected the first chapter title.", | |
| 578 | + tool_calls=[ | |
| 579 | + ToolCall( | |
| 580 | + id="read-ch1", | |
| 581 | + name="read", | |
| 582 | + arguments={"file_path": str(temp_dir / 'chapters' / '01-introduction.html')}, | |
| 583 | + ) | |
| 584 | + ], | |
| 585 | + ), | |
| 586 | + Message.tool_result_message( | |
| 587 | + tool_call_id="read-ch1", | |
| 588 | + display_content="<h1>Chapter 1: Introduction to Fortran</h1>\n", | |
| 589 | + result_content="<h1>Chapter 1: Introduction to Fortran</h1>\n", | |
| 590 | + ), | |
| 573 | 591 | Message( |
| 574 | 592 | role=Role.ASSISTANT, |
| 575 | 593 | content="I should update the index now.", |
@@ -623,12 +641,13 @@ async def test_tool_batch_runner_queues_duplicate_observation_nudge( | ||
| 623 | 641 | ] |
| 624 | 642 | ) |
| 625 | 643 | |
| 644 | + summary = TurnSummary(final_response="") | |
| 626 | 645 | await runner.execute_batch( |
| 627 | 646 | tool_calls=[tool_call], |
| 628 | 647 | tool_source="assistant", |
| 629 | 648 | pending_tool_calls_seen=set(), |
| 630 | 649 | emit=_noop_emit, |
| 631 | - summary=TurnSummary(final_response=""), | |
| 650 | + summary=summary, | |
| 632 | 651 | dod=create_definition_of_done("Fix the chapter links"), |
| 633 | 652 | executor=executor, # type: ignore[arg-type] |
| 634 | 653 | on_confirmation=None, |
@@ -639,9 +658,196 @@ async def test_tool_batch_runner_queues_duplicate_observation_nudge( | ||
| 639 | 658 | |
| 640 | 659 | assert len(queued_messages) == 1 |
| 641 | 660 | assert "Reuse the earlier observation instead of repeating it." in queued_messages[0] |
| 661 | + assert "01-introduction.html = Chapter 1: Introduction to Fortran" in queued_messages[0] | |
| 642 | 662 | assert "index.html" in queued_messages[0] |
| 643 | 663 | |
| 644 | 664 | |
| 665 | +@pytest.mark.asyncio | |
| 666 | +async def test_tool_batch_runner_proactively_queues_verified_html_inventory( | |
| 667 | + temp_dir: Path, | |
| 668 | +) -> None: | |
| 669 | + async def assess_confidence( | |
| 670 | + tool_name: str, | |
| 671 | + tool_args: dict, | |
| 672 | + context: str, | |
| 673 | + ) -> ConfidenceAssessment: | |
| 674 | + raise AssertionError("Confidence scoring should be disabled in this scenario") | |
| 675 | + | |
| 676 | + async def verify_action( | |
| 677 | + tool_name: str, | |
| 678 | + tool_args: dict, | |
| 679 | + result: str, | |
| 680 | + expected: str = "", | |
| 681 | + ) -> ActionVerification: | |
| 682 | + raise AssertionError("Verification should not run for this scenario") | |
| 683 | + | |
| 684 | + chapters = temp_dir / "chapters" | |
| 685 | + chapters.mkdir() | |
| 686 | + (chapters / "01-introduction.html").write_text( | |
| 687 | + "<h1>Chapter 1: Introduction to Fortran</h1>\n" | |
| 688 | + ) | |
| 689 | + (chapters / "02-setup.html").write_text( | |
| 690 | + "<h1>Chapter 2: Setting Up Your Environment</h1>\n" | |
| 691 | + ) | |
| 692 | + (temp_dir / "index.html").write_text("<ul></ul>\n") | |
| 693 | + | |
| 694 | + context = build_context( | |
| 695 | + temp_dir=temp_dir, | |
| 696 | + messages=[], | |
| 697 | + safeguards=FakeSafeguards(), | |
| 698 | + assess_confidence=assess_confidence, | |
| 699 | + verify_action=verify_action, | |
| 700 | + auto_recover=False, | |
| 701 | + ) | |
| 702 | + context.session.current_task = ( | |
| 703 | + f"Update {temp_dir / 'index.html'} so the chapter links match the sibling files." | |
| 704 | + ) | |
| 705 | + queued_messages: list[str] = [] | |
| 706 | + context.queue_steering_message_callback = queued_messages.append | |
| 707 | + runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) | |
| 708 | + tool_call = ToolCall( | |
| 709 | + id="glob-1", | |
| 710 | + name="glob", | |
| 711 | + arguments={"path": str(chapters), "pattern": "*.html"}, | |
| 712 | + ) | |
| 713 | + executor = FakeExecutor( | |
| 714 | + [ | |
| 715 | + tool_outcome( | |
| 716 | + tool_call=tool_call, | |
| 717 | + output="\n".join( | |
| 718 | + [ | |
| 719 | + str(chapters / "01-introduction.html"), | |
| 720 | + str(chapters / "02-setup.html"), | |
| 721 | + ] | |
| 722 | + ), | |
| 723 | + is_error=False, | |
| 724 | + ) | |
| 725 | + ] | |
| 726 | + ) | |
| 727 | + | |
| 728 | + summary = TurnSummary(final_response="") | |
| 729 | + await runner.execute_batch( | |
| 730 | + tool_calls=[tool_call], | |
| 731 | + tool_source="assistant", | |
| 732 | + pending_tool_calls_seen=set(), | |
| 733 | + emit=_noop_emit, | |
| 734 | + summary=summary, | |
| 735 | + dod=create_definition_of_done("Fix the chapter links"), | |
| 736 | + executor=executor, # type: ignore[arg-type] | |
| 737 | + on_confirmation=None, | |
| 738 | + on_user_question=None, | |
| 739 | + emit_confirmation=None, | |
| 740 | + consecutive_errors=0, | |
| 741 | + ) | |
| 742 | + | |
| 743 | + assert len(queued_messages) == 1 | |
| 744 | + assert "verified sibling inventory" in queued_messages[0] | |
| 745 | + assert "chapters/01-introduction.html = Chapter 1: Introduction to Fortran" in queued_messages[0] | |
| 746 | + assert str(temp_dir / "index.html") in queued_messages[0] | |
| 747 | + assert len(summary.tool_result_messages) == 1 | |
| 748 | + assert ( | |
| 749 | + "Verified chapter inventory: chapters/01-introduction.html = Chapter 1: Introduction to Fortran" | |
| 750 | + in summary.tool_result_messages[0].content | |
| 751 | + ) | |
| 752 | + | |
| 753 | + | |
| 754 | +@pytest.mark.asyncio | |
| 755 | +async def test_tool_batch_runner_marks_validated_html_toc_completion_after_successful_edit( | |
| 756 | + temp_dir: Path, | |
| 757 | +) -> None: | |
| 758 | + async def assess_confidence( | |
| 759 | + tool_name: str, | |
| 760 | + tool_args: dict, | |
| 761 | + context: str, | |
| 762 | + ) -> ConfidenceAssessment: | |
| 763 | + raise AssertionError("Confidence scoring should be disabled in this scenario") | |
| 764 | + | |
| 765 | + async def verify_action( | |
| 766 | + tool_name: str, | |
| 767 | + tool_args: dict, | |
| 768 | + result: str, | |
| 769 | + expected: str = "", | |
| 770 | + ) -> ActionVerification: | |
| 771 | + raise AssertionError("Verification should not run for this scenario") | |
| 772 | + | |
| 773 | + chapters = temp_dir / "chapters" | |
| 774 | + chapters.mkdir() | |
| 775 | + (chapters / "01-introduction.html").write_text( | |
| 776 | + "<h1>Chapter 1: Introduction to Fortran</h1>\n" | |
| 777 | + ) | |
| 778 | + (chapters / "02-setup.html").write_text( | |
| 779 | + "<h1>Chapter 2: Setting Up Your Environment</h1>\n" | |
| 780 | + ) | |
| 781 | + index_path = temp_dir / "index.html" | |
| 782 | + old_block = ( | |
| 783 | + '<ul class="chapter-list">\n' | |
| 784 | + ' <li><a href="chapters/01-old.html">Chapter 1: Old</a></li>\n' | |
| 785 | + ' <li><a href="chapters/02-old.html">Chapter 2: Old</a></li>\n' | |
| 786 | + "</ul>\n" | |
| 787 | + ) | |
| 788 | + new_block = ( | |
| 789 | + '<ul class="chapter-list">\n' | |
| 790 | + ' <li><a href="chapters/01-introduction.html">Chapter 1: Introduction to Fortran</a></li>\n' | |
| 791 | + ' <li><a href="chapters/02-setup.html">Chapter 2: Setting Up Your Environment</a></li>\n' | |
| 792 | + "</ul>\n" | |
| 793 | + ) | |
| 794 | + index_path.write_text(new_block) | |
| 795 | + | |
| 796 | + context = build_context( | |
| 797 | + temp_dir=temp_dir, | |
| 798 | + messages=[], | |
| 799 | + safeguards=FakeSafeguards(), | |
| 800 | + assess_confidence=assess_confidence, | |
| 801 | + verify_action=verify_action, | |
| 802 | + auto_recover=False, | |
| 803 | + ) | |
| 804 | + queued_messages: list[str] = [] | |
| 805 | + context.queue_steering_message_callback = queued_messages.append | |
| 806 | + runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) | |
| 807 | + tool_call = ToolCall( | |
| 808 | + id="edit-1", | |
| 809 | + name="edit", | |
| 810 | + arguments={ | |
| 811 | + "file_path": str(index_path), | |
| 812 | + "old_string": old_block, | |
| 813 | + "new_string": new_block, | |
| 814 | + }, | |
| 815 | + ) | |
| 816 | + executor = FakeExecutor( | |
| 817 | + [ | |
| 818 | + tool_outcome( | |
| 819 | + tool_call=tool_call, | |
| 820 | + output=f"Successfully edited {index_path}", | |
| 821 | + is_error=False, | |
| 822 | + ) | |
| 823 | + ] | |
| 824 | + ) | |
| 825 | + | |
| 826 | + summary = TurnSummary(final_response="") | |
| 827 | + await runner.execute_batch( | |
| 828 | + tool_calls=[tool_call], | |
| 829 | + tool_source="assistant", | |
| 830 | + pending_tool_calls_seen=set(), | |
| 831 | + emit=_noop_emit, | |
| 832 | + summary=summary, | |
| 833 | + dod=create_definition_of_done("Fix the chapter links"), | |
| 834 | + executor=executor, # type: ignore[arg-type] | |
| 835 | + on_confirmation=None, | |
| 836 | + on_user_question=None, | |
| 837 | + emit_confirmation=None, | |
| 838 | + consecutive_errors=0, | |
| 839 | + ) | |
| 840 | + | |
| 841 | + assert any( | |
| 842 | + "Semantic verification preview: validated 2 toc links in index.html" | |
| 843 | + in message.content | |
| 844 | + for message in summary.tool_result_messages | |
| 845 | + ) | |
| 846 | + assert len(queued_messages) == 1 | |
| 847 | + assert "already satisfies the verified chapter-link constraints" in queued_messages[0] | |
| 848 | + assert "Do not reread `index.html` or files in `chapters/`" in queued_messages[0] | |
| 849 | + | |
| 850 | + | |
| 645 | 851 | async def _noop_emit(event: AgentEvent) -> None: |
| 646 | 852 | return None |
| 647 | 853 | |
tests/test_turn_preparation.pymodified@@ -10,6 +10,7 @@ from loader.agent.loop import AgentConfig | ||
| 10 | 10 | from loader.llm.base import CompletionResponse, ToolCall |
| 11 | 11 | from loader.runtime.completion_trace import CompletionTraceEntry |
| 12 | 12 | from loader.runtime.conversation import ConversationRuntime |
| 13 | +from loader.runtime.dod import DefinitionOfDoneStore, create_definition_of_done | |
| 13 | 14 | from loader.runtime.runtime_handle import RuntimeHandle |
| 14 | 15 | from tests.helpers.runtime_harness import ScriptedBackend |
| 15 | 16 | |
@@ -170,3 +171,79 @@ async def test_turn_preparation_can_bootstrap_clarify_handoff( | ||
| 170 | 171 | for event in events |
| 171 | 172 | if event.type == "workflow_mode" and event.workflow_mode |
| 172 | 173 | ] == ["clarify", "execute"] |
| 174 | + | |
| 175 | + | |
| 176 | +@pytest.mark.asyncio | |
| 177 | +async def test_turn_preparation_does_not_resume_latest_dod_from_older_session( | |
| 178 | + temp_dir: Path, | |
| 179 | +) -> None: | |
| 180 | + backend = ScriptedBackend() | |
| 181 | + handle = RuntimeHandle( | |
| 182 | + backend=backend, | |
| 183 | + config=non_streaming_config(), | |
| 184 | + project_root=temp_dir, | |
| 185 | + ) | |
| 186 | + runtime = ConversationRuntime(handle) | |
| 187 | + task = "Update /tmp/fortran/index.html so the chapter list matches the real files." | |
| 188 | + | |
| 189 | + stale_dod = create_definition_of_done(task) | |
| 190 | + stale_dod.status = "fixing" | |
| 191 | + stale_dod.touched_files.append("/tmp/fortran/index.html") | |
| 192 | + stale_dod.mutating_actions.append("edit") | |
| 193 | + stale_path = DefinitionOfDoneStore(temp_dir).save(stale_dod) | |
| 194 | + | |
| 195 | + events = [] | |
| 196 | + | |
| 197 | + async def capture(event) -> None: | |
| 198 | + events.append(event) | |
| 199 | + | |
| 200 | + prepared = await runtime.turn_preparation.prepare( | |
| 201 | + task=task, | |
| 202 | + emit=capture, | |
| 203 | + requested_mode="execute", | |
| 204 | + original_task=None, | |
| 205 | + on_user_question=None, | |
| 206 | + ) | |
| 207 | + | |
| 208 | + assert prepared.definition_of_done.storage_path != str(stale_path) | |
| 209 | + assert prepared.definition_of_done.touched_files == [] | |
| 210 | + assert prepared.definition_of_done.mutating_actions == [] | |
| 211 | + assert prepared.definition_of_done.pending_items == ["Complete the requested work"] | |
| 212 | + | |
| 213 | + | |
| 214 | +@pytest.mark.asyncio | |
| 215 | +async def test_turn_preparation_resumes_active_session_dod( | |
| 216 | + temp_dir: Path, | |
| 217 | +) -> None: | |
| 218 | + backend = ScriptedBackend() | |
| 219 | + handle = RuntimeHandle( | |
| 220 | + backend=backend, | |
| 221 | + config=non_streaming_config(), | |
| 222 | + project_root=temp_dir, | |
| 223 | + ) | |
| 224 | + runtime = ConversationRuntime(handle) | |
| 225 | + task = "Keep repairing the runtime state controller." | |
| 226 | + | |
| 227 | + existing_dod = create_definition_of_done(task) | |
| 228 | + existing_dod.status = "fixing" | |
| 229 | + existing_dod.pending_items.append("Collect verification evidence") | |
| 230 | + existing_dod.touched_files.append(str(temp_dir / "index.html")) | |
| 231 | + existing_path = DefinitionOfDoneStore(temp_dir).save(existing_dod) | |
| 232 | + handle.session.active_dod_path = str(existing_path) | |
| 233 | + | |
| 234 | + events = [] | |
| 235 | + | |
| 236 | + async def capture(event) -> None: | |
| 237 | + events.append(event) | |
| 238 | + | |
| 239 | + prepared = await runtime.turn_preparation.prepare( | |
| 240 | + task=task, | |
| 241 | + emit=capture, | |
| 242 | + requested_mode="execute", | |
| 243 | + original_task=None, | |
| 244 | + on_user_question=None, | |
| 245 | + ) | |
| 246 | + | |
| 247 | + assert prepared.definition_of_done.storage_path == str(existing_path) | |
| 248 | + assert prepared.definition_of_done.touched_files == [str(temp_dir / "index.html")] | |
| 249 | + assert prepared.definition_of_done.status == "fixing" | |
tests/test_workflow.pymodified@@ -205,6 +205,43 @@ def test_extract_verification_commands_from_markdown_splits_code_blocks() -> Non | ||
| 205 | 205 | ] |
| 206 | 206 | |
| 207 | 207 | |
| 208 | +def test_extract_verification_commands_from_markdown_ignores_prose_only_bullets() -> None: | |
| 209 | + markdown = "\n".join( | |
| 210 | + [ | |
| 211 | + "# Verification Plan", | |
| 212 | + "", | |
| 213 | + "## Verification Commands", | |
| 214 | + "- Check that all chapter links in index.html resolve to existing files", | |
| 215 | + "- Validate chapter titles with `python3 scripts/check_titles.py`", | |
| 216 | + "- `test -f index.html`", | |
| 217 | + ] | |
| 218 | + ) | |
| 219 | + | |
| 220 | + assert extract_verification_commands_from_markdown(markdown) == [ | |
| 221 | + "python3 scripts/check_titles.py", | |
| 222 | + "test -f index.html", | |
| 223 | + ] | |
| 224 | + | |
| 225 | + | |
| 226 | +def test_extract_verification_commands_keeps_shell_pipelines_intact() -> None: | |
| 227 | + markdown = "\n".join( | |
| 228 | + [ | |
| 229 | + "# Verification Plan", | |
| 230 | + "", | |
| 231 | + "## Verification Commands", | |
| 232 | + "```bash", | |
| 233 | + "ls -la chapters/", | |
| 234 | + "cat index.html | head -20", | |
| 235 | + "```", | |
| 236 | + ] | |
| 237 | + ) | |
| 238 | + | |
| 239 | + assert extract_verification_commands_from_markdown(markdown) == [ | |
| 240 | + "ls -la chapters/", | |
| 241 | + "cat index.html | head -20", | |
| 242 | + ] | |
| 243 | + | |
| 244 | + | |
| 208 | 245 | def test_workflow_artifact_store_and_bridge_round_trip(tmp_path: Path) -> None: |
| 209 | 246 | store = WorkflowArtifactStore(tmp_path) |
| 210 | 247 | brief = ClarifyBrief.fallback( |