Force stale repairs to write

Status	File	+	-
M	`src/loader/runtime/dod.py`	16	0
M	`src/loader/runtime/repair.py`	32	12
M	`src/loader/runtime/repair_focus.py`	55	0
M	`src/loader/runtime/tool_batches.py`	24	4
M	`src/loader/runtime/turn_completion.py`	22	1
M	`src/loader/tools/fs_safety.py`	21	8
M	`tests/test_dod.py`	65	0
M	`tests/test_expanded_tools.py`	28	0
M	`tests/test_repair.py`	50	0
M	`tests/test_tool_batches.py`	62	0
M	`tests/test_turn_completion.py`	82	0

src/loader/runtime/dod.py (modified)

              f"minimum_chapter_blocks = {quality_floor.chapter_blocks}",
              "tag_pattern = re.compile(r'<[^>]+>')",
              "content_block_pattern = re.compile(r'<(p|li|pre|code|section|article|table|h2|h3|h4)\\b', re.IGNORECASE)",
++            "html_close_pattern = re.compile(r'</html\\s*>', re.IGNORECASE)",
++            "body_close_pattern = re.compile(r'</body\\s*>', re.IGNORECASE)",
              "issues = []",
              "checked = 0",
              "for raw_path in paths:",
              "    plain = re.sub(r'\\s+', ' ', plain).strip()",
              "    content_blocks = len(content_block_pattern.findall(text))",
              "    has_h1 = bool(re.search(r'<h1\\b', text, re.IGNORECASE))",
++            "    html_close_matches = list(html_close_pattern.finditer(text))",
++            "    body_close_matches = list(body_close_pattern.finditer(text))",
              "    minimum_chars = minimum_index_chars if path.name.lower() == 'index.html' else minimum_chapter_chars",
              "    minimum_blocks = minimum_index_blocks if path.name.lower() == 'index.html' else minimum_chapter_blocks",
++            "    if len(body_close_matches) != 1:",
++            "        issues.append(",
++            "            f'{path}: expected exactly one closing </body> tag (found {len(body_close_matches)})'",
++            "        )",
++            "    if len(html_close_matches) != 1:",
++            "        issues.append(",
++            "            f'{path}: expected exactly one closing </html> tag (found {len(html_close_matches)})'",
++            "        )",
++            "    if html_close_matches and text[html_close_matches[-1].end():].strip():",
++            "        issues.append(f'{path}: content appears after closing </html>')",
++            "    if html_close_matches and body_close_matches and body_close_matches[-1].start() > html_close_matches[-1].start():",
++            "        issues.append(f'{path}: closing </body> appears after closing </html>')",
              "    if not has_h1:",
              "        issues.append(f'{path}: missing <h1>')",
              "    if len(plain) < minimum_chars:",

src/loader/runtime/repair.py (modified)

  from .parsing import parse_tool_calls
  from .path_display import display_runtime_path
  from .recovery import detect_missing_mutation_payload
--from .repair_focus import ActiveRepairContext, extract_active_repair_context
++from .repair_focus import (
++    ActiveRepairContext,
++    extract_active_repair_context,
++    recent_repair_mutation_context_failed,
++)
  from .workflow import (
      infer_output_outline_label,
      infer_pending_todo_output_target,
+         ]
          if issue_line:
              lines.append(f"- Current verifier issue: {issue_line[2:] if issue_line.startswith('- ') else issue_line}")
--        lines.extend(
++        force_write = recent_repair_mutation_context_failed(
--            [
++            self.context.session.messages,
--                "- Use one bounded `edit`, `patch`, or `write` call for that same "
++            target,
--                "file now. Append or replace a body section with 4-6 substantive "
--                "sections, lists, commands, or examples; do not attempt a giant "
--                "full-page rewrite from memory.",
--                "- Do not add table-of-contents entries, do not retarget links, and "
--                "do not reopen unrelated reference files for this retry.",
--                "- No narration, no TodoWrite, no final summary, and no empty "
--                "response; emit the mutation tool call now.",
--            ]
+         )
++        if force_write:
++            lines.extend(
++                [
++                    "- Recent `edit`/`patch` attempts for this same target failed "
++                    "against stale or malformed context. Use exactly one "
++                    "`write(file_path=..., content=...)` call now with a complete "
++                    "valid HTML document for that file.",
++                    "- Do not call `read`, `edit`, `patch`, TodoWrite, or a final "
++                    "summary on this retry; emit the `write` mutation tool call now.",
++                ]
++            )
++        else:
++            lines.extend(
++                [
++                    "- Use one bounded `edit`, `patch`, or `write` call for that same "
++                    "file now. Append or replace a body section with 4-6 substantive "
++                    "sections, lists, commands, or examples; do not attempt a giant "
++                    "full-page rewrite from memory.",
++                    "- Do not add table-of-contents entries, do not retarget links, and "
++                    "do not reopen unrelated reference files for this retry.",
++                    "- No narration, no TodoWrite, no final summary, and no empty "
++                    "response; emit the mutation tool call now.",
++                ]
++            )
          if remaining_line:
              lines.append(remaining_line)
          return "\n".join(lines)

src/loader/runtime/repair_focus.py (modified)

  from ..llm.base import Message
++# Lowercase substrings that identify a failed edit/patch attempt. They are
++# compared against lowercased message content by
++# recent_repair_mutation_context_failed, so every entry must stay lowercase.
++_STALE_REPAIR_MUTATION_MARKERS = (
++    "old_string not found",
++    "old_string was stale",
++    "do not retry the same remembered text",
++    "patch hunks are missing",
++    "provide structured patch hunks",
++    "hunks must not be empty",
++    "structured patch context mismatch",
++    "structured patch hunk consumed",
++    "structured patch references lines past the end",
++    "structured patch hunks overlap",
++    "failed to complete the operation after",
++)
++
  @dataclass(frozen=True)
  class ActiveRepairContext:
      return normalized in normalized_paths
++def recent_repair_mutation_context_failed(
++    messages: list[Message],
++    target: str,
++    *,
++    lookback: int = 24,
++) -> bool:
++    """Return whether recent repair attempts proved the target context is stale.
++
++    Scans at most the last ``lookback`` messages, newest first, for one whose
++    content both mentions ``target`` (raw path, normalized path, or bare file
++    name; case-sensitive) and contains one of the stale-mutation markers
++    (case-insensitive).
++
++    Args:
++        messages: Session history to scan; entries without text content are
++            skipped.
++        target: Path of the file being repaired.
++        lookback: Maximum number of trailing messages to inspect. Zero or a
++            negative value inspects nothing.
++
++    Returns:
++        True if a matching failure message is found, otherwise False.
++    """
++
++    # ``messages[-0:]`` is the whole list and a negative window would skip a
++    # prefix instead of bounding the tail, so reject non-positive windows
++    # explicitly rather than silently scanning the wrong range.
++    if lookback <= 0:
++        return False
++
++    target_tokens = _target_match_tokens(target)
++    if not target_tokens:
++        return False
++
++    for message in reversed(messages[-lookback:]):
++        content = str(getattr(message, "content", "") or "")
++        if not content:
++            continue
++        # Path tokens are compared case-sensitively against the raw text.
++        if not any(token and token in content for token in target_tokens):
++            continue
++        # Markers are lowercase; lowercase the text only once a target matched.
++        lowered = content.lower()
++        if any(marker in lowered for marker in _STALE_REPAIR_MUTATION_MARKERS):
++            return True
++    return False
++
++
  def normalize_repair_path(raw_path: str) -> str:
      text = str(raw_path or "").strip()
      if not text:
          return str(Path(text).expanduser())
++def _target_match_tokens(raw_path: str) -> tuple[str, ...]:
++    """Return the distinct strings that count as a mention of ``raw_path``.
++
++    The result holds, in order and without duplicates: the stripped input, its
++    ``normalize_repair_path`` form, and the final path component. An empty or
++    whitespace-only input yields an empty tuple.
++    """
++    stripped = str(raw_path or "").strip()
++    if not stripped:
++        return ()
++
++    resolved = normalize_repair_path(stripped)
++    try:
++        basename = Path(resolved or stripped).name
++    except (OSError, RuntimeError, ValueError):
++        # Fall back to matching on the path forms only.
++        basename = ""
++
++    candidates: list[str] = [stripped]
++    for candidate in (resolved, basename):
++        if candidate and candidate not in candidates:
++            candidates.append(candidate)
++    return tuple(candidates)
++
++
  def _path_roots(paths: set[str]) -> set[str]:
      roots: set[str] = set()
      for raw_path in paths:

src/loader/runtime/tool_batches.py (modified)

  from .path_display import display_runtime_path
  from .policy_timeline import append_verification_timeline_entry
  from .recovery import RecoveryContext, detect_missing_mutation_payload
--from .repair_focus import extract_active_repair_context, path_within_allowed_roots
++from .repair_focus import (
++    extract_active_repair_context,
++    path_within_allowed_roots,
++    recent_repair_mutation_context_failed,
++)
  from .safeguard_services import extract_shell_text_rewrite_target
  from .tool_batch_checks import ToolBatchConfidenceGate, ToolBatchVerificationGate
  from .tool_batch_recovery import ToolBatchRecoveryController
                  if repair_issue
                  else f"- Improve `{target}` until it satisfies the active content-quality verifier.\n"
+             )
++            force_write = recent_repair_mutation_context_failed(
++                self.context.session.messages,
++                target,
++            )
++            if force_write:
++                immediate_step = (
++                    f"- Immediate next step: rewrite `{target}` with one `write` call.\n"
++                    "- Recent `edit`/`patch` attempts for this file failed against stale "
++                    "or malformed context. Use `write(file_path=..., content=...)` with "
++                    "a complete valid HTML document, and do not call `read`, `edit`, "
++                    "`patch`, or TodoWrite again first."
++                )
++            else:
++                immediate_step = (
++                    f"- Immediate next step: edit `{target}`.\n"
++                    "- Continue with one concrete `edit`, `patch`, or `write` call that "
++                    "actually changes the current generated file."
++                )
              self.context.set_workflow_mode("execute")
              self.context.queue_steering_message(
                  "Todo tracking is updated, but verification still has an active "
                  "not finish yet.\n\n"
                  "Repair focus:\n"
                  f"{issue_line}"
--                f"- Immediate next step: edit `{target}`.\n"
++                f"{immediate_step}"
--                "- Continue with one concrete `edit`, `patch`, or `write` call that "
--                "actually changes the current generated file."
+             )
              return

src/loader/runtime/turn_completion.py (modified)

      completion_timeline_kind,
+ )
  from .repair import ResponseRepairer
--from .repair_focus import extract_active_repair_context
++from .repair_focus import (
++    extract_active_repair_context,
++    recent_repair_mutation_context_failed,
++)
  from .rollback import RollbackPlan
  from .verification_observations import VerificationObservation
  from .workflow import (
      if not target_text:
          return None
++    force_write = recent_repair_mutation_context_failed(
++        cast(list[Message], messages),
++        target_text,
++    )
      issue_line = next(
+         (
              line[2:] if line.startswith("- ") else line
          "",
+     )
      issue_sentence = f" Current verifier issue: {issue_line}" if issue_line else ""
++    if force_write:
++        prompt = (
++            "[CONTINUE QUALITY REPAIR]\n"
++            "You just described a content-quality repair, but did not execute it. "
++            "Recent `patch`/`edit` attempts for this same file failed because their "
++            "remembered context was stale or malformed. "
++            f"Emit exactly one `write(file_path=..., content=...)` tool call for `{target_text}` now."
++            f"{issue_sentence} "
++            "Write a complete valid HTML document for this file that preserves the chapter topic "
++            "and satisfies the listed quality issue. Do not call `read`, `edit`, `patch`, "
++            "`TodoWrite`, or summarize."
++        )
++        return InProgressContinuation(prompt=prompt, target=None)
++
      prompt = (
          "[CONTINUE QUALITY REPAIR]\n"
          "You just described a content-quality repair, but did not execute it. "

src/loader/tools/fs_safety.py (modified)

                  try:
                      value = ast.literal_eval(value)
                  except (SyntaxError, ValueError):
--                    return []
++                    repaired = _load_python_literal_with_balanced_closers(value)
++                    if repaired is None:
++                        return []
++                    value = repaired
      if isinstance(value, StructuredPatchHunk):
          return [value]
          return None
++def _load_python_literal_with_balanced_closers(value: str) -> object | None:
++    """Parse a truncated Python literal after appending its missing closers.
++
++    Asks ``_missing_json_closer_suffix`` for the closing brackets/braces that
++    ``value`` lacks, appends them, and evaluates the result with
++    ``ast.literal_eval``.
++
++    Args:
++        value: Literal text that already failed to parse as-is.
++
++    Returns:
++        The parsed object, or ``None`` when no closer suffix is available or
++        the repaired text still cannot be evaluated.
++    """
++    suffix = _missing_json_closer_suffix(value)
++    if not suffix:
++        return None
++    try:
++        return ast.literal_eval(value + suffix)
++    except (SyntaxError, ValueError, TypeError, MemoryError, RecursionError):
++        # literal_eval raises more than SyntaxError/ValueError on malformed
++        # input (e.g. TypeError for an unhashable dict key such as
++        # ``{[1]: 2}``); a failed repair must degrade to None, not crash.
++        return None
++
++
  def _missing_json_closer_suffix(value: str) -> str:
      stack: list[str] = []
--    in_string = False
++    quote_char = ""
      escaped = False
      pairs = {"[": "]", "{": "}"}
      openers = set(pairs)
      closers = {"]": "[", "}": "{"}
      for char in value:
--        if in_string:
++        if quote_char:
              if escaped:
                  escaped = False
              elif char == "\\":
                  escaped = True
--            elif char == '"':
++            elif char == quote_char:
--                in_string = False
++                quote_char = ""
              continue
--        if char == '"':
++        if char in {"'", '"'}:
--            in_string = True
++            quote_char = char
          elif char in openers:
              stack.append(char)
          elif char in closers:
                  return ""
              stack.pop()
--    if in_string:
++    if quote_char:
          return ""
      return "".join(pairs[char] for char in reversed(stack))

tests/test_dod.py (modified)

      assert "expected at least 15" in result.stdout
++def test_html_guide_quality_check_flags_malformed_document_structure(
++    tmp_path: Path,
++) -> None:
++    """A chapter with a duplicate closing </html> must fail the quality command."""
++
++    def rich_doc(title: str) -> str:
++        # One <h1>, nine <h2>/<p>/<ul> sections, and exactly one closing
++        # </body> and </html>.
++        body = "".join(
++            f"<h2>Section {index}</h2><p>{'x' * 180}</p><ul><li>{'y' * 90}</li></ul>"
++            for index in range(9)
++        )
++        return f"<!DOCTYPE html><html><body><h1>{title}</h1>{body}</body></html>\n"
++
++    guide = tmp_path / "guide"
++    chapters = guide / "chapters"
++    chapters.mkdir(parents=True)
++    index_path = guide / "index.html"
++    first = chapters / "01-introduction.html"
++    second = chapters / "02-installation.html"
++    third = chapters / "03-configuration.html"
++    index_path.write_text(rich_doc("Guide"))
++    first.write_text(rich_doc("Introduction"))
++    # Only this chapter is malformed: it gets a second closing </html>.
++    second.write_text(rich_doc("Installation").rstrip() + "\n</html>\n")
++    third.write_text(rich_doc("Configuration"))
++
++    implementation_plan = tmp_path / "implementation.md"
++    implementation_plan.write_text(
++        "\n".join(
++            [
++                "# Implementation Plan",
++                "",
++                "## File Changes",
++                f"- `{index_path}`",
++                f"- `{first}`",
++                f"- `{second}`",
++                f"- `{third}`",
++                "",
++            ]
++        )
++    )
++
++    dod = create_definition_of_done(
++        "Create an equally thorough multi-page HTML guide with chapter files."
++    )
++    dod.implementation_plan = str(implementation_plan)
++
++    commands = derive_verification_commands(
++        dod,
++        project_root=tmp_path,
++        task_statement=dod.task_statement,
++        supplement_existing=True,
++    )
++    # Pick the derived command that embeds the content-quality script.
++    quality_command = next(
++        command for command in commands if "HTML guide content quality issues:" in command
++    )
++    result = subprocess.run(
++        quality_command,
++        shell=True,
++        cwd=tmp_path,
++        capture_output=True,
++        text=True,
++        check=False,
++    )
++
++    assert result.returncode == 1
++    assert "02-installation.html: expected exactly one closing </html> tag" in result.stdout
++
++
  def test_derive_verification_commands_flags_insufficient_pages_for_broad_thorough_guide(
      tmp_path: Path,
  ) -> None:

tests/test_expanded_tools.py (modified)

      assert target.read_text() == "alpha\nbeta from literal string\ngamma\n"
++@pytest.mark.asyncio
++async def test_patch_tool_accepts_python_literal_hunks_missing_outer_close(
++    temp_dir: Path,
++) -> None:
++    """PatchTool applies hunks passed as a Python-literal string lacking its final ``]``."""
++    target = temp_dir / "sample.txt"
++    target.write_text("alpha\nbeta\ngamma\n")
++    tool = PatchTool(workspace_root=temp_dir)
++
++    # repr() yields a single-quoted Python literal; slicing off the last
++    # character removes the closing "]" of the outer list.
++    hunk_payload = repr(
++        [
++            {
++                "old_start": 2,
++                "old_lines": 1,
++                "new_start": 2,
++                "new_lines": 1,
++                "lines": ["-beta", "+beta from repaired literal string"],
++            }
++        ]
++    )[:-1]
++    result = await tool.execute(
++        file_path=str(target),
++        hunks=hunk_payload,
++    )
++
++    assert result.is_error is False
++    assert target.read_text() == "alpha\nbeta from repaired literal string\ngamma\n"
++
++
  @pytest.mark.asyncio
  async def test_patch_tool_rejects_context_mismatch(temp_dir: Path) -> None:
      target = temp_dir / "sample.txt"

tests/test_repair.py (modified)

      assert f"`{second_chapter.resolve(strict=False)}`" in decision.retry_message
++def test_empty_response_retry_forces_write_after_stale_quality_repair_context(
++    temp_dir: Path,
++) -> None:
++    """After a failed `edit` on the repair target, the empty-response retry demands one `write`."""
++    context = build_context(
++        temp_dir=temp_dir,
++        use_react=False,
++    )
++    repairer = ResponseRepairer(context)
++    guide = temp_dir / "guides" / "nginx"
++    chapters = guide / "chapters"
++    chapters.mkdir(parents=True)
++    chapter = chapters / "05-load-balancing.html"
++    chapter.write_text("<html><body><h1>Load Balancing</h1></body></html>\n")
++    # Steering message that names the chapter as the active repair target.
++    context.session.append(
++        Message(
++            role=Role.USER,
++            content=(
++                "Repair focus:\n"
++                f"- Improve `{chapter}`: thin content "
++                "(846 text chars, expected at least 1758).\n"
++                f"- Immediate next step: edit `{chapter}`.\n"
++            ),
++        )
++    )
++    # Tool observation that mentions the same path and carries stale-context
++    # marker text ("Failed to complete the operation after", "old_string not found").
++    context.session.append(
++        Message(
++            role=Role.TOOL,
++            content=(
++                "Observation [edit]: Error: Failed to complete the operation "
++                f"after 2 attempts for {chapter}. old_string not found in file."
++            ),
++        )
++    )
++    dod = create_definition_of_done("Create an equally thorough HTML guide.")
++    dod.touched_files = [str(chapter)]
++
++    decision = repairer.handle_empty_response(
++        task="Create an equally thorough HTML guide.",
++        original_task=None,
++        empty_retry_count=1,
++        max_empty_retries=2,
++        dod=dod,
++    )
++
++    assert decision.should_continue is True
++    assert decision.retry_message is not None
++    assert "Use exactly one `write(file_path=..., content=...)`" in decision.retry_message
++    assert "Do not call `read`, `edit`, `patch`, TodoWrite" in decision.retry_message
++
++
  def test_empty_response_retry_mentions_write_can_create_missing_parent_directories(
      temp_dir: Path,
  ) -> None:

tests/test_tool_batches.py (modified)

      assert dod.completed_items == completed_before_todowrite
++def test_todowrite_quality_repair_nudge_forces_write_after_stale_context(
++    temp_dir: Path,
++) -> None:
++    """The TodoWrite resume nudge tells the model to `write` once an `edit` failed on the target."""
++
++    # Both hooks fail the test if invoked; the nudge helper is called directly.
++    async def assess_confidence(
++        tool_name: str,
++        tool_args: dict,
++        context: str,
++    ) -> ConfidenceAssessment:
++        raise AssertionError("Confidence should not run for direct nudge test")
++
++    async def verify_action(
++        tool_name: str,
++        tool_args: dict,
++        result: str,
++        expected: str = "",
++    ) -> ActionVerification:
++        raise AssertionError("Verification should not run for direct nudge test")
++
++    guide_root = temp_dir / "guides" / "nginx"
++    chapters = guide_root / "chapters"
++    chapters.mkdir(parents=True)
++    chapter_one = chapters / "05-load-balancing.html"
++    chapter_one.write_text("<html><body><h1>Load Balancing</h1></body></html>\n")
++    # History: a repair-focus message naming the chapter, then a failed `edit`
++    # observation for the same path containing stale-context marker text.
++    context = build_context(
++        temp_dir=temp_dir,
++        messages=[
++            Message(
++                role=Role.USER,
++                content=(
++                    "Repair focus:\n"
++                    f"- Improve `{chapter_one}`: thin content "
++                    "(846 text chars, expected at least 1758).\n"
++                    f"- Immediate next step: edit `{chapter_one}`.\n"
++                ),
++            ),
++            Message(
++                role=Role.TOOL,
++                content=(
++                    "Observation [edit]: Error: Failed to complete the operation "
++                    f"after 2 attempts for {chapter_one}. old_string not found in file."
++                ),
++            ),
++        ],
++        safeguards=FakeSafeguards(),
++        assess_confidence=assess_confidence,
++        verify_action=verify_action,
++        auto_recover=False,
++    )
++    # Capture queued steering messages instead of delivering them.
++    queued_messages: list[str] = []
++    context.queue_steering_message_callback = queued_messages.append
++    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
++    dod = create_definition_of_done("Create a multi-file nginx guide.")
++
++    runner._queue_todowrite_resume_nudge(dod=dod)
++
++    assert queued_messages
++    message = queued_messages[-1]
++    assert f"Immediate next step: rewrite `{chapter_one.resolve(strict=False)}`" in message
++    assert "`write(file_path=..., content=...)`" in message
++    assert "do not call `read`, `edit`, `patch`, or TodoWrite again first" in message
++
++
  @pytest.mark.asyncio
  async def test_tool_batch_runner_preempts_post_build_audit_after_todowrite_verify_handoff(
      temp_dir: Path,

tests/test_turn_completion.py (modified)

      assert "Do not rewrite the whole file from memory" in agent.session.messages[-1].content
++@pytest.mark.asyncio
++async def test_turn_completion_forces_write_after_stale_quality_repair_context(
++    temp_dir: Path,
++) -> None:
++    """A narrated-but-unexecuted repair after a failed `edit` gets a write-only continuation prompt."""
++    backend = ScriptedBackend()
++    config = non_streaming_config()
++    config.reasoning.completion_check = False
++    agent = Agent(
++        backend=backend,
++        config=config,
++        project_root=temp_dir,
++    )
++    runtime = ConversationRuntime(agent)
++    events = []
++
++    async def capture(event) -> None:
++        events.append(event)
++
++    prepared = await runtime.turn_preparation.prepare(
++        task="Create an equally thorough HTML guide.",
++        emit=capture,
++        requested_mode="execute",
++        original_task=None,
++        on_user_question=None,
++    )
++    await runtime.phase_tracker.enter(
++        TurnPhase.ASSISTANT,
++        capture,
++        detail="Requesting assistant response",
++        reason_code="request_assistant_response",
++    )
++
++    chapter = temp_dir / "guides" / "nginx" / "chapters" / "05-load-balancing.html"
++    chapter.parent.mkdir(parents=True)
++    chapter.write_text("<html><body><h1>Load Balancing</h1></body></html>\n")
++    prepared.definition_of_done.touched_files.append(str(chapter))
++    prepared.definition_of_done.mutating_actions.append("edit")
++    # Repair-focus message naming the chapter as the active target.
++    agent.session.append(
++        Message(
++            role=Role.USER,
++            content=(
++                "Repair focus:\n"
++                f"- Improve `{chapter}`: thin content "
++                "(846 text chars, expected at least 1758).\n"
++                f"- Immediate next step: edit `{chapter}`.\n"
++            ),
++        )
++    )
++    # Failed `edit` observation for the same path with stale-context marker text.
++    agent.session.append(
++        Message(
++            role=Role.TOOL,
++            content=(
++                "Observation [edit]: Error: Failed to complete the operation after "
++                f"2 attempts for {chapter}. old_string not found in file."
++            ),
++        )
++    )
++
++    # The assistant only describes the repair; no tool call is made.
++    content = "I'll rewrite the load balancing chapter with comprehensive content."
++    decision = await runtime.turn_completion.handle_text_response(
++        content=content,
++        response_content=content,
++        task=prepared.task,
++        effective_task=prepared.effective_task,
++        iterations=1,
++        max_iterations=agent.config.max_iterations,
++        actions_taken=[],
++        continuation_count=0,
++        dod=prepared.definition_of_done,
++        emit=capture,
++        summary=prepared.summary,
++        executor=prepared.executor,
++        rollback_plan=prepared.rollback_plan,
++    )
++
++    assert decision.action == TurnCompletionAction.CONTINUE
++    message = agent.session.messages[-1].content
++    assert message.startswith("[CONTINUE QUALITY REPAIR]")
++    assert "exactly one `write(file_path=..., content=...)`" in message
++    assert "Do not call `read`, `edit`, `patch`, `TodoWrite`, or summarize." in message
++
++
  @pytest.mark.asyncio
  async def test_turn_completion_continues_queued_quality_repair_after_summary(
      temp_dir: Path,

tenseleyflow/loader / `3f4faaf`

11 changed files