`3f4faaf`

Force stale repairs to write

Authored by mfwolffe <wolffemf@dukes.jmu.edu> 1 week ago

SHA: 3f4faafb565a1d9a79d0fac446b6cc084c1038ad
Parents: c7a21e5
Tree: 62365ed

11 changed files

Status	File	+	-
M	`src/loader/runtime/dod.py`	16	0
M	`src/loader/runtime/repair.py`	32	12
M	`src/loader/runtime/repair_focus.py`	55	0
M	`src/loader/runtime/tool_batches.py`	24	4
M	`src/loader/runtime/turn_completion.py`	22	1
M	`src/loader/tools/fs_safety.py`	21	8
M	`tests/test_dod.py`	65	0
M	`tests/test_expanded_tools.py`	28	0
M	`tests/test_repair.py`	50	0
M	`tests/test_tool_batches.py`	62	0
M	`tests/test_turn_completion.py`	82	0

src/loader/runtime/dod.py (modified)

              f"minimum_chapter_blocks = {quality_floor.chapter_blocks}",
              "tag_pattern = re.compile(r'<[^>]+>')",
              "content_block_pattern = re.compile(r'<(p|li|pre|code|section|article|table|h2|h3|h4)\\b', re.IGNORECASE)",
 +            "html_close_pattern = re.compile(r'</html\\s*>', re.IGNORECASE)",
 +            "body_close_pattern = re.compile(r'</body\\s*>', re.IGNORECASE)",
              "issues = []",
              "checked = 0",
              "for raw_path in paths:",
              "    plain = re.sub(r'\\s+', ' ', plain).strip()",
              "    content_blocks = len(content_block_pattern.findall(text))",
              "    has_h1 = bool(re.search(r'<h1\\b', text, re.IGNORECASE))",
 +            "    html_close_matches = list(html_close_pattern.finditer(text))",
 +            "    body_close_matches = list(body_close_pattern.finditer(text))",
              "    minimum_chars = minimum_index_chars if path.name.lower() == 'index.html' else minimum_chapter_chars",
              "    minimum_blocks = minimum_index_blocks if path.name.lower() == 'index.html' else minimum_chapter_blocks",
 +            "    if len(body_close_matches) != 1:",
 +            "        issues.append(",
 +            "            f'{path}: expected exactly one closing </body> tag (found {len(body_close_matches)})'",
 +            "        )",
 +            "    if len(html_close_matches) != 1:",
 +            "        issues.append(",
 +            "            f'{path}: expected exactly one closing </html> tag (found {len(html_close_matches)})'",
 +            "        )",
 +            "    if html_close_matches and text[html_close_matches[-1].end():].strip():",
 +            "        issues.append(f'{path}: content appears after closing </html>')",
 +            "    if html_close_matches and body_close_matches and body_close_matches[-1].start() > html_close_matches[-1].start():",
 +            "        issues.append(f'{path}: closing </body> appears after closing </html>')",
              "    if not has_h1:",
              "        issues.append(f'{path}: missing <h1>')",
              "    if len(plain) < minimum_chars:",

src/loader/runtime/repair.py (modified)

  from .parsing import parse_tool_calls
  from .path_display import display_runtime_path
  from .recovery import detect_missing_mutation_payload
 -from .repair_focus import ActiveRepairContext, extract_active_repair_context
 +from .repair_focus import (
 +    ActiveRepairContext,
 +    extract_active_repair_context,
 +    recent_repair_mutation_context_failed,
 +)
  from .workflow import (
      infer_output_outline_label,
      infer_pending_todo_output_target,
+         ]
          if issue_line:
              lines.append(f"- Current verifier issue: {issue_line[2:] if issue_line.startswith('- ') else issue_line}")
 -        lines.extend(
 -            [
 -                "- Use one bounded `edit`, `patch`, or `write` call for that same "
 -                "file now. Append or replace a body section with 4-6 substantive "
 -                "sections, lists, commands, or examples; do not attempt a giant "
 -                "full-page rewrite from memory.",
 -                "- Do not add table-of-contents entries, do not retarget links, and "
 -                "do not reopen unrelated reference files for this retry.",
 -                "- No narration, no TodoWrite, no final summary, and no empty "
 -                "response; emit the mutation tool call now.",
 -            ]
 +        force_write = recent_repair_mutation_context_failed(
 +            self.context.session.messages,
 +            target,
+         )
 +        if force_write:
 +            lines.extend(
 +                [
 +                    "- Recent `edit`/`patch` attempts for this same target failed "
 +                    "against stale or malformed context. Use exactly one "
 +                    "`write(file_path=..., content=...)` call now with a complete "
 +                    "valid HTML document for that file.",
 +                    "- Do not call `read`, `edit`, `patch`, TodoWrite, or a final "
 +                    "summary on this retry; emit the `write` mutation tool call now.",
 +                ]
 +            )
 +        else:
 +            lines.extend(
 +                [
 +                    "- Use one bounded `edit`, `patch`, or `write` call for that same "
 +                    "file now. Append or replace a body section with 4-6 substantive "
 +                    "sections, lists, commands, or examples; do not attempt a giant "
 +                    "full-page rewrite from memory.",
 +                    "- Do not add table-of-contents entries, do not retarget links, and "
 +                    "do not reopen unrelated reference files for this retry.",
 +                    "- No narration, no TodoWrite, no final summary, and no empty "
 +                    "response; emit the mutation tool call now.",
 +                ]
 +            )
          if remaining_line:
              lines.append(remaining_line)
          return "\n".join(lines)

src/loader/runtime/repair_focus.py (modified)

  from ..llm.base import Message
 +_STALE_REPAIR_MUTATION_MARKERS = (
 +    "old_string not found",
 +    "old_string was stale",
 +    "do not retry the same remembered text",
 +    "patch hunks are missing",
 +    "provide structured patch hunks",
 +    "hunks must not be empty",
 +    "structured patch context mismatch",
 +    "structured patch hunk consumed",
 +    "structured patch references lines past the end",
 +    "structured patch hunks overlap",
 +    "failed to complete the operation after",
 +)
++
  @dataclass(frozen=True)
  class ActiveRepairContext:
      return normalized in normalized_paths
 +def recent_repair_mutation_context_failed(
 +    messages: list[Message],
 +    target: str,
 +    *,
 +    lookback: int = 24,
 +) -> bool:
 +    """Return whether recent repair attempts proved the target context is stale."""
++
 +    target_tokens = _target_match_tokens(target)
 +    if not target_tokens:
 +        return False
++
 +    for message in reversed(messages[-lookback:]):
 +        content = str(getattr(message, "content", "") or "")
 +        if not content:
 +            continue
 +        lowered = content.lower()
 +        if not any(token and token in content for token in target_tokens):
 +            continue
 +        if any(marker in lowered for marker in _STALE_REPAIR_MUTATION_MARKERS):
 +            return True
 +    return False
++
++
  def normalize_repair_path(raw_path: str) -> str:
      text = str(raw_path or "").strip()
      if not text:
          return str(Path(text).expanduser())
 +def _target_match_tokens(raw_path: str) -> tuple[str, ...]:
 +    text = str(raw_path or "").strip()
 +    if not text:
 +        return ()
 +    tokens: list[str] = [text]
 +    normalized = normalize_repair_path(text)
 +    if normalized and normalized not in tokens:
 +        tokens.append(normalized)
 +    try:
 +        name = Path(normalized or text).name
 +    except (OSError, RuntimeError, ValueError):
 +        name = ""
 +    if name and name not in tokens:
 +        tokens.append(name)
 +    return tuple(tokens)
++
++
  def _path_roots(paths: set[str]) -> set[str]:
      roots: set[str] = set()
      for raw_path in paths:

src/loader/runtime/tool_batches.py (modified)

  from .path_display import display_runtime_path
  from .policy_timeline import append_verification_timeline_entry
  from .recovery import RecoveryContext, detect_missing_mutation_payload
 -from .repair_focus import extract_active_repair_context, path_within_allowed_roots
 +from .repair_focus import (
 +    extract_active_repair_context,
 +    path_within_allowed_roots,
 +    recent_repair_mutation_context_failed,
 +)
  from .safeguard_services import extract_shell_text_rewrite_target
  from .tool_batch_checks import ToolBatchConfidenceGate, ToolBatchVerificationGate
  from .tool_batch_recovery import ToolBatchRecoveryController
                  if repair_issue
                  else f"- Improve `{target}` until it satisfies the active content-quality verifier.\n"
+             )
 +            force_write = recent_repair_mutation_context_failed(
 +                self.context.session.messages,
 +                target,
 +            )
 +            if force_write:
 +                immediate_step = (
 +                    f"- Immediate next step: rewrite `{target}` with one `write` call.\n"
 +                    "- Recent `edit`/`patch` attempts for this file failed against stale "
 +                    "or malformed context. Use `write(file_path=..., content=...)` with "
 +                    "a complete valid HTML document, and do not call `read`, `edit`, "
 +                    "`patch`, or TodoWrite again first."
 +                )
 +            else:
 +                immediate_step = (
 +                    f"- Immediate next step: edit `{target}`.\n"
 +                    "- Continue with one concrete `edit`, `patch`, or `write` call that "
 +                    "actually changes the current generated file."
 +                )
              self.context.set_workflow_mode("execute")
              self.context.queue_steering_message(
                  "Todo tracking is updated, but verification still has an active "
                  "not finish yet.\n\n"
                  "Repair focus:\n"
                  f"{issue_line}"
 -                f"- Immediate next step: edit `{target}`.\n"
 -                "- Continue with one concrete `edit`, `patch`, or `write` call that "
 -                "actually changes the current generated file."
 +                f"{immediate_step}"
+             )
              return

src/loader/runtime/turn_completion.py (modified)

      completion_timeline_kind,
+ )
  from .repair import ResponseRepairer
 -from .repair_focus import extract_active_repair_context
 +from .repair_focus import (
 +    extract_active_repair_context,
 +    recent_repair_mutation_context_failed,
 +)
  from .rollback import RollbackPlan
  from .verification_observations import VerificationObservation
  from .workflow import (
      if not target_text:
          return None
 +    force_write = recent_repair_mutation_context_failed(
 +        cast(list[Message], messages),
 +        target_text,
 +    )
      issue_line = next(
+         (
              line[2:] if line.startswith("- ") else line
          "",
+     )
      issue_sentence = f" Current verifier issue: {issue_line}" if issue_line else ""
 +    if force_write:
 +        prompt = (
 +            "[CONTINUE QUALITY REPAIR]\n"
 +            "You just described a content-quality repair, but did not execute it. "
 +            "Recent `patch`/`edit` attempts for this same file failed because their "
 +            "remembered context was stale or malformed. "
 +            f"Emit exactly one `write(file_path=..., content=...)` tool call for `{target_text}` now."
 +            f"{issue_sentence} "
 +            "Write a complete valid HTML document for this file that preserves the chapter topic "
 +            "and satisfies the listed quality issue. Do not call `read`, `edit`, `patch`, "
 +            "`TodoWrite`, or summarize."
 +        )
 +        return InProgressContinuation(prompt=prompt, target=None)
++
      prompt = (
          "[CONTINUE QUALITY REPAIR]\n"
          "You just described a content-quality repair, but did not execute it. "

src/loader/tools/fs_safety.py (modified)

                  try:
                      value = ast.literal_eval(value)
                  except (SyntaxError, ValueError):
 -                    return []
 +                    repaired = _load_python_literal_with_balanced_closers(value)
 +                    if repaired is None:
 +                        return []
 +                    value = repaired
      if isinstance(value, StructuredPatchHunk):
          return [value]
          return None
 +def _load_python_literal_with_balanced_closers(value: str) -> object | None:
 +    suffix = _missing_json_closer_suffix(value)
 +    if not suffix:
 +        return None
 +    try:
 +        return ast.literal_eval(value + suffix)
 +    except (SyntaxError, ValueError):
 +        return None
++
++
  def _missing_json_closer_suffix(value: str) -> str:
      stack: list[str] = []
 -    in_string = False
 +    quote_char = ""
      escaped = False
      pairs = {"[": "]", "{": "}"}
      openers = set(pairs)
      closers = {"]": "[", "}": "{"}
      for char in value:
 -        if in_string:
 +        if quote_char:
              if escaped:
                  escaped = False
              elif char == "\\":
                  escaped = True
 -            elif char == '"':
 -                in_string = False
 +            elif char == quote_char:
 +                quote_char = ""
              continue
 -        if char == '"':
 -            in_string = True
 +        if char in {"'", '"'}:
 +            quote_char = char
          elif char in openers:
              stack.append(char)
          elif char in closers:
                  return ""
              stack.pop()
 -    if in_string:
 +    if quote_char:
          return ""
      return "".join(pairs[char] for char in reversed(stack))

tests/test_dod.py (modified)

      assert "expected at least 15" in result.stdout
 +def test_html_guide_quality_check_flags_malformed_document_structure(
 +    tmp_path: Path,
 +) -> None:
 +    def rich_doc(title: str) -> str:
 +        body = "".join(
 +            f"<h2>Section {index}</h2><p>{'x' * 180}</p><ul><li>{'y' * 90}</li></ul>"
 +            for index in range(9)
 +        )
 +        return f"<!DOCTYPE html><html><body><h1>{title}</h1>{body}</body></html>\n"
++
 +    guide = tmp_path / "guide"
 +    chapters = guide / "chapters"
 +    chapters.mkdir(parents=True)
 +    index_path = guide / "index.html"
 +    first = chapters / "01-introduction.html"
 +    second = chapters / "02-installation.html"
 +    third = chapters / "03-configuration.html"
 +    index_path.write_text(rich_doc("Guide"))
 +    first.write_text(rich_doc("Introduction"))
 +    second.write_text(rich_doc("Installation").rstrip() + "\n</html>\n")
 +    third.write_text(rich_doc("Configuration"))
++
 +    implementation_plan = tmp_path / "implementation.md"
 +    implementation_plan.write_text(
 +        "\n".join(
 +            [
 +                "# Implementation Plan",
 +                "",
 +                "## File Changes",
 +                f"- `{index_path}`",
 +                f"- `{first}`",
 +                f"- `{second}`",
 +                f"- `{third}`",
 +                "",
 +            ]
 +        )
 +    )
++
 +    dod = create_definition_of_done(
 +        "Create an equally thorough multi-page HTML guide with chapter files."
 +    )
 +    dod.implementation_plan = str(implementation_plan)
++
 +    commands = derive_verification_commands(
 +        dod,
 +        project_root=tmp_path,
 +        task_statement=dod.task_statement,
 +        supplement_existing=True,
 +    )
 +    quality_command = next(
 +        command for command in commands if "HTML guide content quality issues:" in command
 +    )
 +    result = subprocess.run(
 +        quality_command,
 +        shell=True,
 +        cwd=tmp_path,
 +        capture_output=True,
 +        text=True,
 +        check=False,
 +    )
++
 +    assert result.returncode == 1
 +    assert "02-installation.html: expected exactly one closing </html> tag" in result.stdout
++
++
  def test_derive_verification_commands_flags_insufficient_pages_for_broad_thorough_guide(
      tmp_path: Path,
  ) -> None:

tests/test_expanded_tools.py (modified)

      assert target.read_text() == "alpha\nbeta from literal string\ngamma\n"
 +@pytest.mark.asyncio
 +async def test_patch_tool_accepts_python_literal_hunks_missing_outer_close(
 +    temp_dir: Path,
 +) -> None:
 +    target = temp_dir / "sample.txt"
 +    target.write_text("alpha\nbeta\ngamma\n")
 +    tool = PatchTool(workspace_root=temp_dir)
++
 +    hunk_payload = repr(
 +        [
 +            {
 +                "old_start": 2,
 +                "old_lines": 1,
 +                "new_start": 2,
 +                "new_lines": 1,
 +                "lines": ["-beta", "+beta from repaired literal string"],
 +            }
 +        ]
 +    )[:-1]
 +    result = await tool.execute(
 +        file_path=str(target),
 +        hunks=hunk_payload,
 +    )
++
 +    assert result.is_error is False
 +    assert target.read_text() == "alpha\nbeta from repaired literal string\ngamma\n"
++
++
  @pytest.mark.asyncio
  async def test_patch_tool_rejects_context_mismatch(temp_dir: Path) -> None:
      target = temp_dir / "sample.txt"

tests/test_repair.py (modified)

      assert f"`{second_chapter.resolve(strict=False)}`" in decision.retry_message
 +def test_empty_response_retry_forces_write_after_stale_quality_repair_context(
 +    temp_dir: Path,
 +) -> None:
 +    context = build_context(
 +        temp_dir=temp_dir,
 +        use_react=False,
 +    )
 +    repairer = ResponseRepairer(context)
 +    guide = temp_dir / "guides" / "nginx"
 +    chapters = guide / "chapters"
 +    chapters.mkdir(parents=True)
 +    chapter = chapters / "05-load-balancing.html"
 +    chapter.write_text("<html><body><h1>Load Balancing</h1></body></html>\n")
 +    context.session.append(
 +        Message(
 +            role=Role.USER,
 +            content=(
 +                "Repair focus:\n"
 +                f"- Improve `{chapter}`: thin content "
 +                "(846 text chars, expected at least 1758).\n"
 +                f"- Immediate next step: edit `{chapter}`.\n"
 +            ),
 +        )
 +    )
 +    context.session.append(
 +        Message(
 +            role=Role.TOOL,
 +            content=(
 +                "Observation [edit]: Error: Failed to complete the operation "
 +                f"after 2 attempts for {chapter}. old_string not found in file."
 +            ),
 +        )
 +    )
 +    dod = create_definition_of_done("Create an equally thorough HTML guide.")
 +    dod.touched_files = [str(chapter)]
++
 +    decision = repairer.handle_empty_response(
 +        task="Create an equally thorough HTML guide.",
 +        original_task=None,
 +        empty_retry_count=1,
 +        max_empty_retries=2,
 +        dod=dod,
 +    )
++
 +    assert decision.should_continue is True
 +    assert decision.retry_message is not None
 +    assert "Use exactly one `write(file_path=..., content=...)`" in decision.retry_message
 +    assert "Do not call `read`, `edit`, `patch`, TodoWrite" in decision.retry_message
++
++
  def test_empty_response_retry_mentions_write_can_create_missing_parent_directories(
      temp_dir: Path,
  ) -> None:

tests/test_tool_batches.py (modified)

      assert dod.completed_items == completed_before_todowrite
 +def test_todowrite_quality_repair_nudge_forces_write_after_stale_context(
 +    temp_dir: Path,
 +) -> None:
 +    async def assess_confidence(
 +        tool_name: str,
 +        tool_args: dict,
 +        context: str,
 +    ) -> ConfidenceAssessment:
 +        raise AssertionError("Confidence should not run for direct nudge test")
++
 +    async def verify_action(
 +        tool_name: str,
 +        tool_args: dict,
 +        result: str,
 +        expected: str = "",
 +    ) -> ActionVerification:
 +        raise AssertionError("Verification should not run for direct nudge test")
++
 +    guide_root = temp_dir / "guides" / "nginx"
 +    chapters = guide_root / "chapters"
 +    chapters.mkdir(parents=True)
 +    chapter_one = chapters / "05-load-balancing.html"
 +    chapter_one.write_text("<html><body><h1>Load Balancing</h1></body></html>\n")
 +    context = build_context(
 +        temp_dir=temp_dir,
 +        messages=[
 +            Message(
 +                role=Role.USER,
 +                content=(
 +                    "Repair focus:\n"
 +                    f"- Improve `{chapter_one}`: thin content "
 +                    "(846 text chars, expected at least 1758).\n"
 +                    f"- Immediate next step: edit `{chapter_one}`.\n"
 +                ),
 +            ),
 +            Message(
 +                role=Role.TOOL,
 +                content=(
 +                    "Observation [edit]: Error: Failed to complete the operation "
 +                    f"after 2 attempts for {chapter_one}. old_string not found in file."
 +                ),
 +            ),
 +        ],
 +        safeguards=FakeSafeguards(),
 +        assess_confidence=assess_confidence,
 +        verify_action=verify_action,
 +        auto_recover=False,
 +    )
 +    queued_messages: list[str] = []
 +    context.queue_steering_message_callback = queued_messages.append
 +    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
 +    dod = create_definition_of_done("Create a multi-file nginx guide.")
++
 +    runner._queue_todowrite_resume_nudge(dod=dod)
++
 +    assert queued_messages
 +    message = queued_messages[-1]
 +    assert f"Immediate next step: rewrite `{chapter_one.resolve(strict=False)}`" in message
 +    assert "`write(file_path=..., content=...)`" in message
 +    assert "do not call `read`, `edit`, `patch`, or TodoWrite again first" in message
++
++
  @pytest.mark.asyncio
  async def test_tool_batch_runner_preempts_post_build_audit_after_todowrite_verify_handoff(
      temp_dir: Path,

tests/test_turn_completion.py (modified)

      assert "Do not rewrite the whole file from memory" in agent.session.messages[-1].content
 +@pytest.mark.asyncio
 +async def test_turn_completion_forces_write_after_stale_quality_repair_context(
 +    temp_dir: Path,
 +) -> None:
 +    backend = ScriptedBackend()
 +    config = non_streaming_config()
 +    config.reasoning.completion_check = False
 +    agent = Agent(
 +        backend=backend,
 +        config=config,
 +        project_root=temp_dir,
 +    )
 +    runtime = ConversationRuntime(agent)
 +    events = []
++
 +    async def capture(event) -> None:
 +        events.append(event)
++
 +    prepared = await runtime.turn_preparation.prepare(
 +        task="Create an equally thorough HTML guide.",
 +        emit=capture,
 +        requested_mode="execute",
 +        original_task=None,
 +        on_user_question=None,
 +    )
 +    await runtime.phase_tracker.enter(
 +        TurnPhase.ASSISTANT,
 +        capture,
 +        detail="Requesting assistant response",
 +        reason_code="request_assistant_response",
 +    )
++
 +    chapter = temp_dir / "guides" / "nginx" / "chapters" / "05-load-balancing.html"
 +    chapter.parent.mkdir(parents=True)
 +    chapter.write_text("<html><body><h1>Load Balancing</h1></body></html>\n")
 +    prepared.definition_of_done.touched_files.append(str(chapter))
 +    prepared.definition_of_done.mutating_actions.append("edit")
 +    agent.session.append(
 +        Message(
 +            role=Role.USER,
 +            content=(
 +                "Repair focus:\n"
 +                f"- Improve `{chapter}`: thin content "
 +                "(846 text chars, expected at least 1758).\n"
 +                f"- Immediate next step: edit `{chapter}`.\n"
 +            ),
 +        )
 +    )
 +    agent.session.append(
 +        Message(
 +            role=Role.TOOL,
 +            content=(
 +                "Observation [edit]: Error: Failed to complete the operation after "
 +                f"2 attempts for {chapter}. old_string not found in file."
 +            ),
 +        )
 +    )
++
 +    content = "I'll rewrite the load balancing chapter with comprehensive content."
 +    decision = await runtime.turn_completion.handle_text_response(
 +        content=content,
 +        response_content=content,
 +        task=prepared.task,
 +        effective_task=prepared.effective_task,
 +        iterations=1,
 +        max_iterations=agent.config.max_iterations,
 +        actions_taken=[],
 +        continuation_count=0,
 +        dod=prepared.definition_of_done,
 +        emit=capture,
 +        summary=prepared.summary,
 +        executor=prepared.executor,
 +        rollback_plan=prepared.rollback_plan,
 +    )
++
 +    assert decision.action == TurnCompletionAction.CONTINUE
 +    message = agent.session.messages[-1].content
 +    assert message.startswith("[CONTINUE QUALITY REPAIR]")
 +    assert "exactly one `write(file_path=..., content=...)`" in message
 +    assert "Do not call `read`, `edit`, `patch`, `TodoWrite`, or summarize." in message
++
++
  @pytest.mark.asyncio
  async def test_turn_completion_continues_queued_quality_repair_after_summary(
      temp_dir: Path,