Recover stale guide edits

Status	File	+	-
M	`src/loader/runtime/finalization.py`	8	2
M	`src/loader/runtime/recovery.py`	44	8
M	`src/loader/runtime/tool_batch_recovery.py`	33	0
M	`src/loader/runtime/tool_batches.py`	63	0
M	`tests/test_finalization.py`	1	0
M	`tests/test_recovery.py`	45	0
M	`tests/test_tool_batch_policies.py`	3	0
M	`tests/test_tool_batches.py`	84	0

src/loader/runtime/finalization.py (modified)

          "exceed the expected text-character floor; if it says insufficient "
          "structured content, add enough real sections, lists, code, tables, or "
          "other content blocks to exceed the block floor.",
++        "- Prefer a complete `write` or a `patch`/`edit` anchored to exact current "
++        "on-disk text; do not use remembered minimal `old_string` snippets for "
++        "generated-document expansion.",
          "- Treat structured content as substantive page body material, not table-of-"
          "contents inflation: do not add duplicate navigation entries, relabel links "
          "to existing pages as new chapters, or introduce new missing page links just "
          return (
              "Verification now identifies generated artifact content quality issues. "
              "Do not restart discovery or keep auditing unrelated files. "
--            "Your next response should be one concrete `edit` or `write`-style tool "
++            "Your next response should be one concrete `write`, `patch`, or exact-current "
++            "`edit` tool "
              f"call that expands `{primary_target.artifact_path}` to address: "
              f"{primary_target.issue}. Make a substantial change that clears the stated "
--            "threshold, not a small incremental edit, and do not summarize completion "
++            "threshold, not a small incremental edit. Do not use remembered minimal "
++            "`old_string` snippets; anchor edits to current on-disk text or rewrite the "
++            "file completely. Do not summarize completion "
              f"after only one target is touched.{remaining_hint}"
+         )

src/loader/runtime/recovery.py (modified)

      return "\n".join(f"- {hint}" for hint in category_hints)
++def _edit_old_string_mismatch_hints(
++    tool_name: str,
++    args: dict[str, Any] | None,
++    error: str,
++) -> list[str]:
++    """Return specific recovery hints for stale edit replacement strings."""
++
++    if tool_name != "edit":
++        return []
++    if "old_string not found" not in str(error or "").lower():
++        return []
++
++    target = str((args or {}).get("file_path") or (args or {}).get("path") or "").strip()
++    target_suffix = f" for `{target}`" if target else ""
++    return [
++        f"`old_string` is stale or copied from memory{target_suffix}; do not retry the same text.",
++        "If you use `edit`, copy one exact contiguous `old_string` from the current on-disk file.",
++        "If the repair rewrites most of the file, use `write` with the complete replacement content or `patch` anchored to current text instead.",
++    ]
++
++
  RECOVERY_PROMPT = """## TOOL FAILURE - INVESTIGATE AND ADAPT
  The command failed. You MUST analyze the error and take a DIFFERENT action.
      category = categorize_error(error)
      hints = get_recovery_hints(category, tool_name, args)
++    mismatch_hints = _edit_old_string_mismatch_hints(tool_name, args, error)
++    if mismatch_hints:
++        hints = "\n".join(f"- {hint}" for hint in mismatch_hints) + "\n" + hints
      args_str = ", ".join(f"{key}={value!r}" for key, value in args.items())
      return RECOVERY_PROMPT.format(
          ],
+     }
--    specific_suggestions = suggestions.get(
++    last_attempt = context.attempts[-1] if context.attempts else None
--        last_category,
++    if (
--        [
++        last_attempt is not None
--            "Manually check the file/directory structure",
++        and last_attempt.tool_name == "edit"
--            "Review the error messages for clues",
++        and "old_string not found" in str(last_attempt.error or "").lower()
--            "Try a completely different approach",
++    ):
--        ],
++        specific_suggestions = [
--    )
++            "`old_string` was stale; do not retry the same remembered text",
++            "Use exact current on-disk text for `edit`, or switch to `patch`/`write`",
++            "For large generated-file repairs, replace the whole file or anchor a patch to current content",
++        ]
++    else:
++        specific_suggestions = suggestions.get(
++            last_category,
++            [
++                "Manually check the file/directory structure",
++                "Review the error messages for clues",
++                "Try a completely different approach",
++            ],
++        )
      lines.extend(["", "Suggestions:"])
      for suggestion in specific_suggestions:

src/loader/runtime/tool_batch_recovery.py (modified)

          target_excerpt_lines = self._target_excerpt_lines(tool_call)
          if target_excerpt_lines:
              lines.extend(["", "## CURRENT TARGET EXCERPT", *target_excerpt_lines])
++        stale_edit_lines = self._stale_edit_recovery_lines(tool_call, outcome)
++        if stale_edit_lines:
++            lines.extend(["", "## STALE EDIT RECOVERY", *stale_edit_lines])
          payload_fix_lines = self._missing_payload_fix_lines(tool_call, outcome)
          if payload_fix_lines:
              lines.extend(["", "## PAYLOAD FORMAT FIX", *payload_fix_lines])
          return "\n".join(lines)
++    def _stale_edit_recovery_lines(
++        self,
++        tool_call: ToolCall,
++        outcome: ToolExecutionOutcome,
++    ) -> list[str]:
++        """Steer old_string misses away from repeated remembered edits."""
++
++        if tool_call.name != "edit":
++            return []
++        if "old_string not found" not in outcome.result_output.lower():
++            return []
++
++        raw_path = str(
++            tool_call.arguments.get("file_path")
++            or tool_call.arguments.get("path")
++            or ""
++        ).strip()
++        target = self._canonicalize_path(raw_path)
++        target_line = (
++            f"- The failed `old_string` is stale for `{target}`; do not retry it from memory."
++            if target
++            else "- The failed `old_string` is stale; do not retry it from memory."
++        )
++        return [
++            target_line,
++            "- If using `edit`, copy an exact contiguous `old_string` from the current target excerpt above.",
++            "- For generated-document expansion or large rewrites, prefer `write` with the complete replacement file or `patch` anchored to current text.",
++            "- Do not reopen unrelated reference materials before applying the next mutation.",
++        ]
++
      def _missing_payload_fix_lines(
          self,
          tool_call: ToolCall,

src/loader/runtime/tool_batches.py (modified)

          if changed_path not in allowed_paths:
              return
++        if _repair_context_is_html_quality(repair):
++            next_target = _next_quality_repair_path(
++                repair,
++                changed_path=changed_path,
++            )
++            if next_target:
++                self.context.queue_steering_message(
++                    "The active HTML content-quality repair target was updated. "
++                    "If the current file now comfortably clears its stated threshold, "
++                    f"continue directly with the next listed quality target `{next_target}` "
++                    "using one substantial write/edit/patch anchored to current content. "
++                    "If it still looks thin, expand this same file further now. "
++                    "Do not rerun verification, reopen unrelated references, or summarize "
++                    "completion after only one quality target."
++                )
++                return
++
          if changed_path == str(Path(repair.artifact_path).expanduser().resolve(strict=False)):
              self.context.queue_steering_message(
                  "The active verification repair target was updated. "
      return ""
++def _repair_context_is_html_quality(repair: Any) -> bool:
++    """Return whether the active repair context is for generated HTML quality."""
++
++    return any(
++        _repair_line_is_html_quality(line)
++        for line in getattr(repair, "repair_lines", ()) or ()
++    )
++
++
++def _repair_line_is_html_quality(line: str) -> bool:
++    lowered = str(line or "").lower()
++    return (
++        "thin content" in lowered
++        or "insufficient structured content" in lowered
++        or "content-quality" in lowered
++        or "content quality" in lowered
++    )
++
++
++def _next_quality_repair_path(repair: Any, *, changed_path: str) -> str:
++    """Return the next concrete repair file after a successful quality mutation."""
++
++    try:
++        normalized_changed = str(Path(changed_path).expanduser().resolve(strict=False))
++    except (OSError, RuntimeError, ValueError):
++        normalized_changed = str(Path(changed_path).expanduser())
++
++    normalized_paths: list[str] = []
++    for raw_path in getattr(repair, "allowed_paths", ()) or ():
++        try:
++            normalized = str(Path(raw_path).expanduser().resolve(strict=False))
++        except (OSError, RuntimeError, ValueError):
++            normalized = str(Path(raw_path).expanduser())
++        if normalized and normalized not in normalized_paths:
++            normalized_paths.append(normalized)
++
++    if normalized_changed in normalized_paths:
++        index = normalized_paths.index(normalized_changed)
++        if index + 1 < len(normalized_paths):
++            return normalized_paths[index + 1]
++    for normalized in normalized_paths:
++        if normalized != normalized_changed:
++            return normalized
++    return ""
++
++
  def _tool_call_targets_path(tool_call: ToolCall, target: str) -> bool:
      if not target:
          return False

tests/test_finalization.py (modified)


     assert f"Improve `{chapter_paths[0]}`: thin content" in guidance
     assert f"Improve `{chapter_paths[-1]}`: thin content" in guidance
     assert "add enough concrete prose" in guidance
+    assert "exact current on-disk text" in guidance
     assert "not table-of-contents inflation" in guidance
     assert "do not add duplicate navigation entries" in guidance
     assert "do not stop after touching only the first file" in guidance

tests/test_recovery.py (modified)

          assert "content_chars" in prompt
          assert "index.html" in prompt
++    def test_format_recovery_prompt_for_old_string_miss_prefers_current_text(self):
++        ctx = RecoveryContext(
++            original_tool="edit",
++            original_args={
++                "file_path": "~/Loader/guides/nginx/chapters/02-installation.html",
++                "old_string": "<h1>Installation</h1>",
++                "new_string": "<h1>Installation</h1><p>Expanded.</p>",
++            },
++        )
++        ctx.add_attempt(
++            "edit",
++            ctx.original_args,
++            "old_string not found in file. Make sure it matches exactly.",
++        )
++
++        prompt = format_recovery_prompt(
++            ctx,
++            "edit",
++            ctx.original_args,
++            "old_string not found in file. Make sure it matches exactly.",
++        )
++
++        assert "`old_string` is stale" in prompt
++        assert "current on-disk file" in prompt
++        assert "write` with the complete replacement content" in prompt
++
  class TestFormatFailureMessage:
      """Tests for failure message formatting."""
          assert "Error 1" in msg
          assert "Error 2" in msg
          assert "Error 3" in msg
++
++    def test_format_failure_message_for_old_string_miss_is_specific(self):
++        ctx = RecoveryContext(
++            original_tool="edit",
++            original_args={
++                "file_path": "guide.html",
++                "old_string": "<h1>Old</h1>",
++                "new_string": "<h1>New</h1>",
++            },
++            max_retries=2,
++        )
++        ctx.add_attempt("edit", ctx.original_args, "old_string not found in file")
++        ctx.add_attempt("edit", ctx.original_args, "old_string not found in file")
++
++        msg = format_failure_message(ctx)
++
++        assert "`old_string` was stale" in msg
++        assert "exact current on-disk text" in msg
++        assert "Try a completely different approach" not in msg

tests/test_tool_batch_policies.py (modified)


     assert "6 | ## Status" in follow_up.content
     assert "7 | The runtime is stable." in follow_up.content
     assert "replace the containing block in one edit" in follow_up.content
+    assert "## STALE EDIT RECOVERY" in follow_up.content
+    assert "do not retry it from memory" in follow_up.content
+    assert "complete replacement file" in follow_up.content
 
 
 @pytest.mark.asyncio

tests/test_tool_batches.py (modified)

      assert "make one real edit" in queued[0]
++@pytest.mark.asyncio
++async def test_tool_batch_runner_quality_repair_success_hands_to_next_target(
++    temp_dir: Path,
++) -> None:
++    async def assess_confidence(
++        tool_name: str,
++        tool_args: dict,
++        context: str,
++    ) -> ConfidenceAssessment:
++        raise AssertionError("Confidence scoring should be disabled in this scenario")
++
++    async def verify_action(
++        tool_name: str,
++        tool_args: dict,
++        result: str,
++        expected: str = "",
++    ) -> ActionVerification:
++        raise AssertionError("Verification should not run in this scenario")
++
++    chapters = temp_dir / "guide" / "chapters"
++    first = chapters / "01-introduction.html"
++    second = chapters / "02-installation.html"
++    chapters.mkdir(parents=True)
++    first.write_text("<h1>Intro</h1>\n")
++    second.write_text("<h1>Install</h1>\n")
++    context = build_context(
++        temp_dir=temp_dir,
++        messages=[
++            Message(
++                role=Role.ASSISTANT,
++                content=(
++                    "Repair focus:\n"
++                    f"- Improve `{first}`: thin content (400 text chars, expected at least 1758).\n"
++                    f"- Improve `{second}`: insufficient structured content (6 blocks, expected at least 18).\n"
++                    f"- Immediate next step: edit `{first}` with a substantial expansion or replacement.\n"
++                    "- Repair every listed quality target in order before any final answer.\n"
++                ),
++            )
++        ],
++        safeguards=FakeSafeguards(),
++        assess_confidence=assess_confidence,
++        verify_action=verify_action,
++    )
++    queued: list[str] = []
++    context.queue_steering_message_callback = queued.append
++    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
++    dod = create_definition_of_done("Repair generated HTML guide quality.")
++    tool_call = ToolCall(
++        id="write-intro",
++        name="write",
++        arguments={
++            "file_path": str(first),
++            "content": "<h1>Intro</h1><p>Substantial expansion.</p>\n",
++        },
++    )
++
++    await runner.execute_batch(
++        tool_calls=[tool_call],
++        tool_source="assistant",
++        pending_tool_calls_seen=set(),
++        emit=_noop_emit,
++        summary=TurnSummary(final_response=""),
++        dod=dod,
++        executor=FakeExecutor(
++            [
++                tool_outcome(
++                    tool_call=tool_call,
++                    output=f"Successfully wrote {first}",
++                    is_error=False,
++                )
++            ]
++        ),  # type: ignore[arg-type]
++        on_confirmation=None,
++        on_user_question=None,
++        emit_confirmation=None,
++        consecutive_errors=0,
++    )
++
++    assert queued
++    assert any("next listed quality target" in message for message in queued)
++    assert any(str(second.resolve(strict=False)) in message for message in queued)
++    assert any("Do not rerun verification" in message for message in queued)
++
++
  @pytest.mark.asyncio
  async def test_tool_batch_runner_hands_off_after_active_repair_support_file_write(
      temp_dir: Path,

`@@ -489,6 +489,7 @@` def test_verification_repair_guidance_keeps_multi_file_quality_worklist(
489	assert f"Improve `{chapter_paths[0]}`: thin content" in guidance	489	assert f"Improve `{chapter_paths[0]}`: thin content" in guidance
490	assert f"Improve `{chapter_paths[-1]}`: thin content" in guidance	490	assert f"Improve `{chapter_paths[-1]}`: thin content" in guidance
491	assert "add enough concrete prose" in guidance	491	assert "add enough concrete prose" in guidance
		492	+ assert "exact current on-disk text" in guidance
492	assert "not table-of-contents inflation" in guidance	493	assert "not table-of-contents inflation" in guidance
493	assert "do not add duplicate navigation entries" in guidance	494	assert "do not add duplicate navigation entries" in guidance
494	assert "do not stop after touching only the first file" in guidance	495	assert "do not stop after touching only the first file" in guidance

`@@ -576,6 +576,9 @@` async def test_tool_batch_recovery_controller_includes_current_target_excerpt_fo
576	assert "6 | ## Status" in follow_up.content	576	assert "6 | ## Status" in follow_up.content
577	assert "7 | The runtime is stable." in follow_up.content	577	assert "7 | The runtime is stable." in follow_up.content
578	assert "replace the containing block in one edit" in follow_up.content	578	assert "replace the containing block in one edit" in follow_up.content
		579	+ assert "## STALE EDIT RECOVERY" in follow_up.content
		580	+ assert "do not retry it from memory" in follow_up.content
		581	+ assert "complete replacement file" in follow_up.content
579		582
580		583
581	@pytest.mark.asyncio	584	@pytest.mark.asyncio

tenseleyflow/loader / `7a4495a`

8 changed files