`7a4495a`

Recover stale guide edits

Authored by mfwolffe <wolffemf@dukes.jmu.edu> 1 week ago

SHA: 7a4495ac004853da076b066fea24c139f2940e15
Parents: bea3236
Tree: 6e040e4

8 changed files

Status	File	+	-
M	`src/loader/runtime/finalization.py`	8	2
M	`src/loader/runtime/recovery.py`	44	8
M	`src/loader/runtime/tool_batch_recovery.py`	33	0
M	`src/loader/runtime/tool_batches.py`	63	0
M	`tests/test_finalization.py`	1	0
M	`tests/test_recovery.py`	45	0
M	`tests/test_tool_batch_policies.py`	3	0
M	`tests/test_tool_batches.py`	84	0

src/loader/runtime/finalization.py — modified

          "exceed the expected text-character floor; if it says insufficient "
          "structured content, add enough real sections, lists, code, tables, or "
          "other content blocks to exceed the block floor.",
 +        "- Prefer a complete `write` or a `patch`/`edit` anchored to exact current "
 +        "on-disk text; do not use remembered minimal `old_string` snippets for "
 +        "generated-document expansion.",
          "- Treat structured content as substantive page body material, not table-of-"
          "contents inflation: do not add duplicate navigation entries, relabel links "
          "to existing pages as new chapters, or introduce new missing page links just "
          return (
              "Verification now identifies generated artifact content quality issues. "
              "Do not restart discovery or keep auditing unrelated files. "
 -            "Your next response should be one concrete `edit` or `write`-style tool "
 +            "Your next response should be one concrete `write`, `patch`, or exact-current "
 +            "`edit` tool "
              f"call that expands `{primary_target.artifact_path}` to address: "
              f"{primary_target.issue}. Make a substantial change that clears the stated "
 -            "threshold, not a small incremental edit, and do not summarize completion "
 +            "threshold, not a small incremental edit. Do not use remembered minimal "
 +            "`old_string` snippets; anchor edits to current on-disk text or rewrite the "
 +            "file completely. Do not summarize completion "
              f"after only one target is touched.{remaining_hint}"
+         )

src/loader/runtime/recovery.py — modified

      return "\n".join(f"- {hint}" for hint in category_hints)
 +def _edit_old_string_mismatch_hints(
 +    tool_name: str,
 +    args: dict[str, Any] | None,
 +    error: str,
 +) -> list[str]:
 +    """Return specific recovery hints for stale edit replacement strings."""
++
 +    if tool_name != "edit":  # hints apply only to the `edit` tool
 +        return []
 +    if "old_string not found" not in str(error or "").lower():  # case-insensitive; None/empty error never matches
 +        return []
++
 +    target = str((args or {}).get("file_path") or (args or {}).get("path") or "").strip()  # either key; None args read as {}
 +    target_suffix = f" for `{target}`" if target else ""  # omitted when no path was supplied
 +    return [
 +        f"`old_string` is stale or copied from memory{target_suffix}; do not retry the same text.",
 +        "If you use `edit`, copy one exact contiguous `old_string` from the current on-disk file.",
 +        "If the repair rewrites most of the file, use `write` with the complete replacement content or `patch` anchored to current text instead.",
 +    ]
++
++
  RECOVERY_PROMPT = """## TOOL FAILURE - INVESTIGATE AND ADAPT
  The command failed. You MUST analyze the error and take a DIFFERENT action.
      category = categorize_error(error)
      hints = get_recovery_hints(category, tool_name, args)
 +    mismatch_hints = _edit_old_string_mismatch_hints(tool_name, args, error)
 +    if mismatch_hints:
 +        hints = "\n".join(f"- {hint}" for hint in mismatch_hints) + "\n" + hints
      args_str = ", ".join(f"{key}={value!r}" for key, value in args.items())
      return RECOVERY_PROMPT.format(
          ],
+     }
 -    specific_suggestions = suggestions.get(
 -        last_category,
 -        [
 -            "Manually check the file/directory structure",
 -            "Review the error messages for clues",
 -            "Try a completely different approach",
 -        ],
 -    )
 +    last_attempt = context.attempts[-1] if context.attempts else None
 +    if (
 +        last_attempt is not None
 +        and last_attempt.tool_name == "edit"
 +        and "old_string not found" in str(last_attempt.error or "").lower()
 +    ):
 +        specific_suggestions = [
 +            "`old_string` was stale; do not retry the same remembered text",
 +            "Use exact current on-disk text for `edit`, or switch to `patch`/`write`",
 +            "For large generated-file repairs, replace the whole file or anchor a patch to current content",
 +        ]
 +    else:
 +        specific_suggestions = suggestions.get(
 +            last_category,
 +            [
 +                "Manually check the file/directory structure",
 +                "Review the error messages for clues",
 +                "Try a completely different approach",
 +            ],
 +        )
      lines.extend(["", "Suggestions:"])
      for suggestion in specific_suggestions:

src/loader/runtime/tool_batch_recovery.py — modified

          target_excerpt_lines = self._target_excerpt_lines(tool_call)
          if target_excerpt_lines:
              lines.extend(["", "## CURRENT TARGET EXCERPT", *target_excerpt_lines])
 +        stale_edit_lines = self._stale_edit_recovery_lines(tool_call, outcome)
 +        if stale_edit_lines:
 +            lines.extend(["", "## STALE EDIT RECOVERY", *stale_edit_lines])
          payload_fix_lines = self._missing_payload_fix_lines(tool_call, outcome)
          if payload_fix_lines:
              lines.extend(["", "## PAYLOAD FORMAT FIX", *payload_fix_lines])
          return "\n".join(lines)
 +    def _stale_edit_recovery_lines(
 +        self,
 +        tool_call: ToolCall,
 +        outcome: ToolExecutionOutcome,
 +    ) -> list[str]:
 +        """Steer old_string misses away from repeated remembered edits."""
++
 +        if tool_call.name != "edit":
 +            return []
 +        if "old_string not found" not in outcome.result_output.lower():
 +            return []
++
 +        raw_path = str(
 +            tool_call.arguments.get("file_path")
 +            or tool_call.arguments.get("path")
 +            or ""
 +        ).strip()
 +        target = self._canonicalize_path(raw_path)
 +        target_line = (
 +            f"- The failed `old_string` is stale for `{target}`; do not retry it from memory."
 +            if target
 +            else "- The failed `old_string` is stale; do not retry it from memory."
 +        )
 +        return [
 +            target_line,
 +            "- If using `edit`, copy an exact contiguous `old_string` from the current target excerpt above.",
 +            "- For generated-document expansion or large rewrites, prefer `write` with the complete replacement file or `patch` anchored to current text.",
 +            "- Do not reopen unrelated reference materials before applying the next mutation.",
 +        ]
++
      def _missing_payload_fix_lines(
          self,
          tool_call: ToolCall,

src/loader/runtime/tool_batches.py — modified

          if changed_path not in allowed_paths:
              return
 +        if _repair_context_is_html_quality(repair):
 +            next_target = _next_quality_repair_path(
 +                repair,
 +                changed_path=changed_path,
 +            )
 +            if next_target:
 +                self.context.queue_steering_message(
 +                    "The active HTML content-quality repair target was updated. "
 +                    "If the current file now comfortably clears its stated threshold, "
 +                    f"continue directly with the next listed quality target `{next_target}` "
 +                    "using one substantial write/edit/patch anchored to current content. "
 +                    "If it still looks thin, expand this same file further now. "
 +                    "Do not rerun verification, reopen unrelated references, or summarize "
 +                    "completion after only one quality target."
 +                )
 +                return
++
          if changed_path == str(Path(repair.artifact_path).expanduser().resolve(strict=False)):
              self.context.queue_steering_message(
                  "The active verification repair target was updated. "
      return ""
 +def _repair_context_is_html_quality(repair: Any) -> bool:
 +    """Return whether the active repair context is for generated HTML quality."""
++
 +    return any(  # True as soon as one repair line matches
 +        _repair_line_is_html_quality(line)
 +        for line in getattr(repair, "repair_lines", ()) or ()  # missing or falsy repair_lines -> nothing to scan
 +    )
++
++
 +def _repair_line_is_html_quality(line: str) -> bool:  # True when the line contains any content-quality phrase below
 +    lowered = str(line or "").lower()  # None/empty -> ""; matching is case-insensitive
 +    return (
 +        "thin content" in lowered
 +        or "insufficient structured content" in lowered
 +        or "content-quality" in lowered
 +        or "content quality" in lowered
 +    )  # NOTE(review): matches phrases only; nothing here checks that the target is HTML
++
++
 +def _next_quality_repair_path(repair: Any, *, changed_path: str) -> str:
 +    """Return the next concrete repair file after a successful quality mutation."""
++
 +    try:
 +        normalized_changed = str(Path(changed_path).expanduser().resolve(strict=False))  # strict=False: path need not exist
 +    except (OSError, RuntimeError, ValueError):
 +        normalized_changed = str(Path(changed_path).expanduser())  # fallback: ~-expanded but unresolved
++
 +    normalized_paths: list[str] = []  # allowed paths, normalized the same way, in original order
 +    for raw_path in getattr(repair, "allowed_paths", ()) or ():  # missing or falsy allowed_paths -> no candidates
 +        try:
 +            normalized = str(Path(raw_path).expanduser().resolve(strict=False))
 +        except (OSError, RuntimeError, ValueError):
 +            normalized = str(Path(raw_path).expanduser())
 +        if normalized and normalized not in normalized_paths:  # drop duplicates, keep first occurrence
 +            normalized_paths.append(normalized)
++
 +    if normalized_changed in normalized_paths:
 +        index = normalized_paths.index(normalized_changed)
 +        if index + 1 < len(normalized_paths):  # prefer the entry right after the changed path
 +            return normalized_paths[index + 1]
 +    for normalized in normalized_paths:  # changed path is last or absent: take the first other path
 +        if normalized != normalized_changed:
 +            return normalized
 +    return ""  # no candidate other than the changed path
++
++
  def _tool_call_targets_path(tool_call: ToolCall, target: str) -> bool:
      if not target:
          return False

tests/test_finalization.py — modified

      assert f"Improve `{chapter_paths[0]}`: thin content" in guidance
      assert f"Improve `{chapter_paths[-1]}`: thin content" in guidance
      assert "add enough concrete prose" in guidance
 +    assert "exact current on-disk text" in guidance
      assert "not table-of-contents inflation" in guidance
      assert "do not add duplicate navigation entries" in guidance
      assert "do not stop after touching only the first file" in guidance

tests/test_recovery.py — modified

          assert "content_chars" in prompt
          assert "index.html" in prompt
 +    def test_format_recovery_prompt_for_old_string_miss_prefers_current_text(self):
 +        ctx = RecoveryContext(
 +            original_tool="edit",
 +            original_args={
 +                "file_path": "~/Loader/guides/nginx/chapters/02-installation.html",
 +                "old_string": "<h1>Installation</h1>",
 +                "new_string": "<h1>Installation</h1><p>Expanded.</p>",
 +            },
 +        )
 +        ctx.add_attempt(  # record one failed edit carrying the old_string-miss error
 +            "edit",
 +            ctx.original_args,
 +            "old_string not found in file. Make sure it matches exactly.",
 +        )
++
 +        prompt = format_recovery_prompt(  # same tool, args and error text as the recorded attempt
 +            ctx,
 +            "edit",
 +            ctx.original_args,
 +            "old_string not found in file. Make sure it matches exactly.",
 +        )
++
 +        assert "`old_string` is stale" in prompt  # stale-string hint is present
 +        assert "current on-disk file" in prompt  # edit must copy from the current file
 +        assert "write` with the complete replacement content" in prompt  # full-rewrite alternative is offered
++
  class TestFormatFailureMessage:
      """Tests for failure message formatting."""
          assert "Error 1" in msg
          assert "Error 2" in msg
          assert "Error 3" in msg
++
 +    def test_format_failure_message_for_old_string_miss_is_specific(self):
 +        ctx = RecoveryContext(
 +            original_tool="edit",
 +            original_args={
 +                "file_path": "guide.html",
 +                "old_string": "<h1>Old</h1>",
 +                "new_string": "<h1>New</h1>",
 +            },
 +            max_retries=2,
 +        )
 +        ctx.add_attempt("edit", ctx.original_args, "old_string not found in file")  # two identical failures, matching max_retries=2
 +        ctx.add_attempt("edit", ctx.original_args, "old_string not found in file")
++
 +        msg = format_failure_message(ctx)
++
 +        assert "`old_string` was stale" in msg  # edit-specific suggestion is used
 +        assert "exact current on-disk text" in msg
 +        assert "Try a completely different approach" not in msg  # generic fallback suggestion must not appear

tests/test_tool_batch_policies.py — modified

      assert "6 | ## Status" in follow_up.content
      assert "7 | The runtime is stable." in follow_up.content
      assert "replace the containing block in one edit" in follow_up.content
 +    assert "## STALE EDIT RECOVERY" in follow_up.content
 +    assert "do not retry it from memory" in follow_up.content
 +    assert "complete replacement file" in follow_up.content
  @pytest.mark.asyncio

tests/test_tool_batches.py — modified

      assert "make one real edit" in queued[0]
 +@pytest.mark.asyncio
 +async def test_tool_batch_runner_quality_repair_success_hands_to_next_target(
 +    temp_dir: Path,
 +) -> None:
 +    async def assess_confidence(
 +        tool_name: str,
 +        tool_args: dict,
 +        context: str,
 +    ) -> ConfidenceAssessment:
 +        raise AssertionError("Confidence scoring should be disabled in this scenario")  # fails the test if ever called
++
 +    async def verify_action(
 +        tool_name: str,
 +        tool_args: dict,
 +        result: str,
 +        expected: str = "",
 +    ) -> ActionVerification:
 +        raise AssertionError("Verification should not run in this scenario")  # fails the test if ever called
++
 +    chapters = temp_dir / "guide" / "chapters"
 +    first = chapters / "01-introduction.html"
 +    second = chapters / "02-installation.html"
 +    chapters.mkdir(parents=True)
 +    first.write_text("<h1>Intro</h1>\n")  # both targets start as one-heading files
 +    second.write_text("<h1>Install</h1>\n")
 +    context = build_context(
 +        temp_dir=temp_dir,
 +        messages=[
 +            Message(
 +                role=Role.ASSISTANT,
 +                content=(
 +                    "Repair focus:\n"
 +                    f"- Improve `{first}`: thin content (400 text chars, expected at least 1758).\n"
 +                    f"- Improve `{second}`: insufficient structured content (6 blocks, expected at least 18).\n"
 +                    f"- Immediate next step: edit `{first}` with a substantial expansion or replacement.\n"
 +                    "- Repair every listed quality target in order before any final answer.\n"
 +                ),
 +            )
 +        ],
 +        safeguards=FakeSafeguards(),
 +        assess_confidence=assess_confidence,
 +        verify_action=verify_action,
 +    )
 +    queued: list[str] = []
 +    context.queue_steering_message_callback = queued.append  # collect queued steering messages for the assertions below
 +    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
 +    dod = create_definition_of_done("Repair generated HTML guide quality.")
 +    tool_call = ToolCall(  # a `write` aimed at the first listed quality target
 +        id="write-intro",
 +        name="write",
 +        arguments={
 +            "file_path": str(first),
 +            "content": "<h1>Intro</h1><p>Substantial expansion.</p>\n",
 +        },
 +    )
++
 +    await runner.execute_batch(
 +        tool_calls=[tool_call],
 +        tool_source="assistant",
 +        pending_tool_calls_seen=set(),
 +        emit=_noop_emit,
 +        summary=TurnSummary(final_response=""),
 +        dod=dod,
 +        executor=FakeExecutor(
 +            [
 +                tool_outcome(
 +                    tool_call=tool_call,
 +                    output=f"Successfully wrote {first}",
 +                    is_error=False,
 +                )
 +            ]
 +        ),  # type: ignore[arg-type]
 +        on_confirmation=None,
 +        on_user_question=None,
 +        emit_confirmation=None,
 +        consecutive_errors=0,
 +    )
++
 +    assert queued  # at least one steering message was queued
 +    assert any("next listed quality target" in message for message in queued)
 +    assert any(str(second.resolve(strict=False)) in message for message in queued)  # resolved path of the second target is named
 +    assert any("Do not rerun verification" in message for message in queued)
++
++
  @pytest.mark.asyncio
  async def test_tool_batch_runner_hands_off_after_active_repair_support_file_write(
      temp_dir: Path,