tenseleyflow/loader / 7a4495a

Browse files

Recover stale guide edits

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
7a4495ac004853da076b066fea24c139f2940e15
Parents
bea3236
Tree
6e040e4

8 changed files

StatusFile+-
M src/loader/runtime/finalization.py 8 2
M src/loader/runtime/recovery.py 44 8
M src/loader/runtime/tool_batch_recovery.py 33 0
M src/loader/runtime/tool_batches.py 63 0
M tests/test_finalization.py 1 0
M tests/test_recovery.py 45 0
M tests/test_tool_batch_policies.py 3 0
M tests/test_tool_batches.py 84 0
src/loader/runtime/finalization.pymodified
@@ -1362,6 +1362,9 @@ def _html_quality_repair_strategy_lines() -> list[str]:
13621362
         "exceed the expected text-character floor; if it says insufficient "
13631363
         "structured content, add enough real sections, lists, code, tables, or "
13641364
         "other content blocks to exceed the block floor.",
1365
+        "- Prefer a complete `write` or a `patch`/`edit` anchored to exact current "
1366
+        "on-disk text; do not use remembered minimal `old_string` snippets for "
1367
+        "generated-document expansion.",
13651368
         "- Treat structured content as substantive page body material, not table-of-"
13661369
         "contents inflation: do not add duplicate navigation entries, relabel links "
13671370
         "to existing pages as new chapters, or introduce new missing page links just "
@@ -1451,10 +1454,13 @@ def _build_verification_failure_recovery_nudge(
14511454
         return (
14521455
             "Verification now identifies generated artifact content quality issues. "
14531456
             "Do not restart discovery or keep auditing unrelated files. "
1454
-            "Your next response should be one concrete `edit` or `write`-style tool "
1457
+            "Your next response should be one concrete `write`, `patch`, or exact-current "
1458
+            "`edit` tool "
14551459
             f"call that expands `{primary_target.artifact_path}` to address: "
14561460
             f"{primary_target.issue}. Make a substantial change that clears the stated "
1457
-            "threshold, not a small incremental edit, and do not summarize completion "
1461
+            "threshold, not a small incremental edit. Do not use remembered minimal "
1462
+            "`old_string` snippets; anchor edits to current on-disk text or rewrite the "
1463
+            "file completely. Do not summarize completion "
14581464
             f"after only one target is touched.{remaining_hint}"
14591465
         )
14601466
 
src/loader/runtime/recovery.pymodified
@@ -872,6 +872,27 @@ def get_recovery_hints(
872872
     return "\n".join(f"- {hint}" for hint in category_hints)
873873
 
874874
 
875
+def _edit_old_string_mismatch_hints(
876
+    tool_name: str,
877
+    args: dict[str, Any] | None,
878
+    error: str,
879
+) -> list[str]:
880
+    """Return specific recovery hints for stale edit replacement strings."""
881
+
882
+    if tool_name != "edit":
883
+        return []
884
+    if "old_string not found" not in str(error or "").lower():
885
+        return []
886
+
887
+    target = str((args or {}).get("file_path") or (args or {}).get("path") or "").strip()
888
+    target_suffix = f" for `{target}`" if target else ""
889
+    return [
890
+        f"`old_string` is stale or copied from memory{target_suffix}; do not retry the same text.",
891
+        "If you use `edit`, copy one exact contiguous `old_string` from the current on-disk file.",
892
+        "If the repair rewrites most of the file, use `write` with the complete replacement content or `patch` anchored to current text instead.",
893
+    ]
894
+
895
+
875896
 RECOVERY_PROMPT = """## TOOL FAILURE - INVESTIGATE AND ADAPT
876897
 
877898
 The command failed. You MUST analyze the error and take a DIFFERENT action.
@@ -911,6 +932,9 @@ def format_recovery_prompt(
911932
 
912933
     category = categorize_error(error)
913934
     hints = get_recovery_hints(category, tool_name, args)
935
+    mismatch_hints = _edit_old_string_mismatch_hints(tool_name, args, error)
936
+    if mismatch_hints:
937
+        hints = "\n".join(f"- {hint}" for hint in mismatch_hints) + "\n" + hints
914938
     args_str = ", ".join(f"{key}={value!r}" for key, value in args.items())
915939
 
916940
     return RECOVERY_PROMPT.format(
@@ -1004,14 +1028,26 @@ def format_failure_message(context: RecoveryContext) -> str:
10041028
         ],
10051029
     }
10061030
 
1007
-    specific_suggestions = suggestions.get(
1008
-        last_category,
1009
-        [
1010
-            "Manually check the file/directory structure",
1011
-            "Review the error messages for clues",
1012
-            "Try a completely different approach",
1013
-        ],
1014
-    )
1031
+    last_attempt = context.attempts[-1] if context.attempts else None
1032
+    if (
1033
+        last_attempt is not None
1034
+        and last_attempt.tool_name == "edit"
1035
+        and "old_string not found" in str(last_attempt.error or "").lower()
1036
+    ):
1037
+        specific_suggestions = [
1038
+            "`old_string` was stale; do not retry the same remembered text",
1039
+            "Use exact current on-disk text for `edit`, or switch to `patch`/`write`",
1040
+            "For large generated-file repairs, replace the whole file or anchor a patch to current content",
1041
+        ]
1042
+    else:
1043
+        specific_suggestions = suggestions.get(
1044
+            last_category,
1045
+            [
1046
+                "Manually check the file/directory structure",
1047
+                "Review the error messages for clues",
1048
+                "Try a completely different approach",
1049
+            ],
1050
+        )
10151051
 
10161052
     lines.extend(["", "Suggestions:"])
10171053
     for suggestion in specific_suggestions:
src/loader/runtime/tool_batch_recovery.pymodified
@@ -238,11 +238,44 @@ class ToolBatchRecoveryController:
238238
         target_excerpt_lines = self._target_excerpt_lines(tool_call)
239239
         if target_excerpt_lines:
240240
             lines.extend(["", "## CURRENT TARGET EXCERPT", *target_excerpt_lines])
241
+        stale_edit_lines = self._stale_edit_recovery_lines(tool_call, outcome)
242
+        if stale_edit_lines:
243
+            lines.extend(["", "## STALE EDIT RECOVERY", *stale_edit_lines])
241244
         payload_fix_lines = self._missing_payload_fix_lines(tool_call, outcome)
242245
         if payload_fix_lines:
243246
             lines.extend(["", "## PAYLOAD FORMAT FIX", *payload_fix_lines])
244247
         return "\n".join(lines)
245248
 
249
+    def _stale_edit_recovery_lines(
250
+        self,
251
+        tool_call: ToolCall,
252
+        outcome: ToolExecutionOutcome,
253
+    ) -> list[str]:
254
+        """Steer old_string misses away from repeated remembered edits."""
255
+
256
+        if tool_call.name != "edit":
257
+            return []
258
+        if "old_string not found" not in outcome.result_output.lower():
259
+            return []
260
+
261
+        raw_path = str(
262
+            tool_call.arguments.get("file_path")
263
+            or tool_call.arguments.get("path")
264
+            or ""
265
+        ).strip()
266
+        target = self._canonicalize_path(raw_path)
267
+        target_line = (
268
+            f"- The failed `old_string` is stale for `{target}`; do not retry it from memory."
269
+            if target
270
+            else "- The failed `old_string` is stale; do not retry it from memory."
271
+        )
272
+        return [
273
+            target_line,
274
+            "- If using `edit`, copy an exact contiguous `old_string` from the current target excerpt above.",
275
+            "- For generated-document expansion or large rewrites, prefer `write` with the complete replacement file or `patch` anchored to current text.",
276
+            "- Do not reopen unrelated reference materials before applying the next mutation.",
277
+        ]
278
+
246279
     def _missing_payload_fix_lines(
247280
         self,
248281
         tool_call: ToolCall,
src/loader/runtime/tool_batches.pymodified
@@ -1000,6 +1000,23 @@ class ToolBatchRunner:
10001000
         if changed_path not in allowed_paths:
10011001
             return
10021002
 
1003
+        if _repair_context_is_html_quality(repair):
1004
+            next_target = _next_quality_repair_path(
1005
+                repair,
1006
+                changed_path=changed_path,
1007
+            )
1008
+            if next_target:
1009
+                self.context.queue_steering_message(
1010
+                    "The active HTML content-quality repair target was updated. "
1011
+                    "If the current file now comfortably clears its stated threshold, "
1012
+                    f"continue directly with the next listed quality target `{next_target}` "
1013
+                    "using one substantial write/edit/patch anchored to current content. "
1014
+                    "If it still looks thin, expand this same file further now. "
1015
+                    "Do not rerun verification, reopen unrelated references, or summarize "
1016
+                    "completion after only one quality target."
1017
+                )
1018
+                return
1019
+
10031020
         if changed_path == str(Path(repair.artifact_path).expanduser().resolve(strict=False)):
10041021
             self.context.queue_steering_message(
10051022
                 "The active verification repair target was updated. "
@@ -3245,6 +3262,52 @@ def _recent_edit_string_mismatch_target(recovery_context: RecoveryContext | None
32453262
     return ""
32463263
 
32473264
 
3265
+def _repair_context_is_html_quality(repair: Any) -> bool:
3266
+    """Return whether the active repair context is for generated HTML quality."""
3267
+
3268
+    return any(
3269
+        _repair_line_is_html_quality(line)
3270
+        for line in getattr(repair, "repair_lines", ()) or ()
3271
+    )
3272
+
3273
+
3274
+def _repair_line_is_html_quality(line: str) -> bool:
3275
+    lowered = str(line or "").lower()
3276
+    return (
3277
+        "thin content" in lowered
3278
+        or "insufficient structured content" in lowered
3279
+        or "content-quality" in lowered
3280
+        or "content quality" in lowered
3281
+    )
3282
+
3283
+
3284
+def _next_quality_repair_path(repair: Any, *, changed_path: str) -> str:
3285
+    """Return the next concrete repair file after a successful quality mutation."""
3286
+
3287
+    try:
3288
+        normalized_changed = str(Path(changed_path).expanduser().resolve(strict=False))
3289
+    except (OSError, RuntimeError, ValueError):
3290
+        normalized_changed = str(Path(changed_path).expanduser())
3291
+
3292
+    normalized_paths: list[str] = []
3293
+    for raw_path in getattr(repair, "allowed_paths", ()) or ():
3294
+        try:
3295
+            normalized = str(Path(raw_path).expanduser().resolve(strict=False))
3296
+        except (OSError, RuntimeError, ValueError):
3297
+            normalized = str(Path(raw_path).expanduser())
3298
+        if normalized and normalized not in normalized_paths:
3299
+            normalized_paths.append(normalized)
3300
+
3301
+    if normalized_changed in normalized_paths:
3302
+        index = normalized_paths.index(normalized_changed)
3303
+        if index + 1 < len(normalized_paths):
3304
+            return normalized_paths[index + 1]
3305
+    for normalized in normalized_paths:
3306
+        if normalized != normalized_changed:
3307
+            return normalized
3308
+    return ""
3309
+
3310
+
32483311
 def _tool_call_targets_path(tool_call: ToolCall, target: str) -> bool:
32493312
     if not target:
32503313
         return False
tests/test_finalization.pymodified
@@ -489,6 +489,7 @@ def test_verification_repair_guidance_keeps_multi_file_quality_worklist(
489489
     assert f"Improve `{chapter_paths[0]}`: thin content" in guidance
490490
     assert f"Improve `{chapter_paths[-1]}`: thin content" in guidance
491491
     assert "add enough concrete prose" in guidance
492
+    assert "exact current on-disk text" in guidance
492493
     assert "not table-of-contents inflation" in guidance
493494
     assert "do not add duplicate navigation entries" in guidance
494495
     assert "do not stop after touching only the first file" in guidance
tests/test_recovery.pymodified
@@ -224,6 +224,32 @@ class TestFormatRecoveryPrompt:
224224
         assert "content_chars" in prompt
225225
         assert "index.html" in prompt
226226
 
227
+    def test_format_recovery_prompt_for_old_string_miss_prefers_current_text(self):
228
+        ctx = RecoveryContext(
229
+            original_tool="edit",
230
+            original_args={
231
+                "file_path": "~/Loader/guides/nginx/chapters/02-installation.html",
232
+                "old_string": "<h1>Installation</h1>",
233
+                "new_string": "<h1>Installation</h1><p>Expanded.</p>",
234
+            },
235
+        )
236
+        ctx.add_attempt(
237
+            "edit",
238
+            ctx.original_args,
239
+            "old_string not found in file. Make sure it matches exactly.",
240
+        )
241
+
242
+        prompt = format_recovery_prompt(
243
+            ctx,
244
+            "edit",
245
+            ctx.original_args,
246
+            "old_string not found in file. Make sure it matches exactly.",
247
+        )
248
+
249
+        assert "`old_string` is stale" in prompt
250
+        assert "current on-disk file" in prompt
251
+        assert "write` with the complete replacement content" in prompt
252
+
227253
 
228254
 class TestFormatFailureMessage:
229255
     """Tests for failure message formatting."""
@@ -244,3 +270,22 @@ class TestFormatFailureMessage:
244270
         assert "Error 1" in msg
245271
         assert "Error 2" in msg
246272
         assert "Error 3" in msg
273
+
274
+    def test_format_failure_message_for_old_string_miss_is_specific(self):
275
+        ctx = RecoveryContext(
276
+            original_tool="edit",
277
+            original_args={
278
+                "file_path": "guide.html",
279
+                "old_string": "<h1>Old</h1>",
280
+                "new_string": "<h1>New</h1>",
281
+            },
282
+            max_retries=2,
283
+        )
284
+        ctx.add_attempt("edit", ctx.original_args, "old_string not found in file")
285
+        ctx.add_attempt("edit", ctx.original_args, "old_string not found in file")
286
+
287
+        msg = format_failure_message(ctx)
288
+
289
+        assert "`old_string` was stale" in msg
290
+        assert "exact current on-disk text" in msg
291
+        assert "Try a completely different approach" not in msg
tests/test_tool_batch_policies.pymodified
@@ -576,6 +576,9 @@ async def test_tool_batch_recovery_controller_includes_current_target_excerpt_fo
576576
     assert "6 | ## Status" in follow_up.content
577577
     assert "7 | The runtime is stable." in follow_up.content
578578
     assert "replace the containing block in one edit" in follow_up.content
579
+    assert "## STALE EDIT RECOVERY" in follow_up.content
580
+    assert "do not retry it from memory" in follow_up.content
581
+    assert "complete replacement file" in follow_up.content
579582
 
580583
 
581584
 @pytest.mark.asyncio
tests/test_tool_batches.pymodified
@@ -6993,6 +6993,90 @@ def test_tool_batch_runner_duplicate_repair_mutation_restates_verifier_deltas(
69936993
     assert "make one real edit" in queued[0]
69946994
 
69956995
 
6996
+@pytest.mark.asyncio
6997
+async def test_tool_batch_runner_quality_repair_success_hands_to_next_target(
6998
+    temp_dir: Path,
6999
+) -> None:
7000
+    async def assess_confidence(
7001
+        tool_name: str,
7002
+        tool_args: dict,
7003
+        context: str,
7004
+    ) -> ConfidenceAssessment:
7005
+        raise AssertionError("Confidence scoring should be disabled in this scenario")
7006
+
7007
+    async def verify_action(
7008
+        tool_name: str,
7009
+        tool_args: dict,
7010
+        result: str,
7011
+        expected: str = "",
7012
+    ) -> ActionVerification:
7013
+        raise AssertionError("Verification should not run in this scenario")
7014
+
7015
+    chapters = temp_dir / "guide" / "chapters"
7016
+    first = chapters / "01-introduction.html"
7017
+    second = chapters / "02-installation.html"
7018
+    chapters.mkdir(parents=True)
7019
+    first.write_text("<h1>Intro</h1>\n")
7020
+    second.write_text("<h1>Install</h1>\n")
7021
+    context = build_context(
7022
+        temp_dir=temp_dir,
7023
+        messages=[
7024
+            Message(
7025
+                role=Role.ASSISTANT,
7026
+                content=(
7027
+                    "Repair focus:\n"
7028
+                    f"- Improve `{first}`: thin content (400 text chars, expected at least 1758).\n"
7029
+                    f"- Improve `{second}`: insufficient structured content (6 blocks, expected at least 18).\n"
7030
+                    f"- Immediate next step: edit `{first}` with a substantial expansion or replacement.\n"
7031
+                    "- Repair every listed quality target in order before any final answer.\n"
7032
+                ),
7033
+            )
7034
+        ],
7035
+        safeguards=FakeSafeguards(),
7036
+        assess_confidence=assess_confidence,
7037
+        verify_action=verify_action,
7038
+    )
7039
+    queued: list[str] = []
7040
+    context.queue_steering_message_callback = queued.append
7041
+    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
7042
+    dod = create_definition_of_done("Repair generated HTML guide quality.")
7043
+    tool_call = ToolCall(
7044
+        id="write-intro",
7045
+        name="write",
7046
+        arguments={
7047
+            "file_path": str(first),
7048
+            "content": "<h1>Intro</h1><p>Substantial expansion.</p>\n",
7049
+        },
7050
+    )
7051
+
7052
+    await runner.execute_batch(
7053
+        tool_calls=[tool_call],
7054
+        tool_source="assistant",
7055
+        pending_tool_calls_seen=set(),
7056
+        emit=_noop_emit,
7057
+        summary=TurnSummary(final_response=""),
7058
+        dod=dod,
7059
+        executor=FakeExecutor(
7060
+            [
7061
+                tool_outcome(
7062
+                    tool_call=tool_call,
7063
+                    output=f"Successfully wrote {first}",
7064
+                    is_error=False,
7065
+                )
7066
+            ]
7067
+        ),  # type: ignore[arg-type]
7068
+        on_confirmation=None,
7069
+        on_user_question=None,
7070
+        emit_confirmation=None,
7071
+        consecutive_errors=0,
7072
+    )
7073
+
7074
+    assert queued
7075
+    assert any("next listed quality target" in message for message in queued)
7076
+    assert any(str(second.resolve(strict=False)) in message for message in queued)
7077
+    assert any("Do not rerun verification" in message for message in queued)
7078
+
7079
+
69967080
 @pytest.mark.asyncio
69977081
 async def test_tool_batch_runner_hands_off_after_active_repair_support_file_write(
69987082
     temp_dir: Path,