tenseleyflow/loader / 7a4495a

Browse files

Recover stale guide edits

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
7a4495ac004853da076b066fea24c139f2940e15
Parents
bea3236
Tree
6e040e4

8 changed files

StatusFile+-
M src/loader/runtime/finalization.py 8 2
M src/loader/runtime/recovery.py 44 8
M src/loader/runtime/tool_batch_recovery.py 33 0
M src/loader/runtime/tool_batches.py 63 0
M tests/test_finalization.py 1 0
M tests/test_recovery.py 45 0
M tests/test_tool_batch_policies.py 3 0
M tests/test_tool_batches.py 84 0
src/loader/runtime/finalization.pymodified
@@ -1362,6 +1362,9 @@ def _html_quality_repair_strategy_lines() -> list[str]:
1362
         "exceed the expected text-character floor; if it says insufficient "
1362
         "exceed the expected text-character floor; if it says insufficient "
1363
         "structured content, add enough real sections, lists, code, tables, or "
1363
         "structured content, add enough real sections, lists, code, tables, or "
1364
         "other content blocks to exceed the block floor.",
1364
         "other content blocks to exceed the block floor.",
1365
+        "- Prefer a complete `write` or a `patch`/`edit` anchored to exact current "
1366
+        "on-disk text; do not use remembered minimal `old_string` snippets for "
1367
+        "generated-document expansion.",
1365
         "- Treat structured content as substantive page body material, not table-of-"
1368
         "- Treat structured content as substantive page body material, not table-of-"
1366
         "contents inflation: do not add duplicate navigation entries, relabel links "
1369
         "contents inflation: do not add duplicate navigation entries, relabel links "
1367
         "to existing pages as new chapters, or introduce new missing page links just "
1370
         "to existing pages as new chapters, or introduce new missing page links just "
@@ -1451,10 +1454,13 @@ def _build_verification_failure_recovery_nudge(
1451
         return (
1454
         return (
1452
             "Verification now identifies generated artifact content quality issues. "
1455
             "Verification now identifies generated artifact content quality issues. "
1453
             "Do not restart discovery or keep auditing unrelated files. "
1456
             "Do not restart discovery or keep auditing unrelated files. "
1454
-            "Your next response should be one concrete `edit` or `write`-style tool "
1457
+            "Your next response should be one concrete `write`, `patch`, or exact-current "
1458
+            "`edit` tool "
1455
             f"call that expands `{primary_target.artifact_path}` to address: "
1459
             f"call that expands `{primary_target.artifact_path}` to address: "
1456
             f"{primary_target.issue}. Make a substantial change that clears the stated "
1460
             f"{primary_target.issue}. Make a substantial change that clears the stated "
1457
-            "threshold, not a small incremental edit, and do not summarize completion "
1461
+            "threshold, not a small incremental edit. Do not use remembered minimal "
1462
+            "`old_string` snippets; anchor edits to current on-disk text or rewrite the "
1463
+            "file completely. Do not summarize completion "
1458
             f"after only one target is touched.{remaining_hint}"
1464
             f"after only one target is touched.{remaining_hint}"
1459
         )
1465
         )
1460
 
1466
 
src/loader/runtime/recovery.pymodified
@@ -872,6 +872,27 @@ def get_recovery_hints(
872
     return "\n".join(f"- {hint}" for hint in category_hints)
872
     return "\n".join(f"- {hint}" for hint in category_hints)
873
 
873
 
874
 
874
 
875
+def _edit_old_string_mismatch_hints(
876
+    tool_name: str,
877
+    args: dict[str, Any] | None,
878
+    error: str,
879
+) -> list[str]:
880
+    """Return specific recovery hints for stale edit replacement strings."""
881
+
882
+    if tool_name != "edit":
883
+        return []
884
+    if "old_string not found" not in str(error or "").lower():
885
+        return []
886
+
887
+    target = str((args or {}).get("file_path") or (args or {}).get("path") or "").strip()
888
+    target_suffix = f" for `{target}`" if target else ""
889
+    return [
890
+        f"`old_string` is stale or copied from memory{target_suffix}; do not retry the same text.",
891
+        "If you use `edit`, copy one exact contiguous `old_string` from the current on-disk file.",
892
+        "If the repair rewrites most of the file, use `write` with the complete replacement content or `patch` anchored to current text instead.",
893
+    ]
894
+
895
+
875
 RECOVERY_PROMPT = """## TOOL FAILURE - INVESTIGATE AND ADAPT
896
 RECOVERY_PROMPT = """## TOOL FAILURE - INVESTIGATE AND ADAPT
876
 
897
 
877
 The command failed. You MUST analyze the error and take a DIFFERENT action.
898
 The command failed. You MUST analyze the error and take a DIFFERENT action.
@@ -911,6 +932,9 @@ def format_recovery_prompt(
911
 
932
 
912
     category = categorize_error(error)
933
     category = categorize_error(error)
913
     hints = get_recovery_hints(category, tool_name, args)
934
     hints = get_recovery_hints(category, tool_name, args)
935
+    mismatch_hints = _edit_old_string_mismatch_hints(tool_name, args, error)
936
+    if mismatch_hints:
937
+        hints = "\n".join(f"- {hint}" for hint in mismatch_hints) + "\n" + hints
914
     args_str = ", ".join(f"{key}={value!r}" for key, value in args.items())
938
     args_str = ", ".join(f"{key}={value!r}" for key, value in args.items())
915
 
939
 
916
     return RECOVERY_PROMPT.format(
940
     return RECOVERY_PROMPT.format(
@@ -1004,14 +1028,26 @@ def format_failure_message(context: RecoveryContext) -> str:
1004
         ],
1028
         ],
1005
     }
1029
     }
1006
 
1030
 
1007
-    specific_suggestions = suggestions.get(
1031
+    last_attempt = context.attempts[-1] if context.attempts else None
1008
-        last_category,
1032
+    if (
1009
-        [
1033
+        last_attempt is not None
1010
-            "Manually check the file/directory structure",
1034
+        and last_attempt.tool_name == "edit"
1011
-            "Review the error messages for clues",
1035
+        and "old_string not found" in str(last_attempt.error or "").lower()
1012
-            "Try a completely different approach",
1036
+    ):
1013
-        ],
1037
+        specific_suggestions = [
1014
-    )
1038
+            "`old_string` was stale; do not retry the same remembered text",
1039
+            "Use exact current on-disk text for `edit`, or switch to `patch`/`write`",
1040
+            "For large generated-file repairs, replace the whole file or anchor a patch to current content",
1041
+        ]
1042
+    else:
1043
+        specific_suggestions = suggestions.get(
1044
+            last_category,
1045
+            [
1046
+                "Manually check the file/directory structure",
1047
+                "Review the error messages for clues",
1048
+                "Try a completely different approach",
1049
+            ],
1050
+        )
1015
 
1051
 
1016
     lines.extend(["", "Suggestions:"])
1052
     lines.extend(["", "Suggestions:"])
1017
     for suggestion in specific_suggestions:
1053
     for suggestion in specific_suggestions:
src/loader/runtime/tool_batch_recovery.pymodified
@@ -238,11 +238,44 @@ class ToolBatchRecoveryController:
238
         target_excerpt_lines = self._target_excerpt_lines(tool_call)
238
         target_excerpt_lines = self._target_excerpt_lines(tool_call)
239
         if target_excerpt_lines:
239
         if target_excerpt_lines:
240
             lines.extend(["", "## CURRENT TARGET EXCERPT", *target_excerpt_lines])
240
             lines.extend(["", "## CURRENT TARGET EXCERPT", *target_excerpt_lines])
241
+        stale_edit_lines = self._stale_edit_recovery_lines(tool_call, outcome)
242
+        if stale_edit_lines:
243
+            lines.extend(["", "## STALE EDIT RECOVERY", *stale_edit_lines])
241
         payload_fix_lines = self._missing_payload_fix_lines(tool_call, outcome)
244
         payload_fix_lines = self._missing_payload_fix_lines(tool_call, outcome)
242
         if payload_fix_lines:
245
         if payload_fix_lines:
243
             lines.extend(["", "## PAYLOAD FORMAT FIX", *payload_fix_lines])
246
             lines.extend(["", "## PAYLOAD FORMAT FIX", *payload_fix_lines])
244
         return "\n".join(lines)
247
         return "\n".join(lines)
245
 
248
 
249
+    def _stale_edit_recovery_lines(
250
+        self,
251
+        tool_call: ToolCall,
252
+        outcome: ToolExecutionOutcome,
253
+    ) -> list[str]:
254
+        """Steer old_string misses away from repeated remembered edits."""
255
+
256
+        if tool_call.name != "edit":
257
+            return []
258
+        if "old_string not found" not in outcome.result_output.lower():
259
+            return []
260
+
261
+        raw_path = str(
262
+            tool_call.arguments.get("file_path")
263
+            or tool_call.arguments.get("path")
264
+            or ""
265
+        ).strip()
266
+        target = self._canonicalize_path(raw_path)
267
+        target_line = (
268
+            f"- The failed `old_string` is stale for `{target}`; do not retry it from memory."
269
+            if target
270
+            else "- The failed `old_string` is stale; do not retry it from memory."
271
+        )
272
+        return [
273
+            target_line,
274
+            "- If using `edit`, copy an exact contiguous `old_string` from the current target excerpt above.",
275
+            "- For generated-document expansion or large rewrites, prefer `write` with the complete replacement file or `patch` anchored to current text.",
276
+            "- Do not reopen unrelated reference materials before applying the next mutation.",
277
+        ]
278
+
246
     def _missing_payload_fix_lines(
279
     def _missing_payload_fix_lines(
247
         self,
280
         self,
248
         tool_call: ToolCall,
281
         tool_call: ToolCall,
src/loader/runtime/tool_batches.pymodified
@@ -1000,6 +1000,23 @@ class ToolBatchRunner:
1000
         if changed_path not in allowed_paths:
1000
         if changed_path not in allowed_paths:
1001
             return
1001
             return
1002
 
1002
 
1003
+        if _repair_context_is_html_quality(repair):
1004
+            next_target = _next_quality_repair_path(
1005
+                repair,
1006
+                changed_path=changed_path,
1007
+            )
1008
+            if next_target:
1009
+                self.context.queue_steering_message(
1010
+                    "The active HTML content-quality repair target was updated. "
1011
+                    "If the current file now comfortably clears its stated threshold, "
1012
+                    f"continue directly with the next listed quality target `{next_target}` "
1013
+                    "using one substantial write/edit/patch anchored to current content. "
1014
+                    "If it still looks thin, expand this same file further now. "
1015
+                    "Do not rerun verification, reopen unrelated references, or summarize "
1016
+                    "completion after only one quality target."
1017
+                )
1018
+                return
1019
+
1003
         if changed_path == str(Path(repair.artifact_path).expanduser().resolve(strict=False)):
1020
         if changed_path == str(Path(repair.artifact_path).expanduser().resolve(strict=False)):
1004
             self.context.queue_steering_message(
1021
             self.context.queue_steering_message(
1005
                 "The active verification repair target was updated. "
1022
                 "The active verification repair target was updated. "
@@ -3245,6 +3262,52 @@ def _recent_edit_string_mismatch_target(recovery_context: RecoveryContext | None
3245
     return ""
3262
     return ""
3246
 
3263
 
3247
 
3264
 
3265
+def _repair_context_is_html_quality(repair: Any) -> bool:
3266
+    """Return whether the active repair context is for generated HTML quality."""
3267
+
3268
+    return any(
3269
+        _repair_line_is_html_quality(line)
3270
+        for line in getattr(repair, "repair_lines", ()) or ()
3271
+    )
3272
+
3273
+
3274
+def _repair_line_is_html_quality(line: str) -> bool:
3275
+    lowered = str(line or "").lower()
3276
+    return (
3277
+        "thin content" in lowered
3278
+        or "insufficient structured content" in lowered
3279
+        or "content-quality" in lowered
3280
+        or "content quality" in lowered
3281
+    )
3282
+
3283
+
3284
+def _next_quality_repair_path(repair: Any, *, changed_path: str) -> str:
3285
+    """Return the next concrete repair file after a successful quality mutation."""
3286
+
3287
+    try:
3288
+        normalized_changed = str(Path(changed_path).expanduser().resolve(strict=False))
3289
+    except (OSError, RuntimeError, ValueError):
3290
+        normalized_changed = str(Path(changed_path).expanduser())
3291
+
3292
+    normalized_paths: list[str] = []
3293
+    for raw_path in getattr(repair, "allowed_paths", ()) or ():
3294
+        try:
3295
+            normalized = str(Path(raw_path).expanduser().resolve(strict=False))
3296
+        except (OSError, RuntimeError, ValueError):
3297
+            normalized = str(Path(raw_path).expanduser())
3298
+        if normalized and normalized not in normalized_paths:
3299
+            normalized_paths.append(normalized)
3300
+
3301
+    if normalized_changed in normalized_paths:
3302
+        index = normalized_paths.index(normalized_changed)
3303
+        if index + 1 < len(normalized_paths):
3304
+            return normalized_paths[index + 1]
3305
+    for normalized in normalized_paths:
3306
+        if normalized != normalized_changed:
3307
+            return normalized
3308
+    return ""
3309
+
3310
+
3248
 def _tool_call_targets_path(tool_call: ToolCall, target: str) -> bool:
3311
 def _tool_call_targets_path(tool_call: ToolCall, target: str) -> bool:
3249
     if not target:
3312
     if not target:
3250
         return False
3313
         return False
tests/test_finalization.pymodified
@@ -489,6 +489,7 @@ def test_verification_repair_guidance_keeps_multi_file_quality_worklist(
489
     assert f"Improve `{chapter_paths[0]}`: thin content" in guidance
489
     assert f"Improve `{chapter_paths[0]}`: thin content" in guidance
490
     assert f"Improve `{chapter_paths[-1]}`: thin content" in guidance
490
     assert f"Improve `{chapter_paths[-1]}`: thin content" in guidance
491
     assert "add enough concrete prose" in guidance
491
     assert "add enough concrete prose" in guidance
492
+    assert "exact current on-disk text" in guidance
492
     assert "not table-of-contents inflation" in guidance
493
     assert "not table-of-contents inflation" in guidance
493
     assert "do not add duplicate navigation entries" in guidance
494
     assert "do not add duplicate navigation entries" in guidance
494
     assert "do not stop after touching only the first file" in guidance
495
     assert "do not stop after touching only the first file" in guidance
tests/test_recovery.pymodified
@@ -224,6 +224,32 @@ class TestFormatRecoveryPrompt:
224
         assert "content_chars" in prompt
224
         assert "content_chars" in prompt
225
         assert "index.html" in prompt
225
         assert "index.html" in prompt
226
 
226
 
227
+    def test_format_recovery_prompt_for_old_string_miss_prefers_current_text(self):
228
+        ctx = RecoveryContext(
229
+            original_tool="edit",
230
+            original_args={
231
+                "file_path": "~/Loader/guides/nginx/chapters/02-installation.html",
232
+                "old_string": "<h1>Installation</h1>",
233
+                "new_string": "<h1>Installation</h1><p>Expanded.</p>",
234
+            },
235
+        )
236
+        ctx.add_attempt(
237
+            "edit",
238
+            ctx.original_args,
239
+            "old_string not found in file. Make sure it matches exactly.",
240
+        )
241
+
242
+        prompt = format_recovery_prompt(
243
+            ctx,
244
+            "edit",
245
+            ctx.original_args,
246
+            "old_string not found in file. Make sure it matches exactly.",
247
+        )
248
+
249
+        assert "`old_string` is stale" in prompt
250
+        assert "current on-disk file" in prompt
251
+        assert "write` with the complete replacement content" in prompt
252
+
227
 
253
 
228
 class TestFormatFailureMessage:
254
 class TestFormatFailureMessage:
229
     """Tests for failure message formatting."""
255
     """Tests for failure message formatting."""
@@ -244,3 +270,22 @@ class TestFormatFailureMessage:
244
         assert "Error 1" in msg
270
         assert "Error 1" in msg
245
         assert "Error 2" in msg
271
         assert "Error 2" in msg
246
         assert "Error 3" in msg
272
         assert "Error 3" in msg
273
+
274
+    def test_format_failure_message_for_old_string_miss_is_specific(self):
275
+        ctx = RecoveryContext(
276
+            original_tool="edit",
277
+            original_args={
278
+                "file_path": "guide.html",
279
+                "old_string": "<h1>Old</h1>",
280
+                "new_string": "<h1>New</h1>",
281
+            },
282
+            max_retries=2,
283
+        )
284
+        ctx.add_attempt("edit", ctx.original_args, "old_string not found in file")
285
+        ctx.add_attempt("edit", ctx.original_args, "old_string not found in file")
286
+
287
+        msg = format_failure_message(ctx)
288
+
289
+        assert "`old_string` was stale" in msg
290
+        assert "exact current on-disk text" in msg
291
+        assert "Try a completely different approach" not in msg
tests/test_tool_batch_policies.pymodified
@@ -576,6 +576,9 @@ async def test_tool_batch_recovery_controller_includes_current_target_excerpt_fo
576
     assert "6 | ## Status" in follow_up.content
576
     assert "6 | ## Status" in follow_up.content
577
     assert "7 | The runtime is stable." in follow_up.content
577
     assert "7 | The runtime is stable." in follow_up.content
578
     assert "replace the containing block in one edit" in follow_up.content
578
     assert "replace the containing block in one edit" in follow_up.content
579
+    assert "## STALE EDIT RECOVERY" in follow_up.content
580
+    assert "do not retry it from memory" in follow_up.content
581
+    assert "complete replacement file" in follow_up.content
579
 
582
 
580
 
583
 
581
 @pytest.mark.asyncio
584
 @pytest.mark.asyncio
tests/test_tool_batches.pymodified
@@ -6993,6 +6993,90 @@ def test_tool_batch_runner_duplicate_repair_mutation_restates_verifier_deltas(
6993
     assert "make one real edit" in queued[0]
6993
     assert "make one real edit" in queued[0]
6994
 
6994
 
6995
 
6995
 
6996
+@pytest.mark.asyncio
6997
+async def test_tool_batch_runner_quality_repair_success_hands_to_next_target(
6998
+    temp_dir: Path,
6999
+) -> None:
7000
+    async def assess_confidence(
7001
+        tool_name: str,
7002
+        tool_args: dict,
7003
+        context: str,
7004
+    ) -> ConfidenceAssessment:
7005
+        raise AssertionError("Confidence scoring should be disabled in this scenario")
7006
+
7007
+    async def verify_action(
7008
+        tool_name: str,
7009
+        tool_args: dict,
7010
+        result: str,
7011
+        expected: str = "",
7012
+    ) -> ActionVerification:
7013
+        raise AssertionError("Verification should not run in this scenario")
7014
+
7015
+    chapters = temp_dir / "guide" / "chapters"
7016
+    first = chapters / "01-introduction.html"
7017
+    second = chapters / "02-installation.html"
7018
+    chapters.mkdir(parents=True)
7019
+    first.write_text("<h1>Intro</h1>\n")
7020
+    second.write_text("<h1>Install</h1>\n")
7021
+    context = build_context(
7022
+        temp_dir=temp_dir,
7023
+        messages=[
7024
+            Message(
7025
+                role=Role.ASSISTANT,
7026
+                content=(
7027
+                    "Repair focus:\n"
7028
+                    f"- Improve `{first}`: thin content (400 text chars, expected at least 1758).\n"
7029
+                    f"- Improve `{second}`: insufficient structured content (6 blocks, expected at least 18).\n"
7030
+                    f"- Immediate next step: edit `{first}` with a substantial expansion or replacement.\n"
7031
+                    "- Repair every listed quality target in order before any final answer.\n"
7032
+                ),
7033
+            )
7034
+        ],
7035
+        safeguards=FakeSafeguards(),
7036
+        assess_confidence=assess_confidence,
7037
+        verify_action=verify_action,
7038
+    )
7039
+    queued: list[str] = []
7040
+    context.queue_steering_message_callback = queued.append
7041
+    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
7042
+    dod = create_definition_of_done("Repair generated HTML guide quality.")
7043
+    tool_call = ToolCall(
7044
+        id="write-intro",
7045
+        name="write",
7046
+        arguments={
7047
+            "file_path": str(first),
7048
+            "content": "<h1>Intro</h1><p>Substantial expansion.</p>\n",
7049
+        },
7050
+    )
7051
+
7052
+    await runner.execute_batch(
7053
+        tool_calls=[tool_call],
7054
+        tool_source="assistant",
7055
+        pending_tool_calls_seen=set(),
7056
+        emit=_noop_emit,
7057
+        summary=TurnSummary(final_response=""),
7058
+        dod=dod,
7059
+        executor=FakeExecutor(
7060
+            [
7061
+                tool_outcome(
7062
+                    tool_call=tool_call,
7063
+                    output=f"Successfully wrote {first}",
7064
+                    is_error=False,
7065
+                )
7066
+            ]
7067
+        ),  # type: ignore[arg-type]
7068
+        on_confirmation=None,
7069
+        on_user_question=None,
7070
+        emit_confirmation=None,
7071
+        consecutive_errors=0,
7072
+    )
7073
+
7074
+    assert queued
7075
+    assert any("next listed quality target" in message for message in queued)
7076
+    assert any(str(second.resolve(strict=False)) in message for message in queued)
7077
+    assert any("Do not rerun verification" in message for message in queued)
7078
+
7079
+
6996
 @pytest.mark.asyncio
7080
 @pytest.mark.asyncio
6997
 async def test_tool_batch_runner_hands_off_after_active_repair_support_file_write(
7081
 async def test_tool_batch_runner_hands_off_after_active_repair_support_file_write(
6998
     temp_dir: Path,
7082
     temp_dir: Path,