tenseleyflow/loader / c10dbd1

Browse files

Refine repair loop steering

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
c10dbd184e4fba56433bf8f81f7aca5dd0217ea0
Parents
b210a18
Tree
15f7763

4 changed files

StatusFile+-
M src/loader/runtime/hooks.py 4 5
M src/loader/runtime/tool_batches.py 57 0
M tests/test_permissions.py 9 18
M tests/test_tool_batches.py 104 0
src/loader/runtime/hooks.pymodified
@@ -997,6 +997,8 @@ class LateReferenceDriftHook(BaseToolHook):
997997
     async def pre_tool_use(self, context: HookContext) -> HookResult:
998998
         if context.tool_call.name not in _OBSERVATION_TOOLS:
999999
             return HookResult()
1000
+        if context.source == "verification":
1001
+            return HookResult()
10001002
 
10011003
         completed_scope = self._completed_artifact_scope()
10021004
         if completed_scope is not None:
@@ -1039,9 +1041,6 @@ class LateReferenceDriftHook(BaseToolHook):
10391041
                 terminal_state="blocked",
10401042
             )
10411043
 
1042
-        if context.source == "verification":
1043
-            return HookResult()
1044
-
10451044
         late_stage = self._late_stage_missing_artifact()
10461045
         if late_stage is None:
10471046
             return HookResult()
@@ -1142,6 +1141,8 @@ class LateReferenceDriftHook(BaseToolHook):
11421141
         return False
11431142
 
11441143
     async def post_tool_use(self, context: HookContext) -> HookResult:
1144
+        if context.source == "verification":
1145
+            return HookResult()
11451146
         if _tool_call_is_effective_mutation(context.tool_call):
11461147
             self._reset_completed_scope_state()
11471148
             return HookResult()
@@ -1150,8 +1151,6 @@ class LateReferenceDriftHook(BaseToolHook):
11501151
 
11511152
         completed_scope = self._completed_artifact_scope()
11521153
         if completed_scope is None:
1153
-            if context.source == "verification":
1154
-                return HookResult()
11551154
             self._reset_completed_scope_state()
11561155
             return HookResult()
11571156
 
src/loader/runtime/tool_batches.pymodified
@@ -529,6 +529,20 @@ class ToolBatchRunner:
529529
             self.context.session.messages,
530530
             max_items=2,
531531
         )
532
+        edit_mismatch_target = _recent_edit_string_mismatch_target(
533
+            self.context.recovery_context,
534
+        )
535
+        if edit_mismatch_target and _tool_call_targets_path(tool_call, edit_mismatch_target):
536
+            self.context.queue_steering_message(
537
+                "Reuse the earlier observation instead of repeating it. "
538
+                f"The last edit on `{edit_mismatch_target}` failed because `old_string` "
539
+                "did not exactly match the current file. Use the already-read contents "
540
+                "as the source of truth and send one concrete mutation now: `edit` with "
541
+                "an exact `old_string` copied from that file, `patch`, or `write` with "
542
+                "the complete replacement content if the change rewrites most of the file. "
543
+                "Do not read the same file again first."
544
+            )
545
+            return
532546
         if _should_prioritize_missing_artifact(
533547
             dod=dod,
534548
             next_pending=next_pending,
@@ -3107,6 +3121,49 @@ def _is_recoverable_guidance_block(event_content: str) -> bool:
31073121
     )
31083122
 
31093123
 
3124
+def _recent_edit_string_mismatch_target(recovery_context: RecoveryContext | None) -> str:
3125
+    """Return the active edit target when recovery is from an old_string miss."""
3126
+
3127
+    if recovery_context is None:
3128
+        return ""
3129
+    for attempt in reversed(recovery_context.attempts):
3130
+        if attempt.tool_name != "edit":
3131
+            continue
3132
+        if "old_string not found" not in str(attempt.error or "").lower():
3133
+            continue
3134
+        target = str(
3135
+            attempt.arguments.get("file_path")
3136
+            or attempt.arguments.get("path")
3137
+            or ""
3138
+        ).strip()
3139
+        if target:
3140
+            return target
3141
+    return ""
3142
+
3143
+
3144
+def _tool_call_targets_path(tool_call: ToolCall, target: str) -> bool:
3145
+    if not target:
3146
+        return False
3147
+    candidate = str(
3148
+        tool_call.arguments.get("file_path")
3149
+        or tool_call.arguments.get("path")
3150
+        or ""
3151
+    ).strip()
3152
+    if not candidate and tool_call.name == "bash":
3153
+        paths = _extract_bash_paths(
3154
+            str(tool_call.arguments.get("command") or "").strip(),
3155
+        )
3156
+        candidate = paths[0] if paths else ""
3157
+    if not candidate:
3158
+        return False
3159
+    try:
3160
+        return Path(candidate).expanduser().resolve(strict=False) == Path(
3161
+            target
3162
+        ).expanduser().resolve(strict=False)
3163
+    except (OSError, RuntimeError, ValueError):
3164
+        return candidate == target
3165
+
3166
+
31103167
 def _active_repair_focus_preview(repair_lines: list[str], *, max_lines: int = 4) -> str:
31113168
     """Compact repair-focus bullets for steering after no-op mutations."""
31123169
 
tests/test_permissions.pymodified
@@ -1770,7 +1770,7 @@ async def test_late_reference_drift_hook_blocks_reference_reads_when_outputs_exi
17701770
 
17711771
 
17721772
 @pytest.mark.asyncio
1773
-async def test_late_reference_drift_hook_blocks_verification_reference_reads_after_artifacts_exist(
1773
+async def test_late_reference_drift_hook_allows_verification_reference_reads_after_artifacts_exist(
17741774
     temp_dir: Path,
17751775
 ) -> None:
17761776
     registry = create_default_registry(temp_dir)
@@ -1826,10 +1826,7 @@ async def test_late_reference_drift_hook_blocks_verification_reference_reads_aft
18261826
         )
18271827
     )
18281828
 
1829
-    assert result.decision == HookDecision.DENY
1830
-    assert result.terminal_state == "blocked"
1831
-    assert result.message is not None
1832
-    assert "completed artifact set scope" in result.message
1829
+    assert result.decision == HookDecision.CONTINUE
18331830
 
18341831
 
18351832
 @pytest.mark.asyncio
@@ -1902,7 +1899,7 @@ async def test_late_reference_drift_hook_blocks_excessive_post_build_self_audits
19021899
 
19031900
 
19041901
 @pytest.mark.asyncio
1905
-async def test_late_reference_drift_hook_blocks_excessive_post_build_self_audits_during_verification(
1902
+async def test_late_reference_drift_hook_allows_post_build_self_audits_during_verification(
19061903
     temp_dir: Path,
19071904
 ) -> None:
19081905
     registry = create_default_registry(temp_dir)
@@ -1962,12 +1959,9 @@ async def test_late_reference_drift_hook_blocks_excessive_post_build_self_audits
19621959
         assert result.decision == HookDecision.CONTINUE
19631960
         await hook.post_tool_use(context)
19641961
 
1965
-    blocked = await hook.pre_tool_use(make_context(5))
1962
+    result = await hook.pre_tool_use(make_context(5))
19661963
 
1967
-    assert blocked.decision == HookDecision.DENY
1968
-    assert blocked.terminal_state == "blocked"
1969
-    assert blocked.message is not None
1970
-    assert "post-build audit loop" in blocked.message
1964
+    assert result.decision == HookDecision.CONTINUE
19711965
 
19721966
 
19731967
 @pytest.mark.asyncio
@@ -2025,7 +2019,7 @@ async def test_late_reference_drift_hook_blocks_relative_bash_reference_reads_af
20252019
             tool=registry.get("bash"),
20262020
             registry=registry,
20272021
             permission_policy=policy,
2028
-            source="verification",
2022
+            source="native",
20292023
         )
20302024
     )
20312025
 
@@ -2036,7 +2030,7 @@ async def test_late_reference_drift_hook_blocks_relative_bash_reference_reads_af
20362030
 
20372031
 
20382032
 @pytest.mark.asyncio
2039
-async def test_late_reference_drift_hook_blocks_relative_bash_post_build_audit_loop(
2033
+async def test_late_reference_drift_hook_allows_relative_bash_post_build_audit_loop_during_verification(
20402034
     temp_dir: Path,
20412035
 ) -> None:
20422036
     registry = create_default_registry(temp_dir)
@@ -2097,12 +2091,9 @@ async def test_late_reference_drift_hook_blocks_relative_bash_post_build_audit_l
20972091
         assert result.decision == HookDecision.CONTINUE
20982092
         await hook.post_tool_use(context)
20992093
 
2100
-    blocked = await hook.pre_tool_use(make_context(5))
2094
+    result = await hook.pre_tool_use(make_context(5))
21012095
 
2102
-    assert blocked.decision == HookDecision.DENY
2103
-    assert blocked.terminal_state == "blocked"
2104
-    assert blocked.message is not None
2105
-    assert "post-build audit loop" in blocked.message
2096
+    assert result.decision == HookDecision.CONTINUE
21062097
 
21072098
 
21082099
 @pytest.mark.asyncio
tests/test_tool_batches.pymodified
@@ -826,6 +826,110 @@ async def test_tool_batch_runner_duplicate_read_keeps_root_declared_missing_html
826826
     assert ephemeral_messages == []
827827
 
828828
 
829
+@pytest.mark.asyncio
830
+async def test_tool_batch_runner_duplicate_read_after_edit_mismatch_steers_to_mutation(
831
+    temp_dir: Path,
832
+) -> None:
833
+    async def assess_confidence(
834
+        tool_name: str,
835
+        tool_args: dict,
836
+        context: str,
837
+    ) -> ConfidenceAssessment:
838
+        raise AssertionError("Confidence scoring should not run for this scenario")
839
+
840
+    async def verify_action(
841
+        tool_name: str,
842
+        tool_args: dict,
843
+        result: str,
844
+        expected: str = "",
845
+    ) -> ActionVerification:
846
+        raise AssertionError("Verification should not run for this scenario")
847
+
848
+    target = temp_dir / "guide" / "chapters" / "02-installation.html"
849
+    target.parent.mkdir(parents=True)
850
+    target.write_text(
851
+        "<h1>Chapter 2: Installation Guide</h1>\n"
852
+        "<p>This chapter is still too thin.</p>\n"
853
+    )
854
+    recovery_context = RecoveryContext(
855
+        original_tool="edit",
856
+        original_args={
857
+            "file_path": str(target),
858
+            "old_string": "<h1>Installation</h1>",
859
+            "new_string": "<h1>Installation</h1><p>Expanded.</p>",
860
+        },
861
+        max_retries=2,
862
+    )
863
+    recovery_context.add_attempt(
864
+        "edit",
865
+        {
866
+            "file_path": str(target),
867
+            "old_string": "<h1>Installation</h1>",
868
+            "new_string": "<h1>Installation</h1><p>Expanded.</p>",
869
+        },
870
+        "old_string not found in file. Make sure it matches exactly.",
871
+    )
872
+    context = build_context(
873
+        temp_dir=temp_dir,
874
+        messages=[],
875
+        safeguards=FakeSafeguards(),
876
+        assess_confidence=assess_confidence,
877
+        verify_action=verify_action,
878
+        recovery_context=recovery_context,
879
+        auto_recover=False,
880
+    )
881
+    persistent_messages: list[str] = []
882
+    context.queue_steering_message_callback = persistent_messages.append
883
+    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
884
+    tool_call = ToolCall(
885
+        id="read-dup-after-edit-miss",
886
+        name="read",
887
+        arguments={"file_path": str(target)},
888
+    )
889
+    duplicate_message = (
890
+        "[Skipped - duplicate action: Already read "
891
+        f"{target} recently without any intervening changes; "
892
+        "reuse the earlier read result instead of rereading]"
893
+    )
894
+    executor = FakeExecutor(
895
+        [
896
+            ToolExecutionOutcome(
897
+                tool_call=tool_call,
898
+                state=ToolExecutionState.DUPLICATE,
899
+                message=Message.tool_result_message(
900
+                    tool_call_id=tool_call.id,
901
+                    display_content=duplicate_message,
902
+                    result_content=duplicate_message,
903
+                ),
904
+                event_content=duplicate_message,
905
+                is_error=False,
906
+                result_output=duplicate_message,
907
+            )
908
+        ]
909
+    )
910
+    dod = create_definition_of_done("Expand thin generated guide chapters.")
911
+
912
+    await runner.execute_batch(
913
+        tool_calls=[tool_call],
914
+        tool_source="assistant",
915
+        pending_tool_calls_seen=set(),
916
+        emit=_noop_emit,
917
+        summary=TurnSummary(final_response=""),
918
+        dod=dod,
919
+        executor=executor,  # type: ignore[arg-type]
920
+        on_confirmation=None,
921
+        on_user_question=None,
922
+        emit_confirmation=None,
923
+        consecutive_errors=0,
924
+    )
925
+
926
+    assert len(persistent_messages) == 1
927
+    assert "last edit" in persistent_messages[0]
928
+    assert "`old_string` did not exactly match" in persistent_messages[0]
929
+    assert "send one concrete mutation now" in persistent_messages[0]
930
+    assert "`write` with the complete replacement content" in persistent_messages[0]
931
+
932
+
829933
 @pytest.mark.asyncio
830934
 async def test_tool_batch_runner_todo_write_does_not_regress_completed_file_todo(
831935
     temp_dir: Path,