tenseleyflow/loader / 67643a7

Browse files

Prefer verification after no-op edits

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
67643a7c9d445bdbf26aa0bfbfadfb2e1e314fc2
Parents
a6df5b8
Tree
62b1d96

2 changed files

Status File + -
M src/loader/runtime/tool_batches.py 32 2
M tests/test_tool_batches.py 87 0
src/loader/runtime/tool_batches.py modified
@@ -341,7 +341,11 @@ class ToolBatchRunner:
341341
                     dod=dod,
342342
                 )
343343
                 self._queue_blocked_shell_rewrite_nudge(tool_call)
344
-                self._queue_blocked_html_edit_nudge(tool_call, outcome.event_content)
344
+                self._queue_blocked_html_edit_nudge(
345
+                    tool_call,
346
+                    outcome.event_content,
347
+                    dod=dod,
348
+                )
345349
             else:
346350
                 self._queue_post_mutation_self_audit_nudge(tool_call, dod=dod)
347351
                 self._queue_completed_artifact_observation_handoff_nudge(
@@ -818,7 +822,13 @@ class ToolBatchRunner:
818822
             "Do not reopen earlier reference materials."
819823
         )
820824
 
821
-    def _queue_blocked_html_edit_nudge(self, tool_call: ToolCall, event_content: str) -> None:
825
+    def _queue_blocked_html_edit_nudge(
826
+        self,
827
+        tool_call: ToolCall,
828
+        event_content: str,
829
+        *,
830
+        dod: DefinitionOfDone,
831
+    ) -> None:
822832
         """Keep blocked edit feedback generic; avoid task-class-specific steering."""
823833
 
824834
         if tool_call.name != "edit":
@@ -836,6 +846,26 @@ class ToolBatchRunner:
836846
         if not target:
837847
             return
838848
 
849
+        verification_commands = dod.verification_commands or derive_verification_commands(
850
+            dod,
851
+            project_root=self.context.project_root,
852
+            task_statement=getattr(self.context.session, "current_task", "") or "",
853
+            supplement_existing=True,
854
+        )
855
+        if all_planned_artifacts_exist(dod, project_root=self.context.project_root):
856
+            verification_suffix = (
857
+                " Move to verification or final confirmation using the files already on disk."
858
+                if verification_commands
859
+                else " If no concrete mismatch remains, stop editing and finish from the files already on disk."
860
+            )
861
+            self.context.queue_steering_message(
862
+                "That edit would make no on-disk change. "
863
+                f"`{target}` already matches the change you attempted. "
864
+                "All explicitly planned artifacts already exist."
865
+                + verification_suffix
866
+            )
867
+            return
868
+
839869
         self.context.queue_steering_message(
840870
             "That edit would make no on-disk change. "
841871
             f"Stay on `{target}` and use the current file contents as the source of truth. "
tests/test_tool_batches.py modified
@@ -5034,6 +5034,7 @@ def test_tool_batch_runner_blocked_noop_edit_nudge_stays_on_active_repair_target
50345034
     queued: list[str] = []
50355035
     context.queue_steering_message_callback = queued.append
50365036
     runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
5037
+    dod = create_definition_of_done("Repair a guide page.")
50375038
 
50385039
     runner._queue_blocked_html_edit_nudge(
50395040
         ToolCall(
@@ -5046,6 +5047,7 @@ def test_tool_batch_runner_blocked_noop_edit_nudge_stays_on_active_repair_target
50465047
             },
50475048
         ),
50485049
         "[Blocked - old_string and new_string are identical - no change would occur] Suggestion: Provide different old and new strings",
5050
+        dod=dod,
50495051
     )
50505052
 
50515053
     assert queued
@@ -5055,6 +5057,91 @@ def test_tool_batch_runner_blocked_noop_edit_nudge_stays_on_active_repair_target
50555057
     assert "Do not reopen unrelated reference materials" in queued[0]
50565058
 
50575059
 
5060
+def test_tool_batch_runner_blocked_noop_edit_after_full_build_prefers_verification(
5061
+    temp_dir: Path,
5062
+) -> None:
5063
+    async def assess_confidence(
5064
+        tool_name: str,
5065
+        tool_args: dict,
5066
+        context: str,
5067
+    ) -> ConfidenceAssessment:
5068
+        raise AssertionError("Confidence scoring should be disabled in this scenario")
5069
+
5070
+    async def verify_action(
5071
+        tool_name: str,
5072
+        tool_args: dict,
5073
+        result: str,
5074
+        expected: str = "",
5075
+    ) -> ActionVerification:
5076
+        raise AssertionError("Verification should not run in this scenario")
5077
+
5078
+    guide_root = temp_dir / "guide"
5079
+    chapters = guide_root / "chapters"
5080
+    chapters.mkdir(parents=True)
5081
+    index_path = guide_root / "index.html"
5082
+    chapter_one = chapters / "01-introduction.html"
5083
+    index_path.write_text("<html></html>\n")
5084
+    chapter_one.write_text("<html></html>\n")
5085
+
5086
+    implementation_plan = temp_dir / "implementation.md"
5087
+    implementation_plan.write_text(
5088
+        "\n".join(
5089
+            [
5090
+                "# Implementation Plan",
5091
+                "",
5092
+                "## File Changes",
5093
+                f"- `{index_path}`",
5094
+                f"- `{chapter_one}`",
5095
+                "",
5096
+            ]
5097
+        )
5098
+    )
5099
+
5100
+    context = build_context(
5101
+        temp_dir=temp_dir,
5102
+        messages=[
5103
+            Message(
5104
+                role=Role.ASSISTANT,
5105
+                content=(
5106
+                    "Repair focus:\n"
5107
+                    f"- Confirm the final guide state in `{index_path}`.\n"
5108
+                    f"- Immediate next step: verify `{index_path}` if no concrete mismatch remains.\n"
5109
+                ),
5110
+            )
5111
+        ],
5112
+        safeguards=FakeSafeguards(),
5113
+        assess_confidence=assess_confidence,
5114
+        verify_action=verify_action,
5115
+    )
5116
+    queued: list[str] = []
5117
+    context.queue_steering_message_callback = queued.append
5118
+    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
5119
+
5120
+    dod = create_definition_of_done("Create a multi-file guide.")
5121
+    dod.implementation_plan = str(implementation_plan)
5122
+    dod.touched_files.extend([str(index_path), str(chapter_one)])
5123
+    dod.verification_commands = [f"ls -la {guide_root}"]
5124
+
5125
+    runner._queue_blocked_html_edit_nudge(
5126
+        ToolCall(
5127
+            id="edit-1",
5128
+            name="edit",
5129
+            arguments={
5130
+                "file_path": str(index_path),
5131
+                "old_string": "same",
5132
+                "new_string": "same",
5133
+            },
5134
+        ),
5135
+        "[Blocked - old_string and new_string are identical - no change would occur] Suggestion: Provide different old and new strings",
5136
+        dod=dod,
5137
+    )
5138
+
5139
+    assert queued
5140
+    assert "All explicitly planned artifacts already exist." in queued[0]
5141
+    assert "Move to verification or final confirmation using the files already on disk." in queued[0]
5142
+    assert "replace the surrounding block" not in queued[0]
5143
+
5144
+
50585145
 async def _noop_emit(event: AgentEvent) -> None:
50595146
     return None
50605147