`67643a7`

Prefer verification after no-op edits

Authored by mfwolffe <wolffemf@dukes.jmu.edu> 1 week ago

SHA: 67643a7c9d445bdbf26aa0bfbfadfb2e1e314fc2
Parents: a6df5b8
Tree: 62b1d96

2 changed files

Status	File	+	-
M	`src/loader/runtime/tool_batches.py`	32	2
M	`tests/test_tool_batches.py`	87	0

src/loader/runtime/tool_batches.py — modified

                      dod=dod,
+                 )
                  self._queue_blocked_shell_rewrite_nudge(tool_call)
 -                self._queue_blocked_html_edit_nudge(tool_call, outcome.event_content)
 +                self._queue_blocked_html_edit_nudge(
 +                    tool_call,
 +                    outcome.event_content,
 +                    dod=dod,
 +                )
              else:
                  self._queue_post_mutation_self_audit_nudge(tool_call, dod=dod)
                  self._queue_completed_artifact_observation_handoff_nudge(
              "Do not reopen earlier reference materials."
+         )
 -    def _queue_blocked_html_edit_nudge(self, tool_call: ToolCall, event_content: str) -> None:
 +    def _queue_blocked_html_edit_nudge(
 +        self,
 +        tool_call: ToolCall,
 +        event_content: str,
 +        *,
 +        dod: DefinitionOfDone,
 +    ) -> None:
          """Keep blocked edit feedback generic; avoid task-class-specific steering."""
          if tool_call.name != "edit":
          if not target:
              return
 +        verification_commands = dod.verification_commands or derive_verification_commands(
 +            dod,
 +            project_root=self.context.project_root,
 +            task_statement=getattr(self.context.session, "current_task", "") or "",
 +            supplement_existing=True,
 +        )
 +        if all_planned_artifacts_exist(dod, project_root=self.context.project_root):
 +            verification_suffix = (
 +                " Move to verification or final confirmation using the files already on disk."
 +                if verification_commands
 +                else " If no concrete mismatch remains, stop editing and finish from the files already on disk."
 +            )
 +            self.context.queue_steering_message(
 +                "That edit would make no on-disk change. "
 +                f"`{target}` already matches the change you attempted. "
 +                "All explicitly planned artifacts already exist."
 +                + verification_suffix
 +            )
 +            return
++
          self.context.queue_steering_message(
              "That edit would make no on-disk change. "
              f"Stay on `{target}` and use the current file contents as the source of truth. "

tests/test_tool_batches.py — modified

      queued: list[str] = []
      context.queue_steering_message_callback = queued.append
      runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
 +    dod = create_definition_of_done("Repair a guide page.")
      runner._queue_blocked_html_edit_nudge(
          ToolCall(
              },
          ),
          "[Blocked - old_string and new_string are identical - no change would occur] Suggestion: Provide different old and new strings",
 +        dod=dod,
+     )
      assert queued
      assert "Do not reopen unrelated reference materials" in queued[0]
 +def test_tool_batch_runner_blocked_noop_edit_after_full_build_prefers_verification(
 +    temp_dir: Path,
 +) -> None:
 +    async def assess_confidence(
 +        tool_name: str,
 +        tool_args: dict,
 +        context: str,
 +    ) -> ConfidenceAssessment:
 +        raise AssertionError("Confidence scoring should be disabled in this scenario")
++
 +    async def verify_action(
 +        tool_name: str,
 +        tool_args: dict,
 +        result: str,
 +        expected: str = "",
 +    ) -> ActionVerification:
 +        raise AssertionError("Verification should not run in this scenario")
++
 +    guide_root = temp_dir / "guide"
 +    chapters = guide_root / "chapters"
 +    chapters.mkdir(parents=True)
 +    index_path = guide_root / "index.html"
 +    chapter_one = chapters / "01-introduction.html"
 +    index_path.write_text("<html></html>\n")
 +    chapter_one.write_text("<html></html>\n")
++
 +    implementation_plan = temp_dir / "implementation.md"
 +    implementation_plan.write_text(
 +        "\n".join(
 +            [
 +                "# Implementation Plan",
 +                "",
 +                "## File Changes",
 +                f"- `{index_path}`",
 +                f"- `{chapter_one}`",
 +                "",
 +            ]
 +        )
 +    )
++
 +    context = build_context(
 +        temp_dir=temp_dir,
 +        messages=[
 +            Message(
 +                role=Role.ASSISTANT,
 +                content=(
 +                    "Repair focus:\n"
 +                    f"- Confirm the final guide state in `{index_path}`.\n"
 +                    f"- Immediate next step: verify `{index_path}` if no concrete mismatch remains.\n"
 +                ),
 +            )
 +        ],
 +        safeguards=FakeSafeguards(),
 +        assess_confidence=assess_confidence,
 +        verify_action=verify_action,
 +    )
 +    queued: list[str] = []
 +    context.queue_steering_message_callback = queued.append
 +    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
++
 +    dod = create_definition_of_done("Create a multi-file guide.")
 +    dod.implementation_plan = str(implementation_plan)
 +    dod.touched_files.extend([str(index_path), str(chapter_one)])
 +    dod.verification_commands = [f"ls -la {guide_root}"]
++
 +    runner._queue_blocked_html_edit_nudge(
 +        ToolCall(
 +            id="edit-1",
 +            name="edit",
 +            arguments={
 +                "file_path": str(index_path),
 +                "old_string": "same",
 +                "new_string": "same",
 +            },
 +        ),
 +        "[Blocked - old_string and new_string are identical - no change would occur] Suggestion: Provide different old and new strings",
 +        dod=dod,
 +    )
++
 +    assert queued
 +    assert "All explicitly planned artifacts already exist." in queued[0]
 +    assert "Move to verification or final confirmation using the files already on disk." in queued[0]
 +    assert "replace the surrounding block" not in queued[0]
++
++
  async def _noop_emit(event: AgentEvent) -> None:
      return None