@@ -5034,6 +5034,7 @@ def test_tool_batch_runner_blocked_noop_edit_nudge_stays_on_active_repair_target |
| 5034 | 5034 | queued: list[str] = [] |
| 5035 | 5035 | context.queue_steering_message_callback = queued.append |
| 5036 | 5036 | runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) |
| 5037 | + dod = create_definition_of_done("Repair a guide page.") |
| 5037 | 5038 | |
| 5038 | 5039 | runner._queue_blocked_html_edit_nudge( |
| 5039 | 5040 | ToolCall( |
@@ -5046,6 +5047,7 @@ def test_tool_batch_runner_blocked_noop_edit_nudge_stays_on_active_repair_target |
| 5046 | 5047 | }, |
| 5047 | 5048 | ), |
| 5048 | 5049 | "[Blocked - old_string and new_string are identical - no change would occur] Suggestion: Provide different old and new strings", |
| 5050 | + dod=dod, |
| 5049 | 5051 | ) |
| 5050 | 5052 | |
| 5051 | 5053 | assert queued |
@@ -5055,6 +5057,91 @@ def test_tool_batch_runner_blocked_noop_edit_nudge_stays_on_active_repair_target |
| 5055 | 5057 | assert "Do not reopen unrelated reference materials" in queued[0] |
| 5056 | 5058 | |
| 5057 | 5059 | |
def test_tool_batch_runner_blocked_noop_edit_after_full_build_prefers_verification(
    temp_dir: Path,
) -> None:
    """Blocked no-op edit after a full build should nudge toward verification.

    Both files named in the implementation plan are created on disk and
    recorded as touched before an identical old/new ``edit`` call is reported
    as blocked. The queued steering message must then say that the planned
    artifacts already exist and point to verification, and must not contain
    the "replace the surrounding block" advice.
    """

    async def _reject_confidence_scoring(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        # Fails the test loudly if the runner ever asks for a confidence score.
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def _reject_verification(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        # Fails the test loudly if the runner ever tries to verify an action.
        raise AssertionError("Verification should not run in this scenario")

    # Lay out a minimal guide on disk: an index page plus one chapter.
    guide_dir = temp_dir / "guide"
    chapters_dir = guide_dir / "chapters"
    chapters_dir.mkdir(parents=True)
    index_html = guide_dir / "index.html"
    intro_chapter = chapters_dir / "01-introduction.html"
    for page in (index_html, intro_chapter):
        page.write_text("<html></html>\n")

    # The plan lists exactly the two files that were just created.
    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{index_html}`",
        f"- `{intro_chapter}`",
        "",
    ]
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text("\n".join(plan_lines))

    repair_focus = (
        "Repair focus:\n"
        f"- Confirm the final guide state in `{index_html}`.\n"
        f"- Immediate next step: verify `{index_html}` if no concrete mismatch remains.\n"
    )
    context = build_context(
        temp_dir=temp_dir,
        messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
        safeguards=FakeSafeguards(),
        assess_confidence=_reject_confidence_scoring,
        verify_action=_reject_verification,
    )

    # Capture every steering message the runner queues.
    steering_messages: list[str] = []
    context.queue_steering_message_callback = steering_messages.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))

    dod = create_definition_of_done("Create a multi-file guide.")
    dod.implementation_plan = str(plan_file)
    dod.touched_files.extend([str(path) for path in (index_html, intro_chapter)])
    dod.verification_commands = [f"ls -la {guide_dir}"]

    # An edit whose old and new strings match is the blocked no-op under test.
    noop_edit = ToolCall(
        id="edit-1",
        name="edit",
        arguments={
            "file_path": str(index_html),
            "old_string": "same",
            "new_string": "same",
        },
    )
    blocked_result = "[Blocked - old_string and new_string are identical - no change would occur] Suggestion: Provide different old and new strings"
    runner._queue_blocked_html_edit_nudge(noop_edit, blocked_result, dod=dod)

    assert steering_messages
    nudge = steering_messages[0]
    assert "All explicitly planned artifacts already exist." in nudge
    assert "Move to verification or final confirmation using the files already on disk." in nudge
    assert "replace the surrounding block" not in nudge
| 5143 | + |
| 5144 | + |
async def _noop_emit(event: AgentEvent) -> None:
    """Accept an event and do nothing with it."""
    return
| 5060 | 5147 | |