@@ -2199,6 +2199,102 @@ async def test_tool_batch_runner_successful_read_after_plan_complete_pushes_revi |
| 2199 | assert "If no specific mismatch remains, move to verification now." in message | 2199 | assert "If no specific mismatch remains, move to verification now." in message |
| 2200 | | 2200 | |
| 2201 | | 2201 | |
@pytest.mark.asyncio
async def test_tool_batch_runner_successful_read_after_plan_complete_switches_to_verify(
    temp_dir: Path,
) -> None:
    """Check the verify handoff after a read when every planned artifact exists.

    The implementation plan lists two directories and three files, all of
    which are created on disk before a single successful ``read`` call is
    executed. The test expects exactly one persistent steering message that
    announces verification, no ephemeral steering message, and
    ``context.workflow_mode`` set to ``"verify"``.
    """

    # Both hooks fail loudly: this scenario must not reach either of them.
    async def fail_on_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should not run for this scenario")

    async def fail_on_verification(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run for this scenario")

    # Lay out the guide tree so that every path named in the plan exists.
    nginx_dir = temp_dir / "guides" / "nginx"
    chapters_dir = nginx_dir / "chapters"
    nginx_dir.mkdir(parents=True)
    chapters_dir.mkdir()
    index_page = nginx_dir / "index.html"
    first_chapter = chapters_dir / "01-getting-started.html"
    second_chapter = chapters_dir / "02-installation.html"
    for page, markup in (
        (index_page, "<html></html>\n"),
        (first_chapter, "<h1>One</h1>\n"),
        (second_chapter, "<h1>Two</h1>\n"),
    ):
        page.write_text(markup)

    # The plan lists the two directories (with a trailing slash) and then the files.
    planned_entries = [
        f"- `{nginx_dir}/`",
        f"- `{chapters_dir}/`",
        *(f"- `{artifact}`" for artifact in (index_page, first_chapter, second_chapter)),
    ]
    plan_lines = ["# Implementation Plan", "", "## File Changes", *planned_entries, ""]
    plan_file = temp_dir / "implementation.md"
    plan_file.write_text("\n".join(plan_lines))

    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=fail_on_confidence,
        verify_action=fail_on_verification,
        auto_recover=False,
    )
    # Capture both steering channels separately so each can be asserted on.
    persistent_messages: list[str] = []
    ephemeral_messages: list[str] = []
    context.queue_steering_message_callback = persistent_messages.append
    context.queue_ephemeral_steering_message_callback = ephemeral_messages.append

    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(plan_file)
    dod.verification_commands = [f"ls -la {nginx_dir}"]

    read_call = ToolCall(
        id="read-built-verify",
        name="read",
        arguments={"file_path": str(first_chapter)},
    )
    read_outcome = tool_outcome(
        tool_call=read_call,
        output=first_chapter.read_text(),
        is_error=False,
    )
    executor = FakeExecutor([read_outcome])

    await runner.execute_batch(
        tool_calls=[read_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=TurnSummary(final_response=""),
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    assert len(persistent_messages) == 1
    steering_message = persistent_messages[0]
    for expected_fragment in (
        "All explicitly planned artifacts already exist.",
        "Verification should run next.",
        "stop broad rereads",
    ):
        assert expected_fragment in steering_message
    assert ephemeral_messages == []
    assert context.workflow_mode == "verify"
| | 2296 | + |
| | 2297 | + |
| 2202 | @pytest.mark.asyncio | 2298 | @pytest.mark.asyncio |
| 2203 | async def test_tool_batch_runner_observation_handoff_pushes_mutation_step( | 2299 | async def test_tool_batch_runner_observation_handoff_pushes_mutation_step( |
| 2204 | temp_dir: Path, | 2300 | temp_dir: Path, |
@@ -3932,6 +4028,7 @@ async def test_tool_batch_runner_todowrite_after_artifacts_exist_pushes_verifica |
| 3932 | assert "Move to verification once no specific mismatch remains." in message | 4028 | assert "Move to verification once no specific mismatch remains." in message |
| 3933 | assert "reopen reference materials" in message | 4029 | assert "reopen reference materials" in message |
| 3934 | assert "Fortran guide structure" not in message | 4030 | assert "Fortran guide structure" not in message |
| | 4031 | + assert context.workflow_mode == "execute" |
| 3935 | | 4032 | |
| 3936 | | 4033 | |
| 3937 | @pytest.mark.asyncio | 4034 | @pytest.mark.asyncio |
@@ -4066,8 +4163,9 @@ async def test_tool_batch_runner_todowrite_after_outputs_exist_but_links_missing |
| 4066 | assert queued_messages | 4163 | assert queued_messages |
| 4067 | message = queued_messages[-1] | 4164 | message = queued_messages[-1] |
| 4068 | assert "Todo tracking is updated. All explicitly planned artifacts now exist on disk." in message | 4165 | assert "Todo tracking is updated. All explicitly planned artifacts now exist on disk." in message |
| 4069 | - assert "Repair or verify the current files instead of expanding the artifact set." in message | 4166 | + assert "Verification should run next." in message |
| 4070 | - assert "Move to verification or final confirmation using the files already on disk." in message | 4167 | + assert "Repair or verify the current files instead of expanding the artifact set." not in message |
| | 4168 | + assert context.workflow_mode == "verify" |
| 4071 | | 4169 | |
| 4072 | | 4170 | |
| 4073 | @pytest.mark.asyncio | 4171 | @pytest.mark.asyncio |
@@ -4221,9 +4319,10 @@ async def test_tool_batch_runner_todowrite_drops_unplanned_expansion_after_outpu |
| 4221 | assert queued_messages | 4319 | assert queued_messages |
| 4222 | message = queued_messages[-1] | 4320 | message = queued_messages[-1] |
| 4223 | assert "Todo tracking is updated. All explicitly planned artifacts now exist on disk." in message | 4321 | assert "Todo tracking is updated. All explicitly planned artifacts now exist on disk." in message |
| 4224 | - assert "Repair or verify the current files instead of expanding the artifact set." in message | 4322 | + assert "Verification should run next." in message |
| 4225 | - assert "Move to verification or final confirmation using the files already on disk." in message | 4323 | + assert "Repair or verify the current files instead of expanding the artifact set." not in message |
| 4226 | assert "08-troubleshooting.html" not in message | 4324 | assert "08-troubleshooting.html" not in message |
| | 4325 | + assert context.workflow_mode == "verify" |
| 4227 | | 4326 | |
| 4228 | | 4327 | |
| 4229 | @pytest.mark.asyncio | 4328 | @pytest.mark.asyncio |