@@ -4280,6 +4280,133 @@ async def test_tool_batch_runner_preempts_post_build_audit_after_todowrite_verif |
| 4280 | 4280 | assert "Verification should run next." in queued_messages[-1] |
| 4281 | 4281 | |
| 4282 | 4282 | |
@pytest.mark.asyncio
async def test_tool_batch_runner_todowrite_complete_directory_plan_does_not_reinfer_first_child(
    temp_dir: Path,
) -> None:
    """Check the steering message after an empty TodoWrite on a directory plan.

    Fixture layout built under ``temp_dir``:

    * ``fortran/chapters/01-introduction.html`` -- a reference file that the
      seeded assistant message has already ``read``.
    * ``Loader/guides/nginx/index.html`` plus three chapter files under
      ``Loader/guides/nginx/chapters/``.
    * ``implementation.md`` whose "File Changes" section lists the index file
      and the chapters directory (with a trailing slash).

    A single ``TodoWrite`` call with an empty todo list is then executed. The
    test expects the batch to continue, the last queued steering message to
    announce verification without mentioning ``01-introduction.html`` or
    "chapter files", and the context to end up in ``verify`` workflow mode.
    Both the confidence and verification callbacks raise if they are invoked.
    """

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        # Guard callback: reaching this point fails the test.
        raise AssertionError("Confidence scoring should not run for this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        # Guard callback: reaching this point fails the test.
        raise AssertionError("Verification should not run for this scenario")

    # --- Reference guide chapter that the assistant "read" earlier. ---
    reference_chapter = temp_dir / "fortran" / "chapters" / "01-introduction.html"
    reference_chapter.parent.mkdir(parents=True)
    reference_chapter.write_text("<h1>Introduction</h1>\n")

    # --- Already-generated nginx guide: index page plus three chapters. ---
    nginx_guide = temp_dir / "Loader" / "guides" / "nginx"
    chapters_dir = nginx_guide / "chapters"
    nginx_guide.mkdir(parents=True)
    chapters_dir.mkdir()
    index_page = nginx_guide / "index.html"

    index_links = (
        '<a href="chapters/01-introduction.html">Introduction</a>',
        '<a href="chapters/02-installation.html">Installation</a>',
        '<a href="chapters/03-basic-configuration.html">Configuration</a>',
    )
    # One link per line, each terminated by a newline.
    index_page.write_text("".join(f"{link}\n" for link in index_links))

    chapter_filenames = (
        "01-introduction.html",
        "02-installation.html",
        "03-basic-configuration.html",
    )
    for chapter_filename in chapter_filenames:
        (chapters_dir / chapter_filename).write_text("<html></html>\n")

    # --- Implementation plan naming the index file and the chapters directory. ---
    plan_path = temp_dir / "implementation.md"
    plan_lines = (
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{index_page}`",
        f"- `{chapters_dir}/`",
    )
    plan_path.write_text("".join(f"{line}\n" for line in plan_lines))

    # --- Conversation history: one assistant turn that read the reference file. ---
    read_reference_call = ToolCall(
        id="read-reference-child",
        name="read",
        arguments={"file_path": str(reference_chapter)},
    )
    history = [
        Message(
            role=Role.ASSISTANT,
            content="I examined the reference guide structure.",
            tool_calls=[read_reference_call],
        )
    ]

    context = build_context(
        temp_dir=temp_dir,
        messages=history,
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    # Capture every steering message the runner queues.
    queued_messages: list[str] = []
    context.queue_steering_message_callback = queued_messages.append

    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    dod = create_definition_of_done("Create an equally thorough nginx guide.")
    dod.implementation_plan = str(plan_path)
    dod.verification_commands = [f"ls -la {nginx_guide}"]

    # --- The batch under test: a TodoWrite that leaves no todos behind. ---
    todo_call = ToolCall(
        id="todo-complete-directory-plan",
        name="TodoWrite",
        arguments={"todos": []},
    )
    todo_outcome = tool_outcome(
        tool_call=todo_call,
        output="Todos updated",
        is_error=False,
        metadata={"new_todos": []},
    )
    executor = FakeExecutor([todo_outcome])
    summary = TurnSummary(final_response="")

    result = await runner.execute_batch(
        tool_calls=[todo_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=summary,
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    assert result.continue_after_batch is True
    assert queued_messages
    last_message = queued_messages[-1]
    assert "Verification should run next." in last_message
    # The steering text must not point back at an individual chapter file.
    assert "01-introduction.html" not in last_message
    assert "chapter files" not in last_message.lower()
    assert context.workflow_mode == "verify"
| 4408 | + |
| 4409 | + |
| 4283 | 4410 | @pytest.mark.asyncio |
| 4284 | 4411 | async def test_tool_batch_runner_todowrite_drops_unplanned_expansion_after_outputs_exist( |
| 4285 | 4412 | temp_dir: Path, |