@@ -2363,6 +2363,114 @@ async def test_tool_batch_runner_first_chapter_handoff_becomes_ephemeral_after_f |
| 2363 | 2363 | assert "Do not reread reference material or spend the next turn on bookkeeping." in message |
| 2364 | 2364 | |
| 2365 | 2365 | |
@pytest.mark.asyncio
async def test_tool_batch_runner_redirects_post_write_self_audit_to_next_missing_artifact(
    temp_dir: Path,
) -> None:
    """Check the steering queued when the assistant rereads a file it already wrote.

    Scenario: ``index.html`` exists on disk and is recorded in the definition
    of done as a touched file, while the implementation plan also lists
    ``chapters/01-introduction.html``, which has not been created. The
    assistant then issues a ``read`` of ``index.html``.

    Expected: the runner queues a persistent steering message that names
    ``01-introduction.html`` as the file to create next, and queues nothing
    on the ephemeral channel.
    """

    # Neither hook is expected to be invoked here; each one fails the test
    # loudly if the runner reaches it.
    async def _reject_confidence_scoring(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should not run in this scenario")

    async def _reject_verification(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    # On-disk fixture: the guide root, an empty chapters directory, and an
    # index page linking to two chapters that do not exist yet.
    guide_root = temp_dir / "guides" / "nginx"
    chapters_dir = guide_root / "chapters"
    chapters_dir.mkdir(parents=True)
    index_file = guide_root / "index.html"
    index_lines = [
        "<html>",
        '<a href="chapters/01-introduction.html">Chapter 1: Introduction to Nginx</a>',
        '<a href="chapters/02-installation.html">Chapter 2: Installation and Setup</a>',
        "</html>",
    ]
    index_file.write_text("\n".join(index_lines) + "\n")

    # Implementation plan listing the index plus the first chapter; the
    # trailing empty entry makes the joined text end with a newline.
    first_chapter = chapters_dir / "01-introduction.html"
    plan_file = temp_dir / "implementation.md"
    plan_lines = [
        "# Implementation Plan",
        "",
        "## File Changes",
        f"- `{guide_root}/`",
        f"- `{chapters_dir}/`",
        f"- `{index_file}`",
        f"- `{first_chapter}`",
        "",
    ]
    plan_file.write_text("\n".join(plan_lines))

    runner_context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=_reject_confidence_scoring,
        verify_action=_reject_verification,
        auto_recover=False,
    )

    # Capture both steering channels so each can be inspected separately.
    persistent_messages: list[str] = []
    ephemeral_messages: list[str] = []
    runner_context.queue_steering_message_callback = persistent_messages.append
    runner_context.queue_ephemeral_steering_message_callback = ephemeral_messages.append

    runner = ToolBatchRunner(runner_context, DefinitionOfDoneStore(temp_dir))

    # Definition of done: index already written, chapter work still pending.
    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(plan_file)
    dod.touched_files.append(str(index_file))
    dod.completed_items.append("Develop the main index.html file for the nginx guide")
    dod.pending_items.append("Create chapter files for the nginx guide")

    # The "self-audit": a read of the file that was just written.
    reread_call = ToolCall(
        id="read-index-self-audit",
        name="read",
        arguments={"file_path": str(index_file)},
    )
    canned_outcome = tool_outcome(
        tool_call=reread_call,
        output="1\t<html>\n",
        is_error=False,
    )
    executor = FakeExecutor([canned_outcome])

    summary = TurnSummary(final_response="")
    await runner.execute_batch(
        tool_calls=[reread_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=summary,
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    assert persistent_messages
    message = persistent_messages[-1]
    expected_fragments = (
        "You already have the current contents of `index.html` from the successful write.",
        "Resume by creating `01-introduction.html` now.",
        "Do not spend another turn rereading the file you just wrote or on TodoWrite alone.",
    )
    for fragment in expected_fragments:
        assert fragment in message
    assert ephemeral_messages == []
| 2472 | + |
| 2473 | + |
| 2366 | 2474 | @pytest.mark.asyncio |
| 2367 | 2475 | async def test_tool_batch_runner_softens_first_file_handoff_after_recovery_prompt( |
| 2368 | 2476 | temp_dir: Path, |