@@ -2288,8 +2288,10 @@ async def test_duplicate_observation_nudge_prioritizes_missing_artifact_over_rev |
| 2288 | 2288 | verify_action=verify_action, |
| 2289 | 2289 | auto_recover=False, |
| 2290 | 2290 | ) |
| 2291 | | - queued_messages: list[str] = [] |
| 2292 | | - context.queue_steering_message_callback = queued_messages.append |
| 2291 | + persistent_messages: list[str] = [] |
| 2292 | + ephemeral_messages: list[str] = [] |
| 2293 | + context.queue_steering_message_callback = persistent_messages.append |
| 2294 | + context.queue_ephemeral_steering_message_callback = ephemeral_messages.append |
| 2293 | 2295 | runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) |
| 2294 | 2296 | dod = create_definition_of_done("Create a multi-file nginx guide.") |
| 2295 | 2297 | dod.implementation_plan = str(implementation_plan) |
@@ -2322,8 +2324,8 @@ async def test_duplicate_observation_nudge_prioritizes_missing_artifact_over_rev |
| 2322 | 2324 | ) |
| 2323 | 2325 | runner._queue_duplicate_observation_nudge(tool_call, dod=dod) # type: ignore[attr-defined] |
| 2324 | 2326 | |
| 2325 | | - assert queued_messages |
| 2326 | | - message = queued_messages[-1] |
| 2327 | + assert persistent_messages |
| 2328 | + message = persistent_messages[-1] |
| 2327 | 2329 | assert "06-ssl-configuration.html" in message |
| 2328 | 2330 | assert "Do not switch into review or consistency-check mode" in message |
| 2329 | 2331 | assert ( |
@@ -2385,8 +2387,10 @@ async def test_tool_batch_runner_hands_off_to_verification_once_planned_artifact |
| 2385 | 2387 | verify_action=verify_action, |
| 2386 | 2388 | auto_recover=False, |
| 2387 | 2389 | ) |
| 2388 | | - queued_messages: list[str] = [] |
| 2389 | | - context.queue_steering_message_callback = queued_messages.append |
| 2390 | + persistent_messages: list[str] = [] |
| 2391 | + ephemeral_messages: list[str] = [] |
| 2392 | + context.queue_steering_message_callback = persistent_messages.append |
| 2393 | + context.queue_ephemeral_steering_message_callback = ephemeral_messages.append |
| 2390 | 2394 | runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) |
| 2391 | 2395 | dod = create_definition_of_done("Create a multi-file nginx guide.") |
| 2392 | 2396 | dod.implementation_plan = str(implementation_plan) |
@@ -2440,15 +2444,15 @@ async def test_tool_batch_runner_hands_off_to_verification_once_planned_artifact |
| 2440 | 2444 | |
| 2441 | 2445 | assert any( |
| 2442 | 2446 | "All explicitly planned artifacts now exist." in message |
| 2443 | | - for message in queued_messages |
| 2447 | + for message in persistent_messages |
| 2444 | 2448 | ) |
| 2445 | 2449 | assert any( |
| 2446 | 2450 | "Ensure all files are properly linked and formatted consistently" in message |
| 2447 | | - for message in queued_messages |
| 2451 | + for message in persistent_messages |
| 2448 | 2452 | ) |
| 2449 | 2453 | assert any( |
| 2450 | 2454 | "Move to verification once no specific mismatch remains." in message |
| 2451 | | - for message in queued_messages |
| 2455 | + for message in persistent_messages |
| 2452 | 2456 | ) |
| 2453 | 2457 | |
| 2454 | 2458 | |
@@ -2503,8 +2507,10 @@ async def test_tool_batch_runner_mutation_handoff_points_at_next_missing_artifac |
| 2503 | 2507 | verify_action=verify_action, |
| 2504 | 2508 | auto_recover=False, |
| 2505 | 2509 | ) |
| 2506 | | - queued_messages: list[str] = [] |
| 2507 | | - context.queue_steering_message_callback = queued_messages.append |
| 2510 | + persistent_messages: list[str] = [] |
| 2511 | + ephemeral_messages: list[str] = [] |
| 2512 | + context.queue_steering_message_callback = persistent_messages.append |
| 2513 | + context.queue_ephemeral_steering_message_callback = ephemeral_messages.append |
| 2508 | 2514 | runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) |
| 2509 | 2515 | dod = create_definition_of_done("Create a multi-file nginx guide.") |
| 2510 | 2516 | dod.implementation_plan = str(implementation_plan) |
@@ -2552,8 +2558,8 @@ async def test_tool_batch_runner_mutation_handoff_points_at_next_missing_artifac |
| 2552 | 2558 | consecutive_errors=0, |
| 2553 | 2559 | ) |
| 2554 | 2560 | |
| 2555 | | - assert queued_messages |
| 2556 | | - message = queued_messages[-1] |
| 2561 | + assert persistent_messages |
| 2562 | + message = persistent_messages[-1] |
| 2557 | 2563 | assert "Next step: create `01-getting-started.html`." in message |
| 2558 | 2564 | assert ( |
| 2559 | 2565 | f"Prefer one `write(file_path=..., content=...)` call for `{chapter_one.resolve(strict=False)}` now." |
@@ -2626,8 +2632,10 @@ async def test_tool_batch_runner_large_plan_does_not_claim_completion_early( |
| 2626 | 2632 | verify_action=verify_action, |
| 2627 | 2633 | auto_recover=False, |
| 2628 | 2634 | ) |
| 2629 | | - queued_messages: list[str] = [] |
| 2630 | | - context.queue_steering_message_callback = queued_messages.append |
| 2635 | + persistent_messages: list[str] = [] |
| 2636 | + ephemeral_messages: list[str] = [] |
| 2637 | + context.queue_steering_message_callback = persistent_messages.append |
| 2638 | + context.queue_ephemeral_steering_message_callback = ephemeral_messages.append |
| 2631 | 2639 | runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) |
| 2632 | 2640 | dod = create_definition_of_done("Create a thorough nginx guide.") |
| 2633 | 2641 | dod.implementation_plan = str(implementation_plan) |
@@ -2681,11 +2689,11 @@ async def test_tool_batch_runner_large_plan_does_not_claim_completion_early( |
| 2681 | 2689 | |
| 2682 | 2690 | assert any( |
| 2683 | 2691 | "Resume by creating `06-performance-tuning.html` now." in message |
| 2684 | | - for message in queued_messages |
| 2692 | + for message in ephemeral_messages |
| 2685 | 2693 | ) |
| 2686 | 2694 | assert not any( |
| 2687 | 2695 | "All explicitly planned artifacts now exist." in message |
| 2688 | | - for message in queued_messages |
| 2696 | + for message in ephemeral_messages |
| 2689 | 2697 | ) |
| 2690 | 2698 | |
| 2691 | 2699 | |
@@ -2747,8 +2755,10 @@ async def test_tool_batch_runner_uses_compact_missing_artifact_nudge_after_subst |
| 2747 | 2755 | verify_action=verify_action, |
| 2748 | 2756 | auto_recover=False, |
| 2749 | 2757 | ) |
| 2750 | | - queued_messages: list[str] = [] |
| 2751 | | - context.queue_steering_message_callback = queued_messages.append |
| 2758 | + persistent_messages: list[str] = [] |
| 2759 | + ephemeral_messages: list[str] = [] |
| 2760 | + context.queue_steering_message_callback = persistent_messages.append |
| 2761 | + context.queue_ephemeral_steering_message_callback = ephemeral_messages.append |
| 2752 | 2762 | runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) |
| 2753 | 2763 | dod = create_definition_of_done("Create a thorough nginx guide.") |
| 2754 | 2764 | dod.implementation_plan = str(implementation_plan) |
@@ -2802,8 +2812,8 @@ async def test_tool_batch_runner_uses_compact_missing_artifact_nudge_after_subst |
| 2802 | 2812 | consecutive_errors=0, |
| 2803 | 2813 | ) |
| 2804 | 2814 | |
| 2805 | | - assert queued_messages |
| 2806 | | - message = queued_messages[-1] |
| 2815 | + assert ephemeral_messages |
| 2816 | + message = ephemeral_messages[-1] |
| 2807 | 2817 | assert "Resume by creating `05-advanced-features.html` now." in message |
| 2808 | 2818 | assert "No TodoWrite, no verification, no rereads until that artifact exists." in message |
| 2809 | 2819 | assert "refresh `TodoWrite`" not in message |
@@ -2863,8 +2873,10 @@ async def test_tool_batch_runner_todowrite_with_missing_artifact_requeues_exact_ |
| 2863 | 2873 | verify_action=verify_action, |
| 2864 | 2874 | auto_recover=False, |
| 2865 | 2875 | ) |
| 2866 | | - queued_messages: list[str] = [] |
| 2867 | | - context.queue_steering_message_callback = queued_messages.append |
| 2876 | + persistent_messages: list[str] = [] |
| 2877 | + ephemeral_messages: list[str] = [] |
| 2878 | + context.queue_steering_message_callback = persistent_messages.append |
| 2879 | + context.queue_ephemeral_steering_message_callback = ephemeral_messages.append |
| 2868 | 2880 | runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) |
| 2869 | 2881 | dod = create_definition_of_done("Create a multi-file nginx guide.") |
| 2870 | 2882 | dod.implementation_plan = str(implementation_plan) |
@@ -2942,12 +2954,13 @@ async def test_tool_batch_runner_todowrite_with_missing_artifact_requeues_exact_ |
| 2942 | 2954 | consecutive_errors=0, |
| 2943 | 2955 | ) |
| 2944 | 2956 | |
| 2945 | | - assert queued_messages |
| 2946 | | - message = queued_messages[-1] |
| 2957 | + assert persistent_messages |
| 2958 | + message = persistent_messages[-1] |
| 2947 | 2959 | assert "Todo tracking is updated. A declared output artifact is still missing." in message |
| 2948 | 2960 | assert "Resume by creating `02-installation.html` now." in message |
| 2949 | 2961 | assert "refresh `TodoWrite`" in message |
| 2950 | 2962 | assert "Do not spend the next turn on TodoWrite alone" in message |
| 2963 | + assert ephemeral_messages == [] |
| 2951 | 2964 | |
| 2952 | 2965 | |
| 2953 | 2966 | @pytest.mark.asyncio |