tenseleyflow/loader / 1403a6a

Browse files

Persist TodoWrite resumes

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
1403a6a1b81c3f2952cc48c1b44ba639e894d896
Parents
fa5d5a8
Tree
d76c856

2 changed files

Status File + -
M src/loader/runtime/tool_batches.py 3 3
M tests/test_tool_batches.py 38 25
src/loader/runtime/tool_batches.py modified
@@ -1231,9 +1231,9 @@ class ToolBatchRunner:
1231
                         "Perform the mutation now instead of spending another turn on "
1231
                         "Perform the mutation now instead of spending another turn on "
1232
                         "planning, rereads, or verification."
1232
                         "planning, rereads, or verification."
1233
                     )
1233
                     )
1234
-                    self.context.queue_ephemeral_steering_message(concrete_message)
1234
+                    self.context.queue_steering_message(concrete_message)
1235
                     return
1235
                     return
1236
-                self.context.queue_ephemeral_steering_message(
1236
+                self.context.queue_steering_message(
1237
                     "Todo tracking is updated. Continue with the next pending item: "
1237
                     "Todo tracking is updated. Continue with the next pending item: "
1238
                     f"`{next_pending}`. Use the current output files as the source of "
1238
                     f"`{next_pending}`. Use the current output files as the source of "
1239
                     "truth, and do not reopen reference materials unless one specific "
1239
                     "truth, and do not reopen reference materials unless one specific "
@@ -1305,7 +1305,7 @@ class ToolBatchRunner:
1305
             if next_pending
1305
             if next_pending
1306
             else ""
1306
             else ""
1307
         )
1307
         )
1308
-        self.context.queue_ephemeral_steering_message(
1308
+        self.context.queue_steering_message(
1309
             "Todo tracking is updated. A declared output artifact is still missing."
1309
             "Todo tracking is updated. A declared output artifact is still missing."
1310
             + next_pending_suffix
1310
             + next_pending_suffix
1311
             + _missing_artifact_resume_suffix(
1311
             + _missing_artifact_resume_suffix(
tests/test_tool_batches.py modified
@@ -2288,8 +2288,10 @@ async def test_duplicate_observation_nudge_prioritizes_missing_artifact_over_rev
2288
         verify_action=verify_action,
2288
         verify_action=verify_action,
2289
         auto_recover=False,
2289
         auto_recover=False,
2290
     )
2290
     )
2291
-    queued_messages: list[str] = []
2291
+    persistent_messages: list[str] = []
2292
-    context.queue_steering_message_callback = queued_messages.append
2292
+    ephemeral_messages: list[str] = []
2293
+    context.queue_steering_message_callback = persistent_messages.append
2294
+    context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
2293
     runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
2295
     runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
2294
     dod = create_definition_of_done("Create a multi-file nginx guide.")
2296
     dod = create_definition_of_done("Create a multi-file nginx guide.")
2295
     dod.implementation_plan = str(implementation_plan)
2297
     dod.implementation_plan = str(implementation_plan)
@@ -2322,8 +2324,8 @@ async def test_duplicate_observation_nudge_prioritizes_missing_artifact_over_rev
2322
     )
2324
     )
2323
     runner._queue_duplicate_observation_nudge(tool_call, dod=dod)  # type: ignore[attr-defined]
2325
     runner._queue_duplicate_observation_nudge(tool_call, dod=dod)  # type: ignore[attr-defined]
2324
 
2326
 
2325
-    assert queued_messages
2327
+    assert persistent_messages
2326
-    message = queued_messages[-1]
2328
+    message = persistent_messages[-1]
2327
     assert "06-ssl-configuration.html" in message
2329
     assert "06-ssl-configuration.html" in message
2328
     assert "Do not switch into review or consistency-check mode" in message
2330
     assert "Do not switch into review or consistency-check mode" in message
2329
     assert (
2331
     assert (
@@ -2385,8 +2387,10 @@ async def test_tool_batch_runner_hands_off_to_verification_once_planned_artifact
2385
         verify_action=verify_action,
2387
         verify_action=verify_action,
2386
         auto_recover=False,
2388
         auto_recover=False,
2387
     )
2389
     )
2388
-    queued_messages: list[str] = []
2390
+    persistent_messages: list[str] = []
2389
-    context.queue_steering_message_callback = queued_messages.append
2391
+    ephemeral_messages: list[str] = []
2392
+    context.queue_steering_message_callback = persistent_messages.append
2393
+    context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
2390
     runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
2394
     runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
2391
     dod = create_definition_of_done("Create a multi-file nginx guide.")
2395
     dod = create_definition_of_done("Create a multi-file nginx guide.")
2392
     dod.implementation_plan = str(implementation_plan)
2396
     dod.implementation_plan = str(implementation_plan)
@@ -2440,15 +2444,15 @@ async def test_tool_batch_runner_hands_off_to_verification_once_planned_artifact
2440
 
2444
 
2441
     assert any(
2445
     assert any(
2442
         "All explicitly planned artifacts now exist." in message
2446
         "All explicitly planned artifacts now exist." in message
2443
-        for message in queued_messages
2447
+        for message in persistent_messages
2444
     )
2448
     )
2445
     assert any(
2449
     assert any(
2446
         "Ensure all files are properly linked and formatted consistently" in message
2450
         "Ensure all files are properly linked and formatted consistently" in message
2447
-        for message in queued_messages
2451
+        for message in persistent_messages
2448
     )
2452
     )
2449
     assert any(
2453
     assert any(
2450
         "Move to verification once no specific mismatch remains." in message
2454
         "Move to verification once no specific mismatch remains." in message
2451
-        for message in queued_messages
2455
+        for message in persistent_messages
2452
     )
2456
     )
2453
 
2457
 
2454
 
2458
 
@@ -2503,8 +2507,10 @@ async def test_tool_batch_runner_mutation_handoff_points_at_next_missing_artifac
2503
         verify_action=verify_action,
2507
         verify_action=verify_action,
2504
         auto_recover=False,
2508
         auto_recover=False,
2505
     )
2509
     )
2506
-    queued_messages: list[str] = []
2510
+    persistent_messages: list[str] = []
2507
-    context.queue_steering_message_callback = queued_messages.append
2511
+    ephemeral_messages: list[str] = []
2512
+    context.queue_steering_message_callback = persistent_messages.append
2513
+    context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
2508
     runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
2514
     runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
2509
     dod = create_definition_of_done("Create a multi-file nginx guide.")
2515
     dod = create_definition_of_done("Create a multi-file nginx guide.")
2510
     dod.implementation_plan = str(implementation_plan)
2516
     dod.implementation_plan = str(implementation_plan)
@@ -2552,8 +2558,8 @@ async def test_tool_batch_runner_mutation_handoff_points_at_next_missing_artifac
2552
         consecutive_errors=0,
2558
         consecutive_errors=0,
2553
     )
2559
     )
2554
 
2560
 
2555
-    assert queued_messages
2561
+    assert persistent_messages
2556
-    message = queued_messages[-1]
2562
+    message = persistent_messages[-1]
2557
     assert "Next step: create `01-getting-started.html`." in message
2563
     assert "Next step: create `01-getting-started.html`." in message
2558
     assert (
2564
     assert (
2559
         f"Prefer one `write(file_path=..., content=...)` call for `{chapter_one.resolve(strict=False)}` now."
2565
         f"Prefer one `write(file_path=..., content=...)` call for `{chapter_one.resolve(strict=False)}` now."
@@ -2626,8 +2632,10 @@ async def test_tool_batch_runner_large_plan_does_not_claim_completion_early(
2626
         verify_action=verify_action,
2632
         verify_action=verify_action,
2627
         auto_recover=False,
2633
         auto_recover=False,
2628
     )
2634
     )
2629
-    queued_messages: list[str] = []
2635
+    persistent_messages: list[str] = []
2630
-    context.queue_steering_message_callback = queued_messages.append
2636
+    ephemeral_messages: list[str] = []
2637
+    context.queue_steering_message_callback = persistent_messages.append
2638
+    context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
2631
     runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
2639
     runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
2632
     dod = create_definition_of_done("Create a thorough nginx guide.")
2640
     dod = create_definition_of_done("Create a thorough nginx guide.")
2633
     dod.implementation_plan = str(implementation_plan)
2641
     dod.implementation_plan = str(implementation_plan)
@@ -2681,11 +2689,11 @@ async def test_tool_batch_runner_large_plan_does_not_claim_completion_early(
2681
 
2689
 
2682
     assert any(
2690
     assert any(
2683
         "Resume by creating `06-performance-tuning.html` now." in message
2691
         "Resume by creating `06-performance-tuning.html` now." in message
2684
-        for message in queued_messages
2692
+        for message in ephemeral_messages
2685
     )
2693
     )
2686
     assert not any(
2694
     assert not any(
2687
         "All explicitly planned artifacts now exist." in message
2695
         "All explicitly planned artifacts now exist." in message
2688
-        for message in queued_messages
2696
+        for message in ephemeral_messages
2689
     )
2697
     )
2690
 
2698
 
2691
 
2699
 
@@ -2747,8 +2755,10 @@ async def test_tool_batch_runner_uses_compact_missing_artifact_nudge_after_subst
2747
         verify_action=verify_action,
2755
         verify_action=verify_action,
2748
         auto_recover=False,
2756
         auto_recover=False,
2749
     )
2757
     )
2750
-    queued_messages: list[str] = []
2758
+    persistent_messages: list[str] = []
2751
-    context.queue_steering_message_callback = queued_messages.append
2759
+    ephemeral_messages: list[str] = []
2760
+    context.queue_steering_message_callback = persistent_messages.append
2761
+    context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
2752
     runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
2762
     runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
2753
     dod = create_definition_of_done("Create a thorough nginx guide.")
2763
     dod = create_definition_of_done("Create a thorough nginx guide.")
2754
     dod.implementation_plan = str(implementation_plan)
2764
     dod.implementation_plan = str(implementation_plan)
@@ -2802,8 +2812,8 @@ async def test_tool_batch_runner_uses_compact_missing_artifact_nudge_after_subst
2802
         consecutive_errors=0,
2812
         consecutive_errors=0,
2803
     )
2813
     )
2804
 
2814
 
2805
-    assert queued_messages
2815
+    assert ephemeral_messages
2806
-    message = queued_messages[-1]
2816
+    message = ephemeral_messages[-1]
2807
     assert "Resume by creating `05-advanced-features.html` now." in message
2817
     assert "Resume by creating `05-advanced-features.html` now." in message
2808
     assert "No TodoWrite, no verification, no rereads until that artifact exists." in message
2818
     assert "No TodoWrite, no verification, no rereads until that artifact exists." in message
2809
     assert "refresh `TodoWrite`" not in message
2819
     assert "refresh `TodoWrite`" not in message
@@ -2863,8 +2873,10 @@ async def test_tool_batch_runner_todowrite_with_missing_artifact_requeues_exact_
2863
         verify_action=verify_action,
2873
         verify_action=verify_action,
2864
         auto_recover=False,
2874
         auto_recover=False,
2865
     )
2875
     )
2866
-    queued_messages: list[str] = []
2876
+    persistent_messages: list[str] = []
2867
-    context.queue_steering_message_callback = queued_messages.append
2877
+    ephemeral_messages: list[str] = []
2878
+    context.queue_steering_message_callback = persistent_messages.append
2879
+    context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
2868
     runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
2880
     runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
2869
     dod = create_definition_of_done("Create a multi-file nginx guide.")
2881
     dod = create_definition_of_done("Create a multi-file nginx guide.")
2870
     dod.implementation_plan = str(implementation_plan)
2882
     dod.implementation_plan = str(implementation_plan)
@@ -2942,12 +2954,13 @@ async def test_tool_batch_runner_todowrite_with_missing_artifact_requeues_exact_
2942
         consecutive_errors=0,
2954
         consecutive_errors=0,
2943
     )
2955
     )
2944
 
2956
 
2945
-    assert queued_messages
2957
+    assert persistent_messages
2946
-    message = queued_messages[-1]
2958
+    message = persistent_messages[-1]
2947
     assert "Todo tracking is updated. A declared output artifact is still missing." in message
2959
     assert "Todo tracking is updated. A declared output artifact is still missing." in message
2948
     assert "Resume by creating `02-installation.html` now." in message
2960
     assert "Resume by creating `02-installation.html` now." in message
2949
     assert "refresh `TodoWrite`" in message
2961
     assert "refresh `TodoWrite`" in message
2950
     assert "Do not spend the next turn on TodoWrite alone" in message
2962
     assert "Do not spend the next turn on TodoWrite alone" in message
2963
+    assert ephemeral_messages == []
2951
 
2964
 
2952
 
2965
 
2953
 @pytest.mark.asyncio
2966
 @pytest.mark.asyncio