tenseleyflow/loader / 8b1407b

Browse files

Clarify verification handoff

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
8b1407bbd52358cb2def7df0e12fc4de777de73b
Parents
d22c38c
Tree
72f8922

4 changed files

Status | File | + | -
M src/loader/runtime/hooks.py 2 2
M src/loader/runtime/parsing.py 3 1
M src/loader/runtime/tool_batches.py 29 20
M tests/test_tool_batches.py 26 19
src/loader/runtime/hooks.py (modified)
@@ -964,8 +964,8 @@ class LateReferenceDriftHook(BaseToolHook):
964964
                         message=(
965965
                             "[Blocked - post-build audit loop: all explicitly planned artifacts "
966966
                             "already exist and the current output set has already been inspected "
967
-                            "several times.] Suggestion: move to verification now or make one "
968
-                            "concrete edit for a specific mismatch inside "
967
+                            "several times.] Suggestion: finish with a final response so Loader "
968
+                            "can verify automatically, or make one concrete edit for a specific mismatch inside "
969969
                             f"{roots_preview} instead of more rereads."
970970
                         ),
971971
                         terminal_state="blocked",
src/loader/runtime/parsing.py (modified)
@@ -443,7 +443,9 @@ def format_tool_result(tool_name: str, result: str, is_error: bool = False) -> s
443443
                 if next_pending:
444444
                     summary_parts.append(f"next pending: {next_pending}")
445445
                 if payload.get("verification_nudge_needed") is True:
446
-                    summary_parts.append("verification should be reviewed next")
446
+                    summary_parts.append(
447
+                        "final response should be provided next for Loader verification"
448
+                    )
447449
                 result = "; ".join(summary_parts)
448450
 
449451
     prefix = "Error" if is_error else "Result"
src/loader/runtime/tool_batches.py (modified)
@@ -140,6 +140,13 @@ _MUTATING_BASH_FRAGMENTS = (
140140
 )
141141
 
142142
 
143
+def _verification_handoff_instruction() -> str:
144
+    return (
145
+        "Finish with a final response now so Loader can run verification automatically. "
146
+        "Do not run more ad hoc audit commands unless you know a specific mismatch to repair."
147
+    )
148
+
149
+
143150
 @dataclass
144151
 class ToolBatchResult:
145152
     """Outcome of running one assistant-proposed tool batch."""
@@ -439,12 +446,12 @@ class ToolBatchRunner:
439446
         guidance = (
440447
             "The remaining work is review/verification of the generated files. "
441448
             "Do not ask the user for more clarification about the reference pattern now. "
442
-            "Use the generated files as the source of truth and move to verification or "
443
-            "repair from concrete failures in those files."
449
+            "Use the generated files as the source of truth and repair any concrete failures "
450
+            f"in those files. {_verification_handoff_instruction()}"
444451
         )
445452
         if verification_commands:
446453
             self.context.workflow_mode = "verify"
447
-            guidance += " Verification should run next."
454
+            guidance += " Do not run more ad hoc audit commands."
448455
 
449456
         self.context.queue_steering_message(guidance)
450457
         message = Message.tool_result_message(
@@ -591,7 +598,7 @@ class ToolBatchRunner:
591598
                 supplement_existing=True,
592599
             )
593600
             verification_suffix = (
594
-                "Move to verification or final confirmation using the files already on disk."
601
+                _verification_handoff_instruction()
595602
                 if verification_commands
596603
                 else "Finish the current review using the files already on disk."
597604
             )
@@ -729,7 +736,7 @@ class ToolBatchRunner:
729736
 
730737
         if next_pending and _todo_is_consistency_review_step(next_pending):
731738
             verification_suffix = (
732
-                " If no specific mismatch remains, move to verification now."
739
+                " If no specific mismatch remains, finish with a final response so Loader can verify."
733740
                 if verification_commands
734741
                 else " If no specific mismatch remains, finish the task now."
735742
             )
@@ -748,12 +755,13 @@ class ToolBatchRunner:
748755
                 "All explicitly planned artifacts already exist. "
749756
                 f"Use the generated files under {roots_preview} as the source of truth and stop broad rereads. "
750757
                 "If you already know a concrete mismatch, fix it directly. "
751
-                "Verification should run next. Do not reopen reference materials or keep auditing the same files."
758
+                f"{_verification_handoff_instruction()} Do not reopen reference materials "
759
+                "or keep auditing the same files."
752760
             )
753761
             return
754762
 
755763
         verification_suffix = (
756
-            "Move to verification or final confirmation using the files already on disk."
764
+            _verification_handoff_instruction()
757765
             if verification_commands
758766
             else "Finish the task using the files already on disk."
759767
         )
@@ -946,7 +954,7 @@ class ToolBatchRunner:
946954
                 f"with `{next_pending}` using the generated files as the source of truth. "
947955
                 "Do not reopen earlier reference materials."
948956
                 + (
949
-                    " Verification should run next using those generated files."
957
+                    " Finish with a final response so Loader can verify those generated files."
950958
                     if verification_commands
951959
                     else ""
952960
                 )
@@ -956,7 +964,7 @@ class ToolBatchRunner:
956964
         self.context.queue_steering_message(
957965
             "All explicitly planned artifacts already exist. "
958966
             f"Stay within the current output roots under {roots_preview} "
959
-            "and move to verification or final confirmation using the generated files. "
967
+            "and finish with a final response so Loader can verify the generated files. "
960968
             "Do not reopen earlier reference materials."
961969
         )
962970
 
@@ -992,7 +1000,7 @@ class ToolBatchRunner:
9921000
         )
9931001
         if all_planned_artifacts_exist(dod, project_root=self.context.project_root):
9941002
             verification_suffix = (
995
-                " Move to verification or final confirmation using the files already on disk."
1003
+                " " + _verification_handoff_instruction()
9961004
                 if verification_commands
9971005
                 else " If no concrete mismatch remains, stop editing and finish from the files already on disk."
9981006
             )
@@ -1110,7 +1118,7 @@ class ToolBatchRunner:
11101118
                 supplement_existing=True,
11111119
             )
11121120
             verification_suffix = (
1113
-                " Move to verification or final confirmation using the files already on disk."
1121
+                " " + _verification_handoff_instruction()
11141122
                 if verification_commands
11151123
                 else " Finish the task using the files already on disk."
11161124
             )
@@ -1169,7 +1177,7 @@ class ToolBatchRunner:
11691177
             supplement_existing=True,
11701178
         )
11711179
         verification_suffix = (
1172
-            " Move to verification or final confirmation using the files already on disk."
1180
+            " " + _verification_handoff_instruction()
11731181
             if verification_commands
11741182
             else " Finish the task using the files already on disk."
11751183
         )
@@ -1410,7 +1418,7 @@ class ToolBatchRunner:
14101418
                 project_root=self.context.project_root,
14111419
             )
14121420
         ):
1413
-            summary_parts.append("verification should be reviewed next")
1421
+            summary_parts.append("final response should be provided next for Loader verification")
14141422
 
14151423
         result = "; ".join(summary_parts)
14161424
         content = f"Observation [TodoWrite]: Result: {result}"
@@ -1559,7 +1567,7 @@ class ToolBatchRunner:
15591567
 
15601568
         if next_pending and _todo_is_consistency_review_step(next_pending):
15611569
             verification_suffix = (
1562
-                " Move to verification once no specific mismatch remains."
1570
+                " Finish with a final response once no specific mismatch remains so Loader can verify."
15631571
                 if verification_commands
15641572
                 else " Avoid another full reread unless one specific inconsistency is still unknown."
15651573
             )
@@ -1576,8 +1584,8 @@ class ToolBatchRunner:
15761584
             self.context.queue_steering_message(
15771585
                 "All explicitly planned artifacts now exist on disk. "
15781586
                 "Do not expand the artifact set or restart discovery unless a specific gap is "
1579
-                "still known. Move to verification or final confirmation using the files that "
1580
-                "already exist."
1587
+                "still known. Finish with a final response now so Loader can verify the files "
1588
+                "that already exist."
15811589
             )
15821590
 
15831591
     def _queue_missing_artifact_progress_nudge(
@@ -1818,7 +1826,7 @@ class ToolBatchRunner:
18181826
             )
18191827
             if next_pending and _todo_is_consistency_review_step(next_pending):
18201828
                 verification_suffix = (
1821
-                    " Move to verification once no specific mismatch remains."
1829
+                    " Finish with a final response once no specific mismatch remains so Loader can verify."
18221830
                     if verification_commands
18231831
                     else " Finish the targeted consistency pass without reopening reference materials."
18241832
                 )
@@ -1835,14 +1843,15 @@ class ToolBatchRunner:
18351843
                 self.context.set_workflow_mode("verify")
18361844
                 self.context.queue_steering_message(
18371845
                     "Todo tracking is updated. All explicitly planned artifacts now exist on disk. "
1838
-                    "Verification should run next. Use the current output files as the source of truth, "
1839
-                    "and do not restart discovery, reopen reference materials, or spend another turn "
1846
+                    "Finish with a final response now so Loader can run verification automatically. "
1847
+                    "Use the current output files as the source of truth, and do not restart discovery, "
1848
+                    "reopen reference materials, run more ad hoc audit commands, or spend another turn "
18401849
                     "on TodoWrite alone."
18411850
                 )
18421851
                 return
18431852
 
18441853
             verification_suffix = (
1845
-                " Move to verification or final confirmation using the files already on disk."
1854
+                " " + _verification_handoff_instruction()
18461855
                 if verification_commands
18471856
                 else " Finish the task using the files already on disk."
18481857
             )
tests/test_tool_batches.py (modified)
@@ -1957,7 +1957,7 @@ async def test_tool_batch_runner_duplicate_read_after_plan_complete_pushes_verif
19571957
     assert len(persistent_messages) == 1
19581958
     assert "All explicitly planned artifacts already exist on disk." in persistent_messages[0]
19591959
     assert (
1960
-        "Move to verification or final confirmation using the files already on disk."
1960
+        "Finish with a final response now so Loader can run verification automatically."
19611961
         in persistent_messages[0]
19621962
     )
19631963
     assert "Create 07-performance-tuning.html" not in persistent_messages[0]
@@ -2078,7 +2078,7 @@ async def test_tool_batch_runner_duplicate_read_after_plan_complete_ignores_stal
20782078
     assert len(persistent_messages) == 1
20792079
     assert "All explicitly planned artifacts already exist on disk." in persistent_messages[0]
20802080
     assert (
2081
-        "Move to verification or final confirmation using the files already on disk."
2081
+        "Finish with a final response now so Loader can run verification automatically."
20822082
         in persistent_messages[0]
20832083
     )
20842084
     assert "Create 01-getting-started.html" not in persistent_messages[0]
@@ -2196,7 +2196,7 @@ async def test_tool_batch_runner_successful_read_after_plan_complete_pushes_revi
21962196
     assert "Ensure all files are properly linked and formatted consistently" in message
21972197
     assert "Create 01-getting-started.html" not in message
21982198
     assert "do not keep broad-rereading the output set" in message
2199
-    assert "If no specific mismatch remains, move to verification now." in message
2199
+    assert "If no specific mismatch remains, finish with a final response so Loader can verify." in message
22002200
 
22012201
 
22022202
 @pytest.mark.asyncio
@@ -2289,7 +2289,7 @@ async def test_tool_batch_runner_successful_read_after_plan_complete_switches_to
22892289
 
22902290
     assert len(persistent_messages) == 1
22912291
     assert "All explicitly planned artifacts already exist." in persistent_messages[0]
2292
-    assert "Verification should run next." in persistent_messages[0]
2292
+    assert "Finish with a final response now so Loader can run verification automatically." in persistent_messages[0]
22932293
     assert "stop broad rereads" in persistent_messages[0]
22942294
     assert ephemeral_messages == []
22952295
     assert context.workflow_mode == "verify"
@@ -3377,7 +3377,8 @@ async def test_tool_batch_runner_hands_off_to_verification_once_planned_artifact
33773377
         for message in persistent_messages
33783378
     )
33793379
     assert any(
3380
-        "Move to verification once no specific mismatch remains." in message
3380
+        "Finish with a final response once no specific mismatch remains so Loader can verify."
3381
+        in message
33813382
         for message in persistent_messages
33823383
     )
33833384
 
@@ -4025,7 +4026,10 @@ async def test_tool_batch_runner_todowrite_after_artifacts_exist_pushes_verifica
40254026
     message = queued_messages[-1]
40264027
     assert "Todo tracking is updated. All explicitly planned artifacts now exist on disk." in message
40274028
     assert "Verify all guide files are linked and complete" in message
4028
-    assert "Move to verification once no specific mismatch remains." in message
4029
+    assert (
4030
+        "Finish with a final response once no specific mismatch remains so Loader can verify."
4031
+        in message
4032
+    )
40294033
     assert "reopen reference materials" in message
40304034
     assert "Fortran guide structure" not in message
40314035
     assert context.workflow_mode == "execute"
@@ -4163,7 +4167,7 @@ async def test_tool_batch_runner_todowrite_after_outputs_exist_but_links_missing
41634167
     assert queued_messages
41644168
     message = queued_messages[-1]
41654169
     assert "Todo tracking is updated. All explicitly planned artifacts now exist on disk." in message
4166
-    assert "Verification should run next." in message
4170
+    assert "Finish with a final response now so Loader can run verification automatically." in message
41674171
     assert "Repair or verify the current files instead of expanding the artifact set." not in message
41684172
     assert context.workflow_mode == "verify"
41694173
 
@@ -4285,7 +4289,7 @@ async def test_tool_batch_runner_preempts_post_build_audit_after_todowrite_verif
42854289
     assert len(summary.tool_result_messages) == 1
42864290
     assert context.workflow_mode == "verify"
42874291
     assert queued_messages
4288
-    assert "Verification should run next." in queued_messages[-1]
4292
+    assert "Finish with a final response now so Loader can run verification automatically." in queued_messages[-1]
42894293
 
42904294
 
42914295
 @pytest.mark.asyncio
@@ -4409,12 +4413,15 @@ async def test_tool_batch_runner_todowrite_complete_directory_plan_does_not_rein
44094413
     assert result.continue_after_batch is True
44104414
     assert queued_messages
44114415
     message = queued_messages[-1]
4412
-    assert "Verification should run next." in message
4416
+    assert "Finish with a final response now so Loader can run verification automatically." in message
44134417
     assert "01-introduction.html" not in message
44144418
     assert "chapter files" not in message.lower()
44154419
     assert context.workflow_mode == "verify"
44164420
     assert summary.tool_result_messages
4417
-    assert "verification should be reviewed next" in summary.tool_result_messages[-1].content
4421
+    assert (
4422
+        "final response should be provided next for Loader verification"
4423
+        in summary.tool_result_messages[-1].content
4424
+    )
44184425
     assert "fortran guide structure" not in summary.tool_result_messages[-1].content.lower()
44194426
 
44204427
 
@@ -4532,7 +4539,7 @@ async def test_tool_batch_runner_preempts_post_build_observation_batch_for_verif
45324539
     assert [call.id for call in executor.calls] == ["bash-post-build-audit"]
45334540
     assert context.workflow_mode == "verify"
45344541
     assert queued_messages
4535
-    assert "Verification should run next." in queued_messages[-1]
4542
+    assert "Finish with a final response now so Loader can run verification automatically." in queued_messages[-1]
45364543
 
45374544
 
45384545
 @pytest.mark.asyncio
@@ -4759,7 +4766,7 @@ async def test_tool_batch_runner_skips_post_build_user_question_during_consisten
47594766
     assert queued_messages
47604767
     assert "The remaining work is review/verification of the generated files." in queued_messages[-1]
47614768
     assert "Do not ask the user for more clarification about the reference pattern now." in queued_messages[-1]
4762
-    assert "Verification should run next." in queued_messages[-1]
4769
+    assert "Finish with a final response now so Loader can run verification automatically." in queued_messages[-1]
47634770
     assert context.workflow_mode == "verify"
47644771
     assert summary.tool_result_messages
47654772
     assert "Skipped - stale post-build user question" in summary.tool_result_messages[-1].content
@@ -4878,7 +4885,7 @@ async def test_tool_batch_runner_rewrites_stale_todowrite_summary_from_reconcile
48784885
     assert summary.tool_result_messages
48794886
     message = summary.tool_result_messages[-1].content
48804887
     assert "updated todo list" in message
4881
-    assert "verification should be reviewed next" in message
4888
+    assert "final response should be provided next for Loader verification" in message
48824889
     assert "next pending:" not in message
48834890
     assert "fortran guide structure" not in message.lower()
48844891
 
@@ -5034,7 +5041,7 @@ async def test_tool_batch_runner_todowrite_drops_unplanned_expansion_after_outpu
50345041
     assert queued_messages
50355042
     message = queued_messages[-1]
50365043
     assert "Todo tracking is updated. All explicitly planned artifacts now exist on disk." in message
5037
-    assert "Verification should run next." in message
5044
+    assert "Finish with a final response now so Loader can run verification automatically." in message
50385045
     assert "Repair or verify the current files instead of expanding the artifact set." not in message
50395046
     assert "08-troubleshooting.html" not in message
50405047
     assert context.workflow_mode == "verify"
@@ -6369,7 +6376,7 @@ def test_tool_batch_runner_blocked_noop_edit_after_full_build_prefers_verificati
63696376
 
63706377
     assert queued
63716378
     assert "All explicitly planned artifacts already exist." in queued[0]
6372
-    assert "Move to verification or final confirmation using the files already on disk." in queued[0]
6379
+    assert "Finish with a final response now so Loader can run verification automatically." in queued[0]
63736380
     assert "replace the surrounding block" not in queued[0]
63746381
 
63756382
 
@@ -6950,7 +6957,7 @@ def test_tool_batch_runner_blocked_completed_artifact_scope_nudge_prefers_verifi
69506957
     assert "All explicitly planned artifacts already exist." in queued[0]
69516958
     assert "Verify all guide files are linked and complete" in queued[0]
69526959
     assert "Do not reopen earlier reference materials." in queued[0]
6953
-    assert "Verification should run next" in queued[0]
6960
+    assert "Finish with a final response so Loader can verify" in queued[0]
69546961
 
69556962
 
69566963
 def test_tool_batch_runner_blocked_post_build_audit_nudge_switches_to_verify(
@@ -7021,7 +7028,7 @@ def test_tool_batch_runner_blocked_post_build_audit_nudge_switches_to_verify(
70217028
     assert queued
70227029
     assert context.workflow_mode == "verify"
70237030
     assert "All explicitly planned artifacts already exist." in queued[0]
7024
-    assert "move to verification or final confirmation" in queued[0]
7031
+    assert "finish with a final response so Loader can verify" in queued[0]
70257032
 
70267033
 
70277034
 @pytest.mark.asyncio
@@ -7130,7 +7137,7 @@ async def test_tool_batch_runner_does_not_halt_on_repeated_post_build_audit_bloc
71307137
     assert result.consecutive_errors == 0
71317138
     assert context.workflow_mode == "verify"
71327139
     assert queued
7133
-    assert any("move to verification or final confirmation" in message for message in queued)
7140
+    assert any("finish with a final response so Loader can verify" in message for message in queued)
71347141
 
71357142
 
71367143
 def test_tool_batch_runner_blocked_html_declared_target_nudge_uses_closest_declared_target(
@@ -7374,7 +7381,7 @@ def test_tool_batch_runner_blocked_html_declared_file_creation_after_outputs_exi
73747381
     assert queued
73757382
     assert "All explicitly planned artifacts already exist on disk." in queued[0]
73767383
     assert "Do not expand the output set with `chapters/08-advanced-configuration.html`." in queued[0]
7377
-    assert "Move to verification or final confirmation using the files already on disk." in queued[0]
7384
+    assert "Finish with a final response now so Loader can run verification automatically." in queued[0]
73787385
     assert "update the guide root" not in queued[0]
73797386
 
73807387