tenseleyflow/loader / 6bf8210

Browse files

Promote completed builds into verify

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
6bf82100b57788c25f28fe3a4725f453b21a2822
Parents
5a69d33
Tree
3caa2ad

2 changed files

Status File + -
M src/loader/runtime/tool_batches.py 20 0
M tests/test_tool_batches.py 103 4
src/loader/runtime/tool_batches.py modified
@@ -639,6 +639,16 @@ class ToolBatchRunner:
639639
             )
640640
             return
641641
 
642
+        if verification_commands:
643
+            self.context.set_workflow_mode("verify")
644
+            self.context.queue_steering_message(
645
+                "All explicitly planned artifacts already exist. "
646
+                f"Use the generated files under {roots_preview} as the source of truth and stop broad rereads. "
647
+                "If you already know a concrete mismatch, fix it directly. "
648
+                "Verification should run next. Do not reopen reference materials or keep auditing the same files."
649
+            )
650
+            return
651
+
642652
         verification_suffix = (
643653
             "Move to verification or final confirmation using the files already on disk."
644654
             if verification_commands
@@ -1630,6 +1640,16 @@ class ToolBatchRunner:
16301640
                 )
16311641
                 return
16321642
 
1643
+            if verification_commands:
1644
+                self.context.set_workflow_mode("verify")
1645
+                self.context.queue_steering_message(
1646
+                    "Todo tracking is updated. All explicitly planned artifacts now exist on disk. "
1647
+                    "Verification should run next. Use the current output files as the source of truth, "
1648
+                    "and do not restart discovery, reopen reference materials, or spend another turn "
1649
+                    "on TodoWrite alone."
1650
+                )
1651
+                return
1652
+
16331653
             verification_suffix = (
16341654
                 " Move to verification or final confirmation using the files already on disk."
16351655
                 if verification_commands
tests/test_tool_batches.py modified
@@ -2199,6 +2199,102 @@ async def test_tool_batch_runner_successful_read_after_plan_complete_pushes_revi
21992199
     assert "If no specific mismatch remains, move to verification now." in message
22002200
 
22012201
 
2202
+@pytest.mark.asyncio
2203
+async def test_tool_batch_runner_successful_read_after_plan_complete_switches_to_verify(
2204
+    temp_dir: Path,
2205
+) -> None:
2206
+    async def assess_confidence(
2207
+        tool_name: str,
2208
+        tool_args: dict,
2209
+        context: str,
2210
+    ) -> ConfidenceAssessment:
2211
+        raise AssertionError("Confidence scoring should not run for this scenario")
2212
+
2213
+    async def verify_action(
2214
+        tool_name: str,
2215
+        tool_args: dict,
2216
+        result: str,
2217
+        expected: str = "",
2218
+    ) -> ActionVerification:
2219
+        raise AssertionError("Verification should not run for this scenario")
2220
+
2221
+    guide_root = temp_dir / "guides" / "nginx"
2222
+    chapters = guide_root / "chapters"
2223
+    guide_root.mkdir(parents=True)
2224
+    chapters.mkdir()
2225
+    index_path = guide_root / "index.html"
2226
+    chapter_one = chapters / "01-getting-started.html"
2227
+    chapter_two = chapters / "02-installation.html"
2228
+    index_path.write_text("<html></html>\n")
2229
+    chapter_one.write_text("<h1>One</h1>\n")
2230
+    chapter_two.write_text("<h1>Two</h1>\n")
2231
+
2232
+    implementation_plan = temp_dir / "implementation.md"
2233
+    implementation_plan.write_text(
2234
+        "\n".join(
2235
+            [
2236
+                "# Implementation Plan",
2237
+                "",
2238
+                "## File Changes",
2239
+                f"- `{guide_root}/`",
2240
+                f"- `{chapters}/`",
2241
+                f"- `{index_path}`",
2242
+                f"- `{chapter_one}`",
2243
+                f"- `{chapter_two}`",
2244
+                "",
2245
+            ]
2246
+        )
2247
+    )
2248
+
2249
+    context = build_context(
2250
+        temp_dir=temp_dir,
2251
+        messages=[],
2252
+        safeguards=FakeSafeguards(),
2253
+        assess_confidence=assess_confidence,
2254
+        verify_action=verify_action,
2255
+        auto_recover=False,
2256
+    )
2257
+    persistent_messages: list[str] = []
2258
+    ephemeral_messages: list[str] = []
2259
+    context.queue_steering_message_callback = persistent_messages.append
2260
+    context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
2261
+    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
2262
+    dod = create_definition_of_done("Create a multi-file nginx guide.")
2263
+    dod.implementation_plan = str(implementation_plan)
2264
+    dod.verification_commands = [f"ls -la {guide_root}"]
2265
+
2266
+    tool_call = ToolCall(
2267
+        id="read-built-verify",
2268
+        name="read",
2269
+        arguments={"file_path": str(chapter_one)},
2270
+    )
2271
+    executor = FakeExecutor(
2272
+        [tool_outcome(tool_call=tool_call, output=chapter_one.read_text(), is_error=False)]
2273
+    )
2274
+
2275
+    summary = TurnSummary(final_response="")
2276
+    await runner.execute_batch(
2277
+        tool_calls=[tool_call],
2278
+        tool_source="assistant",
2279
+        pending_tool_calls_seen=set(),
2280
+        emit=_noop_emit,
2281
+        summary=summary,
2282
+        dod=dod,
2283
+        executor=executor,  # type: ignore[arg-type]
2284
+        on_confirmation=None,
2285
+        on_user_question=None,
2286
+        emit_confirmation=None,
2287
+        consecutive_errors=0,
2288
+    )
2289
+
2290
+    assert len(persistent_messages) == 1
2291
+    assert "All explicitly planned artifacts already exist." in persistent_messages[0]
2292
+    assert "Verification should run next." in persistent_messages[0]
2293
+    assert "stop broad rereads" in persistent_messages[0]
2294
+    assert ephemeral_messages == []
2295
+    assert context.workflow_mode == "verify"
2296
+
2297
+
22022298
 @pytest.mark.asyncio
22032299
 async def test_tool_batch_runner_observation_handoff_pushes_mutation_step(
22042300
     temp_dir: Path,
@@ -3932,6 +4028,7 @@ async def test_tool_batch_runner_todowrite_after_artifacts_exist_pushes_verifica
39324028
     assert "Move to verification once no specific mismatch remains." in message
39334029
     assert "reopen reference materials" in message
39344030
     assert "Fortran guide structure" not in message
4031
+    assert context.workflow_mode == "execute"
39354032
 
39364033
 
39374034
 @pytest.mark.asyncio
@@ -4066,8 +4163,9 @@ async def test_tool_batch_runner_todowrite_after_outputs_exist_but_links_missing
40664163
     assert queued_messages
40674164
     message = queued_messages[-1]
40684165
     assert "Todo tracking is updated. All explicitly planned artifacts now exist on disk." in message
4069
-    assert "Repair or verify the current files instead of expanding the artifact set." in message
4070
-    assert "Move to verification or final confirmation using the files already on disk." in message
4166
+    assert "Verification should run next." in message
4167
+    assert "Repair or verify the current files instead of expanding the artifact set." not in message
4168
+    assert context.workflow_mode == "verify"
40714169
 
40724170
 
40734171
 @pytest.mark.asyncio
@@ -4221,9 +4319,10 @@ async def test_tool_batch_runner_todowrite_drops_unplanned_expansion_after_outpu
42214319
     assert queued_messages
42224320
     message = queued_messages[-1]
42234321
     assert "Todo tracking is updated. All explicitly planned artifacts now exist on disk." in message
4224
-    assert "Repair or verify the current files instead of expanding the artifact set." in message
4225
-    assert "Move to verification or final confirmation using the files already on disk." in message
4322
+    assert "Verification should run next." in message
4323
+    assert "Repair or verify the current files instead of expanding the artifact set." not in message
42264324
     assert "08-troubleshooting.html" not in message
4325
+    assert context.workflow_mode == "verify"
42274326
 
42284327
 
42294328
 @pytest.mark.asyncio