tenseleyflow/loader / 6bf8210

Browse files

Promote completed builds into verify

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
6bf82100b57788c25f28fe3a4725f453b21a2822
Parents
5a69d33
Tree
3caa2ad

2 changed files

Status | File | + | -
M | src/loader/runtime/tool_batches.py | 20 | 0
M | tests/test_tool_batches.py | 103 | 4
src/loader/runtime/tool_batches.py (modified)
@@ -639,6 +639,16 @@ class ToolBatchRunner:
639
             )
639
             )
640
             return
640
             return
641
 
641
 
642
+        if verification_commands:
643
+            self.context.set_workflow_mode("verify")
644
+            self.context.queue_steering_message(
645
+                "All explicitly planned artifacts already exist. "
646
+                f"Use the generated files under {roots_preview} as the source of truth and stop broad rereads. "
647
+                "If you already know a concrete mismatch, fix it directly. "
648
+                "Verification should run next. Do not reopen reference materials or keep auditing the same files."
649
+            )
650
+            return
651
+
642
         verification_suffix = (
652
         verification_suffix = (
643
             "Move to verification or final confirmation using the files already on disk."
653
             "Move to verification or final confirmation using the files already on disk."
644
             if verification_commands
654
             if verification_commands
@@ -1630,6 +1640,16 @@ class ToolBatchRunner:
1630
                 )
1640
                 )
1631
                 return
1641
                 return
1632
 
1642
 
1643
+            if verification_commands:
1644
+                self.context.set_workflow_mode("verify")
1645
+                self.context.queue_steering_message(
1646
+                    "Todo tracking is updated. All explicitly planned artifacts now exist on disk. "
1647
+                    "Verification should run next. Use the current output files as the source of truth, "
1648
+                    "and do not restart discovery, reopen reference materials, or spend another turn "
1649
+                    "on TodoWrite alone."
1650
+                )
1651
+                return
1652
+
1633
             verification_suffix = (
1653
             verification_suffix = (
1634
                 " Move to verification or final confirmation using the files already on disk."
1654
                 " Move to verification or final confirmation using the files already on disk."
1635
                 if verification_commands
1655
                 if verification_commands
tests/test_tool_batches.py (modified)
@@ -2199,6 +2199,102 @@ async def test_tool_batch_runner_successful_read_after_plan_complete_pushes_revi
2199
     assert "If no specific mismatch remains, move to verification now." in message
2199
     assert "If no specific mismatch remains, move to verification now." in message
2200
 
2200
 
2201
 
2201
 
2202
+@pytest.mark.asyncio
2203
+async def test_tool_batch_runner_successful_read_after_plan_complete_switches_to_verify(
2204
+    temp_dir: Path,
2205
+) -> None:
2206
+    async def assess_confidence(
2207
+        tool_name: str,
2208
+        tool_args: dict,
2209
+        context: str,
2210
+    ) -> ConfidenceAssessment:
2211
+        raise AssertionError("Confidence scoring should not run for this scenario")
2212
+
2213
+    async def verify_action(
2214
+        tool_name: str,
2215
+        tool_args: dict,
2216
+        result: str,
2217
+        expected: str = "",
2218
+    ) -> ActionVerification:
2219
+        raise AssertionError("Verification should not run for this scenario")
2220
+
2221
+    guide_root = temp_dir / "guides" / "nginx"
2222
+    chapters = guide_root / "chapters"
2223
+    guide_root.mkdir(parents=True)
2224
+    chapters.mkdir()
2225
+    index_path = guide_root / "index.html"
2226
+    chapter_one = chapters / "01-getting-started.html"
2227
+    chapter_two = chapters / "02-installation.html"
2228
+    index_path.write_text("<html></html>\n")
2229
+    chapter_one.write_text("<h1>One</h1>\n")
2230
+    chapter_two.write_text("<h1>Two</h1>\n")
2231
+
2232
+    implementation_plan = temp_dir / "implementation.md"
2233
+    implementation_plan.write_text(
2234
+        "\n".join(
2235
+            [
2236
+                "# Implementation Plan",
2237
+                "",
2238
+                "## File Changes",
2239
+                f"- `{guide_root}/`",
2240
+                f"- `{chapters}/`",
2241
+                f"- `{index_path}`",
2242
+                f"- `{chapter_one}`",
2243
+                f"- `{chapter_two}`",
2244
+                "",
2245
+            ]
2246
+        )
2247
+    )
2248
+
2249
+    context = build_context(
2250
+        temp_dir=temp_dir,
2251
+        messages=[],
2252
+        safeguards=FakeSafeguards(),
2253
+        assess_confidence=assess_confidence,
2254
+        verify_action=verify_action,
2255
+        auto_recover=False,
2256
+    )
2257
+    persistent_messages: list[str] = []
2258
+    ephemeral_messages: list[str] = []
2259
+    context.queue_steering_message_callback = persistent_messages.append
2260
+    context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
2261
+    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
2262
+    dod = create_definition_of_done("Create a multi-file nginx guide.")
2263
+    dod.implementation_plan = str(implementation_plan)
2264
+    dod.verification_commands = [f"ls -la {guide_root}"]
2265
+
2266
+    tool_call = ToolCall(
2267
+        id="read-built-verify",
2268
+        name="read",
2269
+        arguments={"file_path": str(chapter_one)},
2270
+    )
2271
+    executor = FakeExecutor(
2272
+        [tool_outcome(tool_call=tool_call, output=chapter_one.read_text(), is_error=False)]
2273
+    )
2274
+
2275
+    summary = TurnSummary(final_response="")
2276
+    await runner.execute_batch(
2277
+        tool_calls=[tool_call],
2278
+        tool_source="assistant",
2279
+        pending_tool_calls_seen=set(),
2280
+        emit=_noop_emit,
2281
+        summary=summary,
2282
+        dod=dod,
2283
+        executor=executor,  # type: ignore[arg-type]
2284
+        on_confirmation=None,
2285
+        on_user_question=None,
2286
+        emit_confirmation=None,
2287
+        consecutive_errors=0,
2288
+    )
2289
+
2290
+    assert len(persistent_messages) == 1
2291
+    assert "All explicitly planned artifacts already exist." in persistent_messages[0]
2292
+    assert "Verification should run next." in persistent_messages[0]
2293
+    assert "stop broad rereads" in persistent_messages[0]
2294
+    assert ephemeral_messages == []
2295
+    assert context.workflow_mode == "verify"
2296
+
2297
+
2202
 @pytest.mark.asyncio
2298
 @pytest.mark.asyncio
2203
 async def test_tool_batch_runner_observation_handoff_pushes_mutation_step(
2299
 async def test_tool_batch_runner_observation_handoff_pushes_mutation_step(
2204
     temp_dir: Path,
2300
     temp_dir: Path,
@@ -3932,6 +4028,7 @@ async def test_tool_batch_runner_todowrite_after_artifacts_exist_pushes_verifica
3932
     assert "Move to verification once no specific mismatch remains." in message
4028
     assert "Move to verification once no specific mismatch remains." in message
3933
     assert "reopen reference materials" in message
4029
     assert "reopen reference materials" in message
3934
     assert "Fortran guide structure" not in message
4030
     assert "Fortran guide structure" not in message
4031
+    assert context.workflow_mode == "execute"
3935
 
4032
 
3936
 
4033
 
3937
 @pytest.mark.asyncio
4034
 @pytest.mark.asyncio
@@ -4066,8 +4163,9 @@ async def test_tool_batch_runner_todowrite_after_outputs_exist_but_links_missing
4066
     assert queued_messages
4163
     assert queued_messages
4067
     message = queued_messages[-1]
4164
     message = queued_messages[-1]
4068
     assert "Todo tracking is updated. All explicitly planned artifacts now exist on disk." in message
4165
     assert "Todo tracking is updated. All explicitly planned artifacts now exist on disk." in message
4069
-    assert "Repair or verify the current files instead of expanding the artifact set." in message
4166
+    assert "Verification should run next." in message
4070
-    assert "Move to verification or final confirmation using the files already on disk." in message
4167
+    assert "Repair or verify the current files instead of expanding the artifact set." not in message
4168
+    assert context.workflow_mode == "verify"
4071
 
4169
 
4072
 
4170
 
4073
 @pytest.mark.asyncio
4171
 @pytest.mark.asyncio
@@ -4221,9 +4319,10 @@ async def test_tool_batch_runner_todowrite_drops_unplanned_expansion_after_outpu
4221
     assert queued_messages
4319
     assert queued_messages
4222
     message = queued_messages[-1]
4320
     message = queued_messages[-1]
4223
     assert "Todo tracking is updated. All explicitly planned artifacts now exist on disk." in message
4321
     assert "Todo tracking is updated. All explicitly planned artifacts now exist on disk." in message
4224
-    assert "Repair or verify the current files instead of expanding the artifact set." in message
4322
+    assert "Verification should run next." in message
4225
-    assert "Move to verification or final confirmation using the files already on disk." in message
4323
+    assert "Repair or verify the current files instead of expanding the artifact set." not in message
4226
     assert "08-troubleshooting.html" not in message
4324
     assert "08-troubleshooting.html" not in message
4325
+    assert context.workflow_mode == "verify"
4227
 
4326
 
4228
 
4327
 
4229
 @pytest.mark.asyncio
4328
 @pytest.mark.asyncio