tenseleyflow/loader / 0bd46e0

Browse files

Preempt post-build audit batches

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
0bd46e04b8a974223491d1f6e6c2ab610e0148f1
Parents
8b65eda
Tree
eec838f

3 changed files

Status File + -
M src/loader/runtime/response_route_handlers.py 7 0
M src/loader/runtime/tool_batches.py 35 0
M tests/test_tool_batches.py 112 0
src/loader/runtime/response_route_handlers.py modified
@@ -140,6 +140,13 @@ class ToolBatchRouteHandler:
140140
             emit_confirmation=emit_confirmation,
141141
             consecutive_errors=context.consecutive_errors,
142142
         )
143
+        if batch_result.continue_after_batch:
144
+            return ResponseRouteDecision(
145
+                action=ResponseRouteAction.CONTINUE,
146
+                continuation_count=context.continuation_count,
147
+                consecutive_errors=batch_result.consecutive_errors,
148
+                new_actions_taken=batch_result.actions_taken,
149
+            )
143150
         if batch_result.halted:
144151
             return ResponseRouteDecision(
145152
                 action=ResponseRouteAction.FINALIZE,
src/loader/runtime/tool_batches.py modified
@@ -147,6 +147,7 @@ class ToolBatchResult:
147147
     actions_taken: list[str] = field(default_factory=list)
148148
     consecutive_errors: int = 0
149149
     halted: bool = False
150
+    continue_after_batch: bool = False
150151
     final_response: str = ""
151152
 
152153
 
@@ -318,6 +319,12 @@ class ToolBatchRunner:
318319
             # otherwise the model operates blind and loops.
319320
             self.context.session.append(outcome.message)
320321
             summary.tool_result_messages.append(outcome.message)
322
+            if self._should_preempt_for_verification_handoff(
323
+                tool_call=executed_tool_call,
324
+                dod=dod,
325
+            ):
326
+                result.continue_after_batch = True
327
+                return result
321328
             if outcome.state == ToolExecutionState.DUPLICATE:
322329
                 self._queue_duplicate_observation_nudge(tool_call, dod=dod)
323330
             elif outcome.state == ToolExecutionState.BLOCKED:
@@ -394,6 +401,34 @@ class ToolBatchRunner:
394401
 
395402
         return result
396403
 
404
+    def _should_preempt_for_verification_handoff(
405
+        self,
406
+        *,
407
+        tool_call: ToolCall,
408
+        dod: DefinitionOfDone,
409
+    ) -> bool:
410
+        """Yield back to the main loop once post-build work has clearly transitioned to verify."""
411
+
412
+        if self.context.workflow_mode != "verify":
413
+            return False
414
+        if dod.status in {"fixing", "done"}:
415
+            return False
416
+        if not all_planned_artifact_outputs_exist(dod, project_root=self.context.project_root):
417
+            return False
418
+        verification_commands = dod.verification_commands or derive_verification_commands(
419
+            dod,
420
+            project_root=self.context.project_root,
421
+            task_statement=getattr(self.context.session, "current_task", "") or "",
422
+            supplement_existing=True,
423
+        )
424
+        if not verification_commands:
425
+            return False
426
+        return tool_call.name in (
427
+            {"TodoWrite"}
428
+            | _OBSERVATION_TOOLS
429
+            | _BOOKKEEPING_NOTE_TOOL_NAMES
430
+        )
431
+
397432
     def _queue_duplicate_observation_nudge(
398433
         self,
399434
         tool_call: ToolCall,
tests/test_tool_batches.py modified
@@ -4168,6 +4168,118 @@ async def test_tool_batch_runner_todowrite_after_outputs_exist_but_links_missing
41684168
     assert context.workflow_mode == "verify"
41694169
 
41704170
 
4171
+@pytest.mark.asyncio
4172
+async def test_tool_batch_runner_preempts_post_build_audit_after_todowrite_verify_handoff(
4173
+    temp_dir: Path,
4174
+) -> None:
4175
+    async def assess_confidence(
4176
+        tool_name: str,
4177
+        tool_args: dict,
4178
+        context: str,
4179
+    ) -> ConfidenceAssessment:
4180
+        raise AssertionError("Confidence scoring should not run for this scenario")
4181
+
4182
+    async def verify_action(
4183
+        tool_name: str,
4184
+        tool_args: dict,
4185
+        result: str,
4186
+        expected: str = "",
4187
+    ) -> ActionVerification:
4188
+        raise AssertionError("Verification should not run for this scenario")
4189
+
4190
+    guide_root = temp_dir / "guides" / "nginx"
4191
+    chapters = guide_root / "chapters"
4192
+    guide_root.mkdir(parents=True)
4193
+    chapters.mkdir()
4194
+    index_path = guide_root / "index.html"
4195
+    chapter_one = chapters / "01-introduction.html"
4196
+    chapter_two = chapters / "02-installation.html"
4197
+    index_path.write_text("<html></html>\n")
4198
+    chapter_one.write_text("<html></html>\n")
4199
+    chapter_two.write_text("<html></html>\n")
4200
+
4201
+    implementation_plan = temp_dir / "implementation.md"
4202
+    implementation_plan.write_text(
4203
+        "\n".join(
4204
+            [
4205
+                "# Implementation Plan",
4206
+                "",
4207
+                "## File Changes",
4208
+                f"- `{guide_root}/`",
4209
+                f"- `{chapters}/`",
4210
+                f"- `{index_path}`",
4211
+                f"- `{chapter_one}`",
4212
+                f"- `{chapter_two}`",
4213
+                "",
4214
+            ]
4215
+        )
4216
+    )
4217
+
4218
+    context = build_context(
4219
+        temp_dir=temp_dir,
4220
+        messages=[],
4221
+        safeguards=FakeSafeguards(),
4222
+        assess_confidence=assess_confidence,
4223
+        verify_action=verify_action,
4224
+        auto_recover=False,
4225
+    )
4226
+    queued_messages: list[str] = []
4227
+    context.queue_steering_message_callback = queued_messages.append
4228
+    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
4229
+    dod = create_definition_of_done("Create a multi-file nginx guide.")
4230
+    dod.implementation_plan = str(implementation_plan)
4231
+    dod.verification_commands = [f"ls -la {guide_root}"]
4232
+
4233
+    todo_call = ToolCall(
4234
+        id="todo-post-build-preempt",
4235
+        name="TodoWrite",
4236
+        arguments={"todos": []},
4237
+    )
4238
+    audit_read = ToolCall(
4239
+        id="read-after-todo",
4240
+        name="read",
4241
+        arguments={"file_path": str(index_path)},
4242
+    )
4243
+    executor = FakeExecutor(
4244
+        [
4245
+            tool_outcome(
4246
+                tool_call=todo_call,
4247
+                output="Todos updated",
4248
+                is_error=False,
4249
+                metadata={"new_todos": []},
4250
+            ),
4251
+            tool_outcome(
4252
+                tool_call=audit_read,
4253
+                output=index_path.read_text(),
4254
+                is_error=False,
4255
+            ),
4256
+        ]
4257
+    )
4258
+
4259
+    summary = TurnSummary(final_response="")
4260
+    result = await runner.execute_batch(
4261
+        tool_calls=[todo_call, audit_read],
4262
+        tool_source="assistant",
4263
+        pending_tool_calls_seen=set(),
4264
+        emit=_noop_emit,
4265
+        summary=summary,
4266
+        dod=dod,
4267
+        executor=executor,  # type: ignore[arg-type]
4268
+        on_confirmation=None,
4269
+        on_user_question=None,
4270
+        emit_confirmation=None,
4271
+        consecutive_errors=0,
4272
+    )
4273
+
4274
+    assert result.continue_after_batch is True
4275
+    assert result.halted is False
4276
+    assert [call.id for call in executor.calls] == ["todo-post-build-preempt"]
4277
+    assert len(summary.tool_result_messages) == 1
4278
+    assert context.workflow_mode == "verify"
4279
+    assert queued_messages
4280
+    assert "Verification should run next." in queued_messages[-1]
4281
+
4282
+
41714283
 @pytest.mark.asyncio
41724284
 async def test_tool_batch_runner_todowrite_drops_unplanned_expansion_after_outputs_exist(
41734285
     temp_dir: Path,