tenseleyflow/loader / add1107

Browse files

Defer fatal post-build audit blocks

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
add1107fc745593fa33230bedd295ea341707493
Parents
fac914c
Tree
ea1c0a5

2 changed files

Status File + -
M src/loader/runtime/tool_batches.py 14 1
M tests/test_tool_batches.py 109 0
src/loader/runtime/tool_batches.py modified
@@ -289,7 +289,10 @@ class ToolBatchRunner:
289 289
                     return result
290 290
 
291 291
             if outcome.is_error:
292
-                result.consecutive_errors += 1
292
+                if _is_recoverable_guidance_block(outcome.event_content):
293
+                    result.consecutive_errors = 0
294
+                else:
295
+                    result.consecutive_errors += 1
293 296
             else:
294 297
                 result.consecutive_errors = 0
295 298
 
@@ -2810,6 +2813,16 @@ def _recent_recovery_prompt(messages: list[Any]) -> bool:
2810 2813
     return False
2811 2814
 
2812 2815
 
2816
+def _is_recoverable_guidance_block(event_content: str) -> bool:
2817
+    """Return whether a blocked observation should steer without tripping fatal error limits."""
2818
+
2819
+    normalized = str(event_content or "")
2820
+    return (
2821
+        "[Blocked - completed artifact set scope:" in normalized
2822
+        or "[Blocked - post-build audit loop:" in normalized
2823
+    )
2824
+
2825
+
2813 2826
 def _tool_call_label(tool_call: ToolCall) -> str:
2814 2827
     """Human-readable label for one tool call."""
2815 2828
     name = tool_call.name
tests/test_tool_batches.py modified
@@ -6309,6 +6309,115 @@ def test_tool_batch_runner_blocked_post_build_audit_nudge_switches_to_verify(
6309 6309
     assert "move to verification or final confirmation" in queued[0]
6310 6310
 
6311 6311
 
6312
+@pytest.mark.asyncio
6313
+async def test_tool_batch_runner_does_not_halt_on_repeated_post_build_audit_blocks(
6314
+    temp_dir: Path,
6315
+) -> None:
6316
+    async def assess_confidence(
6317
+        tool_name: str,
6318
+        tool_args: dict,
6319
+        context: str,
6320
+    ) -> ConfidenceAssessment:
6321
+        raise AssertionError("Confidence scoring should be disabled in this scenario")
6322
+
6323
+    async def verify_action(
6324
+        tool_name: str,
6325
+        tool_args: dict,
6326
+        result: str,
6327
+        expected: str = "",
6328
+    ) -> ActionVerification:
6329
+        raise AssertionError("Verification should not run in this scenario")
6330
+
6331
+    guide_root = temp_dir / "guide"
6332
+    chapters = guide_root / "chapters"
6333
+    guide_root.mkdir(parents=True)
6334
+    chapters.mkdir()
6335
+    index_path = guide_root / "index.html"
6336
+    chapter_one = chapters / "01-getting-started.html"
6337
+    chapter_two = chapters / "02-installation.html"
6338
+    index_path.write_text("index")
6339
+    chapter_one.write_text("one")
6340
+    chapter_two.write_text("two")
6341
+
6342
+    implementation_plan = temp_dir / "implementation.md"
6343
+    implementation_plan.write_text(
6344
+        "\n".join(
6345
+            [
6346
+                "# Implementation Plan",
6347
+                "",
6348
+                "## File Changes",
6349
+                f"- `{guide_root}`",
6350
+                f"- `{chapters}`",
6351
+                f"- `{index_path}`",
6352
+                f"- `{chapter_one}`",
6353
+                f"- `{chapter_two}`",
6354
+                "",
6355
+            ]
6356
+        )
6357
+    )
6358
+
6359
+    context = build_context(
6360
+        temp_dir=temp_dir,
6361
+        messages=[],
6362
+        safeguards=FakeSafeguards(),
6363
+        assess_confidence=assess_confidence,
6364
+        verify_action=verify_action,
6365
+    )
6366
+    queued: list[str] = []
6367
+    context.queue_steering_message_callback = queued.append
6368
+    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
6369
+    dod = create_definition_of_done("Create a multi-file guide from a reference")
6370
+    dod.implementation_plan = str(implementation_plan)
6371
+    dod.verification_commands = [f"ls -la {guide_root}"]
6372
+
6373
+    blocked_message = (
6374
+        "[Blocked - post-build audit loop: all explicitly planned artifacts already exist.]"
6375
+    )
6376
+    tool_calls = [
6377
+        ToolCall(
6378
+            id=f"audit-{index}",
6379
+            name="bash",
6380
+            arguments={"command": f"cd {temp_dir} && ls -la guide/chapters/"},
6381
+        )
6382
+        for index in range(1, 4)
6383
+    ]
6384
+    executor = FakeExecutor(
6385
+        [
6386
+            tool_outcome(
6387
+                tool_call=tool_call,
6388
+                output=blocked_message,
6389
+                is_error=True,
6390
+                state=ToolExecutionState.BLOCKED,
6391
+            )
6392
+            for tool_call in tool_calls
6393
+        ]
6394
+    )
6395
+    events: list[AgentEvent] = []
6396
+
6397
+    async def emit(event: AgentEvent) -> None:
6398
+        events.append(event)
6399
+
6400
+    result = await runner.execute_batch(
6401
+        tool_calls=tool_calls,
6402
+        tool_source="native",
6403
+        pending_tool_calls_seen=set(),
6404
+        emit=emit,
6405
+        summary=TurnSummary(final_response=""),
6406
+        dod=dod,
6407
+        executor=executor,
6408
+        on_confirmation=None,
6409
+        on_user_question=None,
6410
+        emit_confirmation=None,
6411
+        consecutive_errors=0,
6412
+    )
6413
+
6414
+    assert result.halted is False
6415
+    assert result.consecutive_errors == 0
6416
+    assert context.workflow_mode == "verify"
6417
+    assert queued
6418
+    assert any("move to verification or final confirmation" in message for message in queued)
6419
+
6420
+
6312 6421
 def test_tool_batch_runner_blocked_html_declared_target_nudge_uses_closest_declared_target(
6313 6422
     temp_dir: Path,
6314 6423
 ) -> None: