`add1107`

Defer fatal post-build audit blocks

Authored by mfwolffe <wolffemf@dukes.jmu.edu> 1 week ago

SHA: add1107fc745593fa33230bedd295ea341707493
Parents: fac914c
Tree: ea1c0a5

2 changed files

Status	File	+	-
M	`src/loader/runtime/tool_batches.py`	14	1
M	`tests/test_tool_batches.py`	109	0

src/loader/runtime/tool_batches.py modified

                      return result
              if outcome.is_error:
 -                result.consecutive_errors += 1
 +                if _is_recoverable_guidance_block(outcome.event_content):
 +                    result.consecutive_errors = 0
 +                else:
 +                    result.consecutive_errors += 1
              else:
                  result.consecutive_errors = 0
      return False
 +def _is_recoverable_guidance_block(event_content: str) -> bool:
 +    """Return whether a blocked observation should steer without tripping fatal error limits."""
++
 +    normalized = str(event_content or "")
 +    return (
 +        "[Blocked - completed artifact set scope:" in normalized
 +        or "[Blocked - post-build audit loop:" in normalized
 +    )
++
++
  def _tool_call_label(tool_call: ToolCall) -> str:
      """Human-readable label for one tool call."""
      name = tool_call.name

tests/test_tool_batches.py modified

      assert "move to verification or final confirmation" in queued[0]
 +@pytest.mark.asyncio
 +async def test_tool_batch_runner_does_not_halt_on_repeated_post_build_audit_blocks(
 +    temp_dir: Path,
 +) -> None:
 +    async def assess_confidence(
 +        tool_name: str,
 +        tool_args: dict,
 +        context: str,
 +    ) -> ConfidenceAssessment:
 +        raise AssertionError("Confidence scoring should be disabled in this scenario")
++
 +    async def verify_action(
 +        tool_name: str,
 +        tool_args: dict,
 +        result: str,
 +        expected: str = "",
 +    ) -> ActionVerification:
 +        raise AssertionError("Verification should not run in this scenario")
++
 +    guide_root = temp_dir / "guide"
 +    chapters = guide_root / "chapters"
 +    guide_root.mkdir(parents=True)
 +    chapters.mkdir()
 +    index_path = guide_root / "index.html"
 +    chapter_one = chapters / "01-getting-started.html"
 +    chapter_two = chapters / "02-installation.html"
 +    index_path.write_text("index")
 +    chapter_one.write_text("one")
 +    chapter_two.write_text("two")
++
 +    implementation_plan = temp_dir / "implementation.md"
 +    implementation_plan.write_text(
 +        "\n".join(
 +            [
 +                "# Implementation Plan",
 +                "",
 +                "## File Changes",
 +                f"- `{guide_root}`",
 +                f"- `{chapters}`",
 +                f"- `{index_path}`",
 +                f"- `{chapter_one}`",
 +                f"- `{chapter_two}`",
 +                "",
 +            ]
 +        )
 +    )
++
 +    context = build_context(
 +        temp_dir=temp_dir,
 +        messages=[],
 +        safeguards=FakeSafeguards(),
 +        assess_confidence=assess_confidence,
 +        verify_action=verify_action,
 +    )
 +    queued: list[str] = []
 +    context.queue_steering_message_callback = queued.append
 +    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
 +    dod = create_definition_of_done("Create a multi-file guide from a reference")
 +    dod.implementation_plan = str(implementation_plan)
 +    dod.verification_commands = [f"ls -la {guide_root}"]
++
 +    blocked_message = (
 +        "[Blocked - post-build audit loop: all explicitly planned artifacts already exist.]"
 +    )
 +    tool_calls = [
 +        ToolCall(
 +            id=f"audit-{index}",
 +            name="bash",
 +            arguments={"command": f"cd {temp_dir} && ls -la guide/chapters/"},
 +        )
 +        for index in range(1, 4)
 +    ]
 +    executor = FakeExecutor(
 +        [
 +            tool_outcome(
 +                tool_call=tool_call,
 +                output=blocked_message,
 +                is_error=True,
 +                state=ToolExecutionState.BLOCKED,
 +            )
 +            for tool_call in tool_calls
 +        ]
 +    )
 +    events: list[AgentEvent] = []
++
 +    async def emit(event: AgentEvent) -> None:
 +        events.append(event)
++
 +    result = await runner.execute_batch(
 +        tool_calls=tool_calls,
 +        tool_source="native",
 +        pending_tool_calls_seen=set(),
 +        emit=emit,
 +        summary=TurnSummary(final_response=""),
 +        dod=dod,
 +        executor=executor,
 +        on_confirmation=None,
 +        on_user_question=None,
 +        emit_confirmation=None,
 +        consecutive_errors=0,
 +    )
++
 +    assert result.halted is False
 +    assert result.consecutive_errors == 0
 +    assert context.workflow_mode == "verify"
 +    assert queued
 +    assert any("move to verification or final confirmation" in message for message in queued)
++
++
  def test_tool_batch_runner_blocked_html_declared_target_nudge_uses_closest_declared_target(
      temp_dir: Path,
  ) -> None: