`edbe397`

Delay verification during builds

Authored by mfwolffe <wolffemf@dukes.jmu.edu> 1 week ago

SHA: edbe39782458b675172af014a36949d31b0de32f
Parents: c9cf957
Tree: 24e1a2b

2 changed files

Status	File	+	-
M	`src/loader/runtime/tool_batches.py`	31	18
M	`tests/test_tool_batches.py`	90	0

src/loader/runtime/tool_batches.py (modified)

          is_mutating = is_state_mutating_tool_call(tool_call)
          previously_verified = dod.last_verification_result == "passed"
          record_successful_tool_call(dod, tool_call)
 -        if previously_verified and is_mutating:
 -            _mark_verification_stale(
 -                context=self.context,
 -                summary=summary,
 -                dod=dod,
 -                tool_call=tool_call,
 -            )
 -        elif is_mutating and _should_plan_verification_for_tool_call(
 -            dod,
 -            tool_call=tool_call,
 -            project_root=self.context.project_root,
 -        ):
 -            _mark_verification_planned(
 -                context=self.context,
 -                summary=summary,
 -                dod=dod,
 -                tool_call=tool_call,
 -            )
          if tool_call.name == "TodoWrite" and outcome.registry_result is not None:
              new_todos = outcome.registry_result.metadata.get("new_todos", [])
              if isinstance(new_todos, list):
                  tool_call=tool_call,
                  dod=dod,
+             )
 +        if previously_verified and is_mutating:
 +            _mark_verification_stale(
 +                context=self.context,
 +                summary=summary,
 +                dod=dod,
 +                tool_call=tool_call,
 +            )
 +        elif is_mutating and _should_plan_verification_for_tool_call(
 +            dod,
 +            tool_call=tool_call,
 +            project_root=self.context.project_root,
 +        ):
 +            _mark_verification_planned(
 +                context=self.context,
 +                summary=summary,
 +                dod=dod,
 +                tool_call=tool_call,
 +            )
          self.dod_store.save(dod)
          recovery_context = self.context.recovery_context
          if recovery_context is not None:
      tool_call: ToolCall,
      project_root: Path,
  ) -> bool:
 +    actionable_pending = [
 +        item
 +        for item in effective_pending_todo_items(
 +            dod,
 +            project_root=project_root,
 +        )
 +        if item not in _TODO_NUDGE_EXCLUDED_ITEMS
 +    ]
 +    if any(
 +        _todo_is_mutation_step(item) or _todo_is_consistency_review_step(item)
 +        for item in actionable_pending
 +    ):
 +        return False
      if tool_call.name in {"write", "edit", "patch"}:
          return True
      if tool_call.name != "bash":

tests/test_tool_batches.py (modified)

+     )
 +@pytest.mark.asyncio
 +async def test_tool_batch_runner_does_not_mark_verification_planned_while_chapter_build_pending(
 +    temp_dir: Path,
 +) -> None:
 +    async def assess_confidence(
 +        tool_name: str,
 +        tool_args: dict,
 +        context: str,
 +    ) -> ConfidenceAssessment:
 +        raise AssertionError("Confidence scoring should be disabled in this scenario")
++
 +    async def verify_action(
 +        tool_name: str,
 +        tool_args: dict,
 +        result: str,
 +        expected: str = "",
 +    ) -> ActionVerification:
 +        raise AssertionError("Verification should not run in this scenario")
++
 +    context = build_context(
 +        temp_dir=temp_dir,
 +        messages=[],
 +        safeguards=FakeSafeguards(),
 +        assess_confidence=assess_confidence,
 +        verify_action=verify_action,
 +    )
 +    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
 +    nginx_root = temp_dir / "Loader" / "guides" / "nginx"
 +    chapters = nginx_root / "chapters"
 +    chapters.mkdir(parents=True)
 +    index_path = nginx_root / "index.html"
 +    implementation_plan = temp_dir / "implementation.md"
 +    implementation_plan.write_text(
 +        "\n".join(
 +            [
 +                "# Implementation Plan",
 +                "",
 +                "## File Changes",
 +                f"- `{nginx_root}/`",
 +                f"- `{chapters}/`",
 +                f"- `{index_path}`",
 +                "",
 +            ]
 +        )
 +    )
++
 +    tool_call = ToolCall(
 +        id="write-index",
 +        name="write",
 +        arguments={"file_path": str(index_path), "content": "<html></html>\n"},
 +    )
 +    executor = FakeExecutor(
 +        [tool_outcome(tool_call=tool_call, output="wrote file", is_error=False)]
 +    )
 +    summary = TurnSummary(final_response="")
 +    dod = create_definition_of_done("Create a multi-file nginx guide.")
 +    dod.implementation_plan = str(implementation_plan)
 +    dod.pending_items.extend(
 +        [
 +            "Develop the main index.html file with proper structure",
 +            "Create first nginx chapter",
 +        ]
 +    )
 +    events: list[AgentEvent] = []
++
 +    async def emit(event: AgentEvent) -> None:
 +        events.append(event)
++
 +    await runner.execute_batch(
 +        tool_calls=[tool_call],
 +        tool_source="assistant",
 +        pending_tool_calls_seen=set(),
 +        emit=emit,
 +        summary=summary,
 +        dod=dod,
 +        executor=executor,  # type: ignore[arg-type]
 +        on_confirmation=None,
 +        on_user_question=None,
 +        emit_confirmation=None,
 +        consecutive_errors=0,
 +    )
++
 +    assert dod.last_verification_result is None
 +    assert "Collect verification evidence" not in dod.pending_items
 +    assert "Create first nginx chapter" in dod.pending_items
 +    assert not any(
 +        entry.reason_code == "verification_planned" for entry in summary.workflow_timeline
 +    )
++
++
  @pytest.mark.asyncio
  async def test_tool_batch_runner_marks_passed_verification_stale_after_new_mutation(
      temp_dir: Path,