`6bf8210`

Promote completed builds into verify

Authored by mfwolffe <wolffemf@dukes.jmu.edu> 1 week ago

SHA: 6bf82100b57788c25f28fe3a4725f453b21a2822
Parents: 5a69d33
Tree: 3caa2ad

2 changed files

Status	File	+	-
M	`src/loader/runtime/tool_batches.py`	20	0
M	`tests/test_tool_batches.py`	103	4

`src/loader/runtime/tool_batches.py` (modified)

+             )
              return
 +        if verification_commands:
 +            self.context.set_workflow_mode("verify")
 +            self.context.queue_steering_message(
 +                "All explicitly planned artifacts already exist. "
 +                f"Use the generated files under {roots_preview} as the source of truth and stop broad rereads. "
 +                "If you already know a concrete mismatch, fix it directly. "
 +                "Verification should run next. Do not reopen reference materials or keep auditing the same files."
 +            )
 +            return
++
          verification_suffix = (
              "Move to verification or final confirmation using the files already on disk."
              if verification_commands
+                 )
                  return
 +            if verification_commands:
 +                self.context.set_workflow_mode("verify")
 +                self.context.queue_steering_message(
 +                    "Todo tracking is updated. All explicitly planned artifacts now exist on disk. "
 +                    "Verification should run next. Use the current output files as the source of truth, "
 +                    "and do not restart discovery, reopen reference materials, or spend another turn "
 +                    "on TodoWrite alone."
 +                )
 +                return
++
              verification_suffix = (
                  " Move to verification or final confirmation using the files already on disk."
                  if verification_commands

`tests/test_tool_batches.py` (modified)

      assert "If no specific mismatch remains, move to verification now." in message
 +@pytest.mark.asyncio
 +async def test_tool_batch_runner_successful_read_after_plan_complete_switches_to_verify(
 +    temp_dir: Path,
 +) -> None:
 +    async def assess_confidence(
 +        tool_name: str,
 +        tool_args: dict,
 +        context: str,
 +    ) -> ConfidenceAssessment:
 +        raise AssertionError("Confidence scoring should not run for this scenario")
++
 +    async def verify_action(
 +        tool_name: str,
 +        tool_args: dict,
 +        result: str,
 +        expected: str = "",
 +    ) -> ActionVerification:
 +        raise AssertionError("Verification should not run for this scenario")
++
 +    guide_root = temp_dir / "guides" / "nginx"
 +    chapters = guide_root / "chapters"
 +    guide_root.mkdir(parents=True)
 +    chapters.mkdir()
 +    index_path = guide_root / "index.html"
 +    chapter_one = chapters / "01-getting-started.html"
 +    chapter_two = chapters / "02-installation.html"
 +    index_path.write_text("<html></html>\n")
 +    chapter_one.write_text("<h1>One</h1>\n")
 +    chapter_two.write_text("<h1>Two</h1>\n")
++
 +    implementation_plan = temp_dir / "implementation.md"
 +    implementation_plan.write_text(
 +        "\n".join(
 +            [
 +                "# Implementation Plan",
 +                "",
 +                "## File Changes",
 +                f"- `{guide_root}/`",
 +                f"- `{chapters}/`",
 +                f"- `{index_path}`",
 +                f"- `{chapter_one}`",
 +                f"- `{chapter_two}`",
 +                "",
 +            ]
 +        )
 +    )
++
 +    context = build_context(
 +        temp_dir=temp_dir,
 +        messages=[],
 +        safeguards=FakeSafeguards(),
 +        assess_confidence=assess_confidence,
 +        verify_action=verify_action,
 +        auto_recover=False,
 +    )
 +    persistent_messages: list[str] = []
 +    ephemeral_messages: list[str] = []
 +    context.queue_steering_message_callback = persistent_messages.append
 +    context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
 +    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
 +    dod = create_definition_of_done("Create a multi-file nginx guide.")
 +    dod.implementation_plan = str(implementation_plan)
 +    dod.verification_commands = [f"ls -la {guide_root}"]
++
 +    tool_call = ToolCall(
 +        id="read-built-verify",
 +        name="read",
 +        arguments={"file_path": str(chapter_one)},
 +    )
 +    executor = FakeExecutor(
 +        [tool_outcome(tool_call=tool_call, output=chapter_one.read_text(), is_error=False)]
 +    )
++
 +    summary = TurnSummary(final_response="")
 +    await runner.execute_batch(
 +        tool_calls=[tool_call],
 +        tool_source="assistant",
 +        pending_tool_calls_seen=set(),
 +        emit=_noop_emit,
 +        summary=summary,
 +        dod=dod,
 +        executor=executor,  # type: ignore[arg-type]
 +        on_confirmation=None,
 +        on_user_question=None,
 +        emit_confirmation=None,
 +        consecutive_errors=0,
 +    )
++
 +    assert len(persistent_messages) == 1
 +    assert "All explicitly planned artifacts already exist." in persistent_messages[0]
 +    assert "Verification should run next." in persistent_messages[0]
 +    assert "stop broad rereads" in persistent_messages[0]
 +    assert ephemeral_messages == []
 +    assert context.workflow_mode == "verify"
++
++
  @pytest.mark.asyncio
  async def test_tool_batch_runner_observation_handoff_pushes_mutation_step(
      temp_dir: Path,
      assert "Move to verification once no specific mismatch remains." in message
      assert "reopen reference materials" in message
      assert "Fortran guide structure" not in message
 +    assert context.workflow_mode == "execute"
  @pytest.mark.asyncio
      assert queued_messages
      message = queued_messages[-1]
      assert "Todo tracking is updated. All explicitly planned artifacts now exist on disk." in message
 -    assert "Repair or verify the current files instead of expanding the artifact set." in message
 -    assert "Move to verification or final confirmation using the files already on disk." in message
 +    assert "Verification should run next." in message
 +    assert "Repair or verify the current files instead of expanding the artifact set." not in message
 +    assert context.workflow_mode == "verify"
  @pytest.mark.asyncio
      assert queued_messages
      message = queued_messages[-1]
      assert "Todo tracking is updated. All explicitly planned artifacts now exist on disk." in message
 -    assert "Repair or verify the current files instead of expanding the artifact set." in message
 -    assert "Move to verification or final confirmation using the files already on disk." in message
 +    assert "Verification should run next." in message
 +    assert "Repair or verify the current files instead of expanding the artifact set." not in message
      assert "08-troubleshooting.html" not in message
 +    assert context.workflow_mode == "verify"
  @pytest.mark.asyncio