`6bf8210`

Promote completed builds into verify

Authored by mfwolffe <wolffemf@dukes.jmu.edu> 1 week ago

SHA: 6bf82100b57788c25f28fe3a4725f453b21a2822
Parents: 5a69d33
Tree: 3caa2ad

2 changed files

Status	File	+	-
M	`src/loader/runtime/tool_batches.py`	20	0
M	`tests/test_tool_batches.py`	103	4

src/loader/runtime/tool_batches.py (modified)

+             )
              return
++        if verification_commands:
++            self.context.set_workflow_mode("verify")
++            self.context.queue_steering_message(
++                "All explicitly planned artifacts already exist. "
++                f"Use the generated files under {roots_preview} as the source of truth and stop broad rereads. "
++                "If you already know a concrete mismatch, fix it directly. "
++                "Verification should run next. Do not reopen reference materials or keep auditing the same files."
++            )
++            return
++
          verification_suffix = (
              "Move to verification or final confirmation using the files already on disk."
              if verification_commands
+                 )
                  return
++            if verification_commands:
++                self.context.set_workflow_mode("verify")
++                self.context.queue_steering_message(
++                    "Todo tracking is updated. All explicitly planned artifacts now exist on disk. "
++                    "Verification should run next. Use the current output files as the source of truth, "
++                    "and do not restart discovery, reopen reference materials, or spend another turn "
++                    "on TodoWrite alone."
++                )
++                return
++
              verification_suffix = (
                  " Move to verification or final confirmation using the files already on disk."
                  if verification_commands

tests/test_tool_batches.py (modified)

      assert "If no specific mismatch remains, move to verification now." in message
++@pytest.mark.asyncio
++async def test_tool_batch_runner_successful_read_after_plan_complete_switches_to_verify(
++    temp_dir: Path,
++) -> None:
++    async def assess_confidence(
++        tool_name: str,
++        tool_args: dict,
++        context: str,
++    ) -> ConfidenceAssessment:
++        raise AssertionError("Confidence scoring should not run for this scenario")
++
++    async def verify_action(
++        tool_name: str,
++        tool_args: dict,
++        result: str,
++        expected: str = "",
++    ) -> ActionVerification:
++        raise AssertionError("Verification should not run for this scenario")
++
++    guide_root = temp_dir / "guides" / "nginx"
++    chapters = guide_root / "chapters"
++    guide_root.mkdir(parents=True)
++    chapters.mkdir()
++    index_path = guide_root / "index.html"
++    chapter_one = chapters / "01-getting-started.html"
++    chapter_two = chapters / "02-installation.html"
++    index_path.write_text("<html></html>\n")
++    chapter_one.write_text("<h1>One</h1>\n")
++    chapter_two.write_text("<h1>Two</h1>\n")
++
++    implementation_plan = temp_dir / "implementation.md"
++    implementation_plan.write_text(
++        "\n".join(
++            [
++                "# Implementation Plan",
++                "",
++                "## File Changes",
++                f"- `{guide_root}/`",
++                f"- `{chapters}/`",
++                f"- `{index_path}`",
++                f"- `{chapter_one}`",
++                f"- `{chapter_two}`",
++                "",
++            ]
++        )
++    )
++
++    context = build_context(
++        temp_dir=temp_dir,
++        messages=[],
++        safeguards=FakeSafeguards(),
++        assess_confidence=assess_confidence,
++        verify_action=verify_action,
++        auto_recover=False,
++    )
++    persistent_messages: list[str] = []
++    ephemeral_messages: list[str] = []
++    context.queue_steering_message_callback = persistent_messages.append
++    context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
++    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
++    dod = create_definition_of_done("Create a multi-file nginx guide.")
++    dod.implementation_plan = str(implementation_plan)
++    dod.verification_commands = [f"ls -la {guide_root}"]
++
++    tool_call = ToolCall(
++        id="read-built-verify",
++        name="read",
++        arguments={"file_path": str(chapter_one)},
++    )
++    executor = FakeExecutor(
++        [tool_outcome(tool_call=tool_call, output=chapter_one.read_text(), is_error=False)]
++    )
++
++    summary = TurnSummary(final_response="")
++    await runner.execute_batch(
++        tool_calls=[tool_call],
++        tool_source="assistant",
++        pending_tool_calls_seen=set(),
++        emit=_noop_emit,
++        summary=summary,
++        dod=dod,
++        executor=executor,  # type: ignore[arg-type]
++        on_confirmation=None,
++        on_user_question=None,
++        emit_confirmation=None,
++        consecutive_errors=0,
++    )
++
++    assert len(persistent_messages) == 1
++    assert "All explicitly planned artifacts already exist." in persistent_messages[0]
++    assert "Verification should run next." in persistent_messages[0]
++    assert "stop broad rereads" in persistent_messages[0]
++    assert ephemeral_messages == []
++    assert context.workflow_mode == "verify"
++
++
  @pytest.mark.asyncio
  async def test_tool_batch_runner_observation_handoff_pushes_mutation_step(
      temp_dir: Path,
      assert "Move to verification once no specific mismatch remains." in message
      assert "reopen reference materials" in message
      assert "Fortran guide structure" not in message
++    assert context.workflow_mode == "execute"
  @pytest.mark.asyncio
      assert queued_messages
      message = queued_messages[-1]
      assert "Todo tracking is updated. All explicitly planned artifacts now exist on disk." in message
--    assert "Repair or verify the current files instead of expanding the artifact set." in message
++    assert "Verification should run next." in message
--    assert "Move to verification or final confirmation using the files already on disk." in message
++    assert "Repair or verify the current files instead of expanding the artifact set." not in message
++    assert context.workflow_mode == "verify"
  @pytest.mark.asyncio
      assert queued_messages
      message = queued_messages[-1]
      assert "Todo tracking is updated. All explicitly planned artifacts now exist on disk." in message
--    assert "Repair or verify the current files instead of expanding the artifact set." in message
++    assert "Verification should run next." in message
--    assert "Move to verification or final confirmation using the files already on disk." in message
++    assert "Repair or verify the current files instead of expanding the artifact set." not in message
      assert "08-troubleshooting.html" not in message
++    assert context.workflow_mode == "verify"
  @pytest.mark.asyncio