`78735b6`

Stop reinferring complete guide children

Authored by mfwolffe <wolffemf@dukes.jmu.edu> 1 week ago

SHA: 78735b613d47105f5fc5659047dc80a70e581563
Parents: 0bd46e0
Tree: 82cb2c8

2 changed files

Status	File	+	-
M	`src/loader/runtime/tool_batches.py`	18	0
M	`tests/test_tool_batches.py`	127	0

src/loader/runtime/tool_batches.py (modified)

              project_root=project_root,
          ):
              return target, expect_directory
 +    if not _pending_mutation_work_requires_inferred_children(
 +        dod,
 +        project_root=project_root,
 +    ):
 +        return None
      for target, expect_directory in collect_planned_artifact_targets(
          dod,
          project_root=project_root,
      return None
 +def _pending_mutation_work_requires_inferred_children(
 +    dod: DefinitionOfDone,
 +    *,
 +    project_root: Path,
 +) -> bool:
 +    actionable_pending = [
 +        item
 +        for item in effective_pending_todo_items(dod, project_root=project_root)
 +        if item not in _TODO_NUDGE_EXCLUDED_ITEMS
 +    ]
 +    return any(_todo_is_mutation_step(item) for item in actionable_pending)
++
++
  def _prefer_missing_artifact_for_pending_item(
      dod: DefinitionOfDone,
      *,

tests/test_tool_batches.py (modified)

      assert "Verification should run next." in queued_messages[-1]
 +@pytest.mark.asyncio
 +async def test_tool_batch_runner_todowrite_complete_directory_plan_does_not_reinfer_first_child(
 +    temp_dir: Path,
 +) -> None:
 +    async def assess_confidence(
 +        tool_name: str,
 +        tool_args: dict,
 +        context: str,
 +    ) -> ConfidenceAssessment:
 +        raise AssertionError("Confidence scoring should not run for this scenario")
++
 +    async def verify_action(
 +        tool_name: str,
 +        tool_args: dict,
 +        result: str,
 +        expected: str = "",
 +    ) -> ActionVerification:
 +        raise AssertionError("Verification should not run for this scenario")
++
 +    reference = temp_dir / "fortran" / "chapters" / "01-introduction.html"
 +    reference.parent.mkdir(parents=True)
 +    reference.write_text("<h1>Introduction</h1>\n")
++
 +    guide_root = temp_dir / "Loader" / "guides" / "nginx"
 +    chapters = guide_root / "chapters"
 +    guide_root.mkdir(parents=True)
 +    chapters.mkdir()
 +    index_path = guide_root / "index.html"
 +    chapter_one = chapters / "01-introduction.html"
 +    chapter_two = chapters / "02-installation.html"
 +    chapter_three = chapters / "03-basic-configuration.html"
 +    index_path.write_text(
 +        "\n".join(
 +            [
 +                '<a href="chapters/01-introduction.html">Introduction</a>',
 +                '<a href="chapters/02-installation.html">Installation</a>',
 +                '<a href="chapters/03-basic-configuration.html">Configuration</a>',
 +                "",
 +            ]
 +        )
 +    )
 +    chapter_one.write_text("<html></html>\n")
 +    chapter_two.write_text("<html></html>\n")
 +    chapter_three.write_text("<html></html>\n")
++
 +    implementation_plan = temp_dir / "implementation.md"
 +    implementation_plan.write_text(
 +        "\n".join(
 +            [
 +                "# Implementation Plan",
 +                "",
 +                "## File Changes",
 +                f"- `{guide_root / 'index.html'}`",
 +                f"- `{chapters}/`",
 +                "",
 +            ]
 +        )
 +    )
++
 +    messages = [
 +        Message(
 +            role=Role.ASSISTANT,
 +            content="I examined the reference guide structure.",
 +            tool_calls=[
 +                ToolCall(
 +                    id="read-reference-child",
 +                    name="read",
 +                    arguments={"file_path": str(reference)},
 +                )
 +            ],
 +        )
 +    ]
 +    context = build_context(
 +        temp_dir=temp_dir,
 +        messages=messages,
 +        safeguards=FakeSafeguards(),
 +        assess_confidence=assess_confidence,
 +        verify_action=verify_action,
 +        auto_recover=False,
 +    )
 +    queued_messages: list[str] = []
 +    context.queue_steering_message_callback = queued_messages.append
 +    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
 +    dod = create_definition_of_done("Create an equally thorough nginx guide.")
 +    dod.implementation_plan = str(implementation_plan)
 +    dod.verification_commands = [f"ls -la {guide_root}"]
++
 +    todo_call = ToolCall(
 +        id="todo-complete-directory-plan",
 +        name="TodoWrite",
 +        arguments={"todos": []},
 +    )
 +    executor = FakeExecutor(
 +        [
 +            tool_outcome(
 +                tool_call=todo_call,
 +                output="Todos updated",
 +                is_error=False,
 +                metadata={"new_todos": []},
 +            )
 +        ]
 +    )
++
 +    summary = TurnSummary(final_response="")
 +    result = await runner.execute_batch(
 +        tool_calls=[todo_call],
 +        tool_source="assistant",
 +        pending_tool_calls_seen=set(),
 +        emit=_noop_emit,
 +        summary=summary,
 +        dod=dod,
 +        executor=executor,  # type: ignore[arg-type]
 +        on_confirmation=None,
 +        on_user_question=None,
 +        emit_confirmation=None,
 +        consecutive_errors=0,
 +    )
++
 +    assert result.continue_after_batch is True
 +    assert queued_messages
 +    message = queued_messages[-1]
 +    assert "Verification should run next." in message
 +    assert "01-introduction.html" not in message
 +    assert "chapter files" not in message.lower()
 +    assert context.workflow_mode == "verify"
++
++
  @pytest.mark.asyncio
  async def test_tool_batch_runner_todowrite_drops_unplanned_expansion_after_outputs_exist(
      temp_dir: Path,