`8056705`

Prioritize concrete missing outputs

Authored by

espadonne 2 weeks ago

SHA: 8056705e83c8772c93189c34988f92e8adf90cfb
Parents: 09f576c
Tree: d28b7a9

4 changed files

Status	File	+	-
M	`src/loader/runtime/tool_batches.py`	42	0
M	`src/loader/runtime/workflow.py`	15	0
M	`tests/test_tool_batches.py`	136	1
M	`tests/test_workflow.py`	36	0

src/loader/runtime/tool_batches.py modified

              max_items=2,
+         )
          if _should_prioritize_missing_artifact(
 +            dod=dod,
              next_pending=next_pending,
              missing_artifact=missing_artifact,
 +            project_root=self.context.project_root,
          ):
              prefix = "Reuse the earlier observation instead of repeating it. "
              if confirmed_facts:
          if not completed_label or not next_pending or next_pending == completed_label:
              return
          if _should_prioritize_missing_artifact(
 +            dod=dod,
              next_pending=next_pending,
              missing_artifact=missing_artifact,
 +            project_root=self.context.project_root,
          ):
              if not has_artifact_progress:
                  compact_handoff = _compact_missing_artifact_handoff(
              and not _todo_is_mutation_step(next_pending)
              and not _todo_is_consistency_review_step(next_pending)
              and not _should_prioritize_missing_artifact(
 +                dod=dod,
                  next_pending=next_pending,
                  missing_artifact=(
                      missing_artifact
+                     )
                      else None
                  ),
 +                project_root=self.context.project_root,
+             )
          ):
              self.context.queue_steering_message(
  def _should_prioritize_missing_artifact(
      *,
 +    dod: DefinitionOfDone,
      next_pending: str | None,
      missing_artifact: tuple[Path, bool] | None,
 +    project_root: Path,
  ) -> bool:
      if missing_artifact is None:
          return False
      if not next_pending:
          return True
 +    if _pending_todo_conflicts_with_missing_artifact(
 +        dod,
 +        item=next_pending,
 +        missing_artifact=missing_artifact,
 +        project_root=project_root,
 +    ):
 +        return True
      if _todo_is_consistency_review_step(next_pending):
          return True
      return not _todo_is_mutation_step(next_pending)
 +def _pending_todo_conflicts_with_missing_artifact(
 +    dod: DefinitionOfDone,
 +    *,
 +    item: str,
 +    missing_artifact: tuple[Path, bool],
 +    project_root: Path,
 +) -> bool:
 +    text = item.strip().lower()
 +    if not text or item in _TODO_NUDGE_EXCLUDED_ITEMS:
 +        return False
++
 +    target, expect_directory = missing_artifact
 +    inferred_target = infer_pending_todo_output_target(
 +        dod,
 +        item,
 +        project_root=project_root,
 +    )
 +    if inferred_target is None:
 +        return not expect_directory and _todo_is_mutation_step(item)
++
 +    inferred_target = inferred_target.resolve(strict=False)
 +    target = target.resolve(strict=False)
 +    if expect_directory:
 +        return target != inferred_target and target not in inferred_target.parents
 +    return inferred_target != target
++
++
  def _next_missing_planned_artifact(
      dod: DefinitionOfDone,
      *,

src/loader/runtime/workflow.py modified

      "formatted",
      "formatting",
      "review",
 +    "style",
 +    "same style",
 +    "same structure",
 +    "follow the same",
+ )
  _BROAD_SETUP_HINTS = (
      "directory structure",
+         )
      ):
          return 0
 +    if (
 +        is_discovery_tool
 +        and _todo_requires_complete_artifact_set(text)
 +        and not (
 +            _contains_any(text, _READ_STEP_HINTS)
 +            or _contains_any(text, _SEARCH_STEP_HINTS)
 +            or _contains_any(text, _PARSE_STEP_HINTS)
 +            or _contains_any(text, _VERIFY_STEP_HINTS)
 +        )
 +    ):
 +        return 0
      if basename and basename in text:
          score += 3

tests/test_tool_batches.py modified

      assert len(queued_messages) == 1
      assert "Reuse the earlier observation instead of repeating it." in queued_messages[0]
 -    assert "Continue with the next pending item: `Create the remaining chapter files`." in queued_messages[0]
 +    assert "A declared output artifact is still missing." in queued_messages[0]
      assert "Resume by creating `04-variables.html` now." in queued_messages[0]
      assert f"Prefer one `write` call for `{temp_dir / 'chapters' / '04-variables.html'}` instead of more rereads." in queued_messages[0]
      assert "Update `" not in queued_messages[0]
 +@pytest.mark.asyncio
 +async def test_tool_batch_runner_successful_reference_read_prioritizes_concrete_missing_artifact(
 +    temp_dir: Path,
 +) -> None:
 +    async def assess_confidence(
 +        tool_name: str,
 +        tool_args: dict,
 +        context: str,
 +    ) -> ConfidenceAssessment:
 +        raise AssertionError("Confidence scoring should be disabled in this scenario")
++
 +    async def verify_action(
 +        tool_name: str,
 +        tool_args: dict,
 +        result: str,
 +        expected: str = "",
 +    ) -> ActionVerification:
 +        raise AssertionError("Verification should not run for this scenario")
++
 +    guide_root = temp_dir / "Loader" / "guides" / "nginx"
 +    chapters = guide_root / "chapters"
 +    chapters.mkdir(parents=True)
 +    chapter_one = chapters / "01-introduction.html"
 +    chapter_one.write_text("<html></html>\n")
 +    index_path = guide_root / "index.html"
++
 +    reference = temp_dir / "Loader" / "guides" / "fortran" / "index.html"
 +    reference.parent.mkdir(parents=True, exist_ok=True)
 +    reference.write_text("<h1>Fortran Beginner's Guide</h1>\n")
++
 +    implementation_plan = temp_dir / "implementation.md"
 +    implementation_plan.write_text(
 +        "\n".join(
 +            [
 +                "# Implementation Plan",
 +                "",
 +                "## File Changes",
 +                f"- `{guide_root}/`",
 +                f"- `{chapters}/`",
 +                f"- `{index_path}`",
 +                f"- `{chapter_one}`",
 +                f"- `{chapters / '02-installation.html'}`",
 +                "",
 +            ]
 +        )
 +    )
++
 +    context = build_context(
 +        temp_dir=temp_dir,
 +        messages=[],
 +        safeguards=FakeSafeguards(),
 +        assess_confidence=assess_confidence,
 +        verify_action=verify_action,
 +        auto_recover=False,
 +    )
 +    queued_messages: list[str] = []
 +    context.queue_steering_message_callback = queued_messages.append
 +    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
 +    dod = create_definition_of_done("Create a multi-file nginx guide.")
 +    dod.implementation_plan = str(implementation_plan)
 +    dod.touched_files.append(str(chapter_one))
 +    sync_todos_to_definition_of_done(
 +        dod,
 +        [
 +            {
 +                "content": "Examine the existing Fortran guide structure to understand the format and cadence",
 +                "active_form": "Working on: Examine the existing Fortran guide structure to understand the format and cadence",
 +                "status": "pending",
 +            },
 +            {
 +                "content": "Create each chapter file with appropriate content",
 +                "active_form": "Working on: Create each chapter file with appropriate content",
 +                "status": "pending",
 +            },
 +            {
 +                "content": "Ensure all files follow the same structure and style as the Fortran guide",
 +                "active_form": "Working on: Ensure all files follow the same structure and style as the Fortran guide",
 +                "status": "pending",
 +            },
 +        ],
 +    )
 +    tool_call = ToolCall(
 +        id="read-reference-index",
 +        name="read",
 +        arguments={"file_path": str(reference)},
 +    )
 +    read_output = "Observation [read]: Result: <h1>Fortran Beginner's Guide</h1>\n"
 +    executor = FakeExecutor(
 +        [
 +            ToolExecutionOutcome(
 +                tool_call=tool_call,
 +                state=ToolExecutionState.EXECUTED,
 +                message=Message.tool_result_message(
 +                    tool_call_id=tool_call.id,
 +                    display_content=read_output,
 +                    result_content=read_output,
 +                ),
 +                event_content=read_output,
 +                is_error=False,
 +                result_output=read_output,
 +            )
 +        ]
 +    )
++
 +    summary = TurnSummary(final_response="")
 +    await runner.execute_batch(
 +        tool_calls=[tool_call],
 +        tool_source="assistant",
 +        pending_tool_calls_seen=set(),
 +        emit=_noop_emit,
 +        summary=summary,
 +        dod=dod,
 +        executor=executor,  # type: ignore[arg-type]
 +        on_confirmation=None,
 +        on_user_question=None,
 +        emit_confirmation=None,
 +        consecutive_errors=0,
 +    )
++
 +    assert queued_messages
 +    assert any(
 +        "Confirmed progress: `Examine the existing Fortran guide structure to understand the format and cadence`"
 +        in message
 +        for message in queued_messages
 +    )
 +    assert any("Resume by creating `index.html` now." in message for message in queued_messages)
 +    assert not any(
 +        "Continue with the next pending item: `Create each chapter file with appropriate content`"
 +        in message
 +        for message in queued_messages
 +    )
++
++
  @pytest.mark.asyncio
  async def test_tool_batch_runner_duplicate_read_ignores_unplanned_expansion_after_plan_complete(
      temp_dir: Path,
          ],
+     )
      assert tool_batches_should_prioritize_missing_artifact(
 +        dod=dod,
          next_pending=dod.pending_items[0],
          missing_artifact=(chapters / "06-ssl-configuration.html", False),
 +        project_root=temp_dir,
+     )
      tool_call = ToolCall(

tests/test_workflow.py modified

      assert "Link all chapters together properly in the index file" in dod.pending_items
 +def test_advance_todos_from_tool_call_does_not_complete_aggregate_style_step_from_reference_read() -> None:
 +    dod = create_definition_of_done("Create a multi-file nginx guide.")
 +    sync_todos_to_definition_of_done(
 +        dod,
 +        [
 +            {
 +                "content": "Create each chapter file with appropriate content",
 +                "active_form": "Working on: Create each chapter file with appropriate content",
 +                "status": "pending",
 +            },
 +            {
 +                "content": "Ensure all files follow the same structure and style as the Fortran guide",
 +                "active_form": "Working on: Ensure all files follow the same structure and style as the Fortran guide",
 +                "status": "pending",
 +            },
 +        ],
 +    )
++
 +    assert (
 +        advance_todos_from_tool_call(
 +            dod,
 +            ToolCall(
 +                id="read-reference-index",
 +                name="read",
 +                arguments={"file_path": "~/Loader/guides/fortran/index.html"},
 +            ),
 +        )
 +        is False
 +    )
 +    assert "Create each chapter file with appropriate content" in dod.pending_items
 +    assert (
 +        "Ensure all files follow the same structure and style as the Fortran guide"
 +        in dod.pending_items
 +    )
++
++
  def test_sync_todos_to_definition_of_done_keeps_linking_step_pending_while_artifacts_missing(
      temp_dir: Path,
  ) -> None: