`18ae40f`

Name missing files from observed shapes

Authored by

espadonne 2 weeks ago

SHA: 18ae40ffbc2319aac5500fca07bd6adefd7c3b5a
Parents: 39c0fab
Tree: 50f77f0

5 changed files

Status	File	+	-
M	`src/loader/runtime/dod.py`	73	1
M	`src/loader/runtime/repair.py`	17	5
M	`src/loader/runtime/tool_batches.py`	24	4
M	`tests/test_repair.py`	77	1
M	`tests/test_tool_batches.py`	137	0

src/loader/runtime/dod.py (modified)

  from pathlib import Path
  from typing import Any, Literal
 -from ..llm.base import ToolCall
 +from ..llm.base import Message, ToolCall
  from ..tools.shell_tools import BashTool
  from .verification_observations import VerificationAttempt, verification_attempt_id
      return missing_targets[0] if missing_targets else None
 +def infer_next_output_file(
 +    *,
 +    target: Path,
 +    project_root: Path,
 +    messages: list[Message] | None = None,
 +) -> tuple[Path | None, str | None]:
 +    """Infer the next concrete output file for a planned output directory.
++
 +    Returns a tuple of `(path, source)` where source is one of:
 +    - `"declared"` when inferred from the current artifact graph
 +    - `"observed"` when mirrored from an already-inspected sibling directory
 +    """
++
 +    declared_target = infer_next_declared_html_output_file(
 +        target=target,
 +        project_root=project_root,
 +    )
 +    if declared_target is not None:
 +        return declared_target, "declared"
++
 +    observed_target = _infer_next_observed_output_file(
 +        target=target,
 +        messages=messages or [],
 +    )
 +    if observed_target is not None:
 +        return observed_target, "observed"
 +    return None, None
++
++
  def collect_missing_declared_html_output_files(
      *,
      target: Path,
      return tuple(missing_targets)
 +def _infer_next_observed_output_file(
 +    *,
 +    target: Path,
 +    messages: list[Message],
 +) -> Path | None:
 +    normalized_target = target.resolve(strict=False)
 +    if normalized_target.suffix:
 +        return None
++
 +    existing_names = {
 +        path.name
 +        for path in normalized_target.glob("*.html")
 +        if path.is_file()
 +    }
 +    candidate_names: set[str] = set()
 +    for message in messages:
 +        for tool_call in getattr(message, "tool_calls", []) or []:
 +            if tool_call.name != "read":
 +                continue
 +            raw_path = str(tool_call.arguments.get("file_path", "")).strip()
 +            if not raw_path:
 +                continue
 +            observed_path = Path(raw_path).expanduser().resolve(strict=False)
 +            if observed_path.suffix.lower() not in {".html", ".htm"}:
 +                continue
 +            if observed_path.name.lower() == "index.html":
 +                continue
 +            if observed_path.parent.name != normalized_target.name:
 +                continue
 +            try:
 +                observed_path.relative_to(normalized_target)
 +                continue
 +            except ValueError:
 +                pass
 +            if observed_path.name in existing_names:
 +                continue
 +            candidate_names.add(observed_path.name)
++
 +    if not candidate_names:
 +        return None
 +    return normalized_target / sorted(candidate_names)[0]
++
++
  def _build_planned_artifact_verification_commands(
      targets: list[tuple[Path, bool]],
  ) -> list[str]:

src/loader/runtime/repair.py (modified)

  from .dod import (
      DefinitionOfDone,
      collect_planned_artifact_targets,
 -    infer_next_declared_html_output_file,
 +    infer_next_output_file,
      planned_artifact_target_satisfied,
+ )
  from .parsing import parse_tool_calls
              (None, False),
+         )
          if first_missing_target is not None and first_missing_is_directory:
 -            next_output_file = infer_next_declared_html_output_file(
 +            next_output_file, next_output_source = infer_next_output_file(
                  target=first_missing_target,
                  project_root=self.context.project_root,
 +                messages=list(getattr(self.context.session, "messages", []) or []),
+             )
              if next_output_file is not None:
 -                lines.append(
 +                next_output_detail = (
                      "Next declared output under "
 -                    f"{self._format_artifact_label(first_missing_target, expect_directory=True)}: "
 +                    if next_output_source == "declared"
 +                    else "Next observed output pattern under "
 +                )
 +                lines.append(
 +                    next_output_detail
 +                    + f"{self._format_artifact_label(first_missing_target, expect_directory=True)}: "
                      f"{self._format_artifact_label(next_output_file, expect_directory=False)}"
+                 )
          if len(missing_labels) > 1:
                  expect_directory=expect_directory,
+             )
              if expect_directory:
 -                next_output_file = infer_next_declared_html_output_file(
 +                next_output_file, next_output_source = infer_next_output_file(
                      target=target,
                      project_root=self.context.project_root,
 +                    messages=list(getattr(self.context.session, "messages", []) or []),
+                 )
                  if next_output_file is not None:
                      next_output_label = self._format_artifact_label(
+                         ]
                      lines.append(
                          f"It is the next missing declared output under {label}."
 +                        if next_output_source == "declared"
 +                        else (
 +                            "It mirrors the observed filename pattern from another "
 +                            f"{label} directory you already inspected."
 +                        )
+                     )
                      lines.append(
                          f"Prefer one `write` call for `{next_output_file}` before more research."

src/loader/runtime/tool_batches.py (modified)

      collect_planned_artifact_targets,
      derive_verification_commands,
      ensure_active_verification_attempt,
 -    infer_next_declared_html_output_file,
 +    infer_next_output_file,
      is_state_mutating_tool_call,
      planned_artifact_target_satisfied,
      record_successful_tool_call,
                  + _missing_artifact_resume_suffix(
                      missing_artifact,
                      project_root=self.context.project_root,
 +                    messages=list(getattr(self.context.session, "messages", []) or []),
+                 )
                  + " Do not switch into review or consistency-check mode until the missing artifact exists."
+             )
                  mutation_suffix = _missing_artifact_resume_suffix(
                      missing_artifact,
                      project_root=self.context.project_root,
 +                    messages=list(getattr(self.context.session, "messages", []) or []),
+                 )
                  if not mutation_suffix:
                      mutation_suffix = (
                  + _missing_artifact_resume_suffix(
                      missing_artifact,
                      project_root=self.context.project_root,
 +                    messages=list(getattr(self.context.session, "messages", []) or []),
                  ).strip()
+             )
              return
              + _missing_artifact_resume_suffix(
                  missing_artifact,
                  project_root=self.context.project_root,
 +                messages=list(getattr(self.context.session, "messages", []) or []),
+             )
              + f" Stay within the current output roots under {roots_preview}"
              + " and finish that artifact before reopening older reference materials."
                  + _missing_artifact_resume_suffix(
                      missing_artifact,
                      project_root=self.context.project_root,
 +                    messages=list(getattr(self.context.session, "messages", []) or []),
+                 )
                  + " Do not switch into review or consistency-check mode until the missing artifact exists."
+             )
              mutation_suffix = _missing_artifact_resume_suffix(
                  missing_artifact,
                  project_root=self.context.project_root,
 +                messages=list(getattr(self.context.session, "messages", []) or []),
+             )
              if not mutation_suffix:
                  mutation_suffix = (
                  + _missing_artifact_resume_suffix(
                      missing_artifact,
                      project_root=self.context.project_root,
 +                    messages=list(getattr(self.context.session, "messages", []) or []),
+                 )
                  + " No TodoWrite, no verification, no rereads until that artifact exists."
+             )
              + _missing_artifact_resume_suffix(
                  missing_artifact,
                  project_root=self.context.project_root,
 +                messages=list(getattr(self.context.session, "messages", []) or []),
+             )
              + todo_refresh
              + " Do not move to verification, final confirmation, or TodoWrite-only "
              + _missing_artifact_resume_suffix(
                  missing_artifact,
                  project_root=self.context.project_root,
 +                messages=list(getattr(self.context.session, "messages", []) or []),
+             )
              + todo_refresh
              + " Do not spend the next turn on TodoWrite alone, bookkeeping notes, "
              + _missing_artifact_resume_suffix(
                  missing_artifact,
                  project_root=self.context.project_root,
 +                messages=list(getattr(self.context.session, "messages", []) or []),
+             )
              + todo_refresh
              + " Do not spend the next turn on additional notes, rediscovery, "
      missing_artifact: tuple[Path, bool] | None,
      *,
      project_root: Path,
 +    messages: list[Any] | None = None,
  ) -> str:
      if missing_artifact is None:
          return ""
      if expect_directory and not label.endswith("/"):
          label += "/"
      if expect_directory:
 -        next_output_file = infer_next_declared_html_output_file(
 +        next_output_file, next_output_source = infer_next_output_file(
              target=target,
              project_root=project_root,
 +            messages=list(messages or []),
+         )
          if next_output_file is not None:
 +            guidance_origin = (
 +                f"It is the next missing declared output under `{label}`."
 +                if next_output_source == "declared"
 +                else (
 +                    "It mirrors the observed filename pattern from another "
 +                    f"`{label}` directory you already inspected."
 +                )
 +            )
              guidance = (
 -                f" Resume by creating `{next_output_file.name}` now. It is the next missing "
 -                f"declared output under `{label}`. Prefer one `write` call for "
 +                f" Resume by creating `{next_output_file.name}` now. {guidance_origin} "
 +                f"Prefer one `write` call for "
                  f"`{next_output_file}` instead of more rereads."
+             )
              if not next_output_file.parent.exists():

tests/test_repair.py (modified)

  from pathlib import Path
  from types import SimpleNamespace
 -from loader.llm.base import ToolCall
 +from loader.llm.base import Message, Role, ToolCall
  from loader.runtime.context import RuntimeContext
  from loader.runtime.dod import create_definition_of_done
  from loader.runtime.permissions import (
      assert decision.retry_message is not None
      assert "Link all chapters together properly" not in decision.retry_message
      assert "Create the main index.html file with proper structure" in decision.retry_message
++
++
 +def test_empty_response_retry_names_next_file_from_observed_sibling_directory(
 +    temp_dir: Path,
 +) -> None:
 +    context = build_context(
 +        temp_dir=temp_dir,
 +        use_react=False,
 +    )
 +    repairer = ResponseRepairer(context)
++
 +    reference_chapters = temp_dir / "fortran" / "chapters"
 +    reference_chapters.mkdir(parents=True)
 +    (reference_chapters / "01-introduction.html").write_text("<h1>Introduction</h1>\n")
++
 +    guide_root = temp_dir / "guides" / "nginx"
 +    chapters = guide_root / "chapters"
 +    chapters.mkdir(parents=True)
 +    index_path = guide_root / "index.html"
 +    index_path.write_text("<html></html>\n")
++
 +    implementation_plan = temp_dir / "implementation.md"
 +    implementation_plan.write_text(
 +        "\n".join(
 +            [
 +                "# Implementation Plan",
 +                "",
 +                "## File Changes",
 +                f"- `{guide_root}/`",
 +                f"- `{chapters}/`",
 +                f"- `{index_path}`",
 +                "",
 +            ]
 +        )
 +    )
++
 +    dod = create_definition_of_done("Create a multi-file nginx guide.")
 +    dod.implementation_plan = str(implementation_plan)
 +    dod.touched_files.append(str(index_path))
 +    dod.pending_items.append("Write the introduction chapter")
 +    context.session.append(
 +        Message(
 +            role=Role.ASSISTANT,
 +            content="",
 +            tool_calls=[
 +                ToolCall(
 +                    id="read-ref-1",
 +                    name="read",
 +                    arguments={"file_path": str(reference_chapters / "01-introduction.html")},
 +                )
 +            ],
 +        )
 +    )
++
 +    decision = repairer.handle_empty_response(
 +        task="Create a multi-file nginx guide.",
 +        original_task=None,
 +        empty_retry_count=1,
 +        max_empty_retries=2,
 +        dod=dod,
 +    )
++
 +    assert decision.should_continue is True
 +    assert decision.retry_message is not None
 +    assert "Next missing planned artifact: `chapters/`" in decision.retry_message
 +    assert "Next observed output pattern under `chapters/`: `01-introduction.html`" in decision.retry_message
 +    assert (
 +        "Resume with this exact next step: continue `Write the introduction chapter` "
 +        "by creating `01-introduction.html`."
 +        in decision.retry_message
 +    )
 +    assert (
 +        "It mirrors the observed filename pattern from another `chapters/` directory "
 +        "you already inspected."
 +        in decision.retry_message
 +    )

tests/test_tool_batches.py (modified)

      assert "Do not spend the next turn on TodoWrite alone" in message
 +@pytest.mark.asyncio
 +async def test_tool_batch_runner_todowrite_uses_observed_sibling_pattern_for_next_file(
 +    temp_dir: Path,
 +) -> None:
 +    async def assess_confidence(
 +        tool_name: str,
 +        tool_args: dict,
 +        context: str,
 +    ) -> ConfidenceAssessment:
 +        raise AssertionError("Confidence scoring should not run in this scenario")
++
 +    async def verify_action(
 +        tool_name: str,
 +        tool_args: dict,
 +        result: str,
 +        expected: str = "",
 +    ) -> ActionVerification:
 +        raise AssertionError("Verification should not run in this scenario")
++
 +    reference_chapters = temp_dir / "fortran" / "chapters"
 +    reference_chapters.mkdir(parents=True)
 +    (reference_chapters / "01-introduction.html").write_text("<h1>Introduction</h1>\n")
++
 +    guide_root = temp_dir / "guides" / "nginx"
 +    chapters = guide_root / "chapters"
 +    guide_root.mkdir(parents=True)
 +    chapters.mkdir()
 +    index_path = guide_root / "index.html"
 +    index_path.write_text("<html></html>\n")
++
 +    implementation_plan = temp_dir / "implementation.md"
 +    implementation_plan.write_text(
 +        "\n".join(
 +            [
 +                "# Implementation Plan",
 +                "",
 +                "## File Changes",
 +                f"- `{guide_root}/`",
 +                f"- `{chapters}/`",
 +                f"- `{index_path}`",
 +                "",
 +            ]
 +        )
 +    )
++
 +    dod = create_definition_of_done("Create a multi-file nginx guide.")
 +    dod.implementation_plan = str(implementation_plan)
 +    dod.pending_items = [
 +        "Write the introduction chapter",
 +        "Complete the requested work",
 +    ]
 +    dod.touched_files.append(str(index_path))
++
 +    queued_messages: list[str] = []
 +    context = build_context(
 +        temp_dir=temp_dir,
 +        messages=[
 +            Message(
 +                role=Role.ASSISTANT,
 +                content="",
 +                tool_calls=[
 +                    ToolCall(
 +                        id="read-ref-1",
 +                        name="read",
 +                        arguments={"file_path": str(reference_chapters / "01-introduction.html")},
 +                    )
 +                ],
 +            )
 +        ],
 +        safeguards=FakeSafeguards(),
 +        assess_confidence=assess_confidence,
 +        verify_action=verify_action,
 +        auto_recover=False,
 +    )
 +    context.queue_steering_message_callback = queued_messages.append
 +    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
++
 +    tool_call = ToolCall(
 +        id="todo-observed-1",
 +        name="TodoWrite",
 +        arguments={
 +            "todos": [
 +                {
 +                    "content": "Write the introduction chapter",
 +                    "activeForm": "Writing the introduction chapter",
 +                    "status": "pending",
 +                }
 +            ]
 +        },
 +    )
 +    executor = FakeExecutor(
 +        [
 +            tool_outcome(
 +                tool_call=tool_call,
 +                output="Todos updated",
 +                is_error=False,
 +                metadata={
 +                    "new_todos": [
 +                        {
 +                            "content": "Write the introduction chapter",
 +                            "active_form": "Writing the introduction chapter",
 +                            "status": "pending",
 +                        }
 +                    ]
 +                },
 +            )
 +        ]
 +    )
++
 +    summary = TurnSummary(final_response="")
 +    await runner.execute_batch(
 +        tool_calls=[tool_call],
 +        tool_source="assistant",
 +        pending_tool_calls_seen=set(),
 +        emit=_noop_emit,
 +        summary=summary,
 +        dod=dod,
 +        executor=executor,  # type: ignore[arg-type]
 +        on_confirmation=None,
 +        on_user_question=None,
 +        emit_confirmation=None,
 +        consecutive_errors=0,
 +    )
++
 +    assert queued_messages
 +    message = queued_messages[-1]
 +    assert "Todo tracking is updated. An explicitly planned artifact is still missing." in message
 +    assert "Continue with the next pending item: `Write the introduction chapter`." in message
 +    assert "Resume by creating `01-introduction.html` now." in message
 +    assert (
 +        "It mirrors the observed filename pattern from another `chapters/` directory "
 +        "you already inspected."
 +        in message
 +    )
 +    assert "01-introduction.html` instead of more rereads." in message
++
++
  @pytest.mark.asyncio
  async def test_tool_batch_runner_bookkeeping_note_with_missing_artifact_requeues_resume_step(
      temp_dir: Path,