`8331de4`

Tighten late-stage build nudges

Authored by

espadonne 2 weeks ago

SHA: 8331de4e0a4e71229c4b667413521822169c53c8
Parents: d913493
Tree: 8b87952

4 changed files

Status	File	+
M	`src/loader/runtime/repair.py`	27
M	`src/loader/runtime/tool_batches.py`	37
M	`tests/test_repair.py`	81
M	`tests/test_tool_batches.py`	120

src/loader/runtime/repair.py modified

          retry_number: int,
          max_empty_retries: int,
      ) -> str:
 +        if dod is not None and self._should_compact_empty_retry_message(dod):
 +            compact_lines: list[str] = []
 +            compact_lines.extend(self._planned_artifact_progress_lines(dod)[:2])
 +            compact_lines.extend(
 +                self._next_step_resume_lines(
 +                    dod,
 +                    retry_number=retry_number,
 +                )
 +            )
 +            return "\n".join(
 +                [
 +                    "[EMPTY ASSISTANT RESPONSE]",
 +                    (
 +                        "Your last response was empty "
 +                        f"(retry {retry_number}/{max_empty_retries}). Continue from the "
 +                        "exact next step below."
 +                    ),
 +                    *[f"- {line}" for line in compact_lines],
 +                    "",
 +                    "Respond with that concrete mutation tool call now. Do not return an empty response.",
 +                ]
 +            )
++
          progress_lines: list[str] = []
          if dod is not None:
              reconcile_aggregate_completion_steps(
              return base_max_empty_retries
          return base_max_empty_retries + _LATE_STAGE_EMPTY_RETRY_EXTRA
 +    def _should_compact_empty_retry_message(self, dod: DefinitionOfDone) -> bool:
 +        # Gate for the compact "[EMPTY ASSISTANT RESPONSE]" retry prompt: true only
 +        # when at least 7 planned artifacts are counted as completed and at least
 +        # one planned artifact is still missing.
 +        # NOTE(review): the same `>= 7` threshold is hard-coded in
 +        # `_late_stage_missing_artifact_build` in tool_batches.py; keep both in sync.
 +        completed_artifacts, missing_artifacts = self._planned_artifact_counts(dod)
 +        return completed_artifacts >= 7 and missing_artifacts > 0
++
      def _planned_artifact_counts(self, dod: DefinitionOfDone) -> tuple[int, int]:
          completed = 0
          missing = 0

src/loader/runtime/tool_batches.py modified

              dod,
              project_root=self.context.project_root,
+         )
 +        if _late_stage_missing_artifact_build(
 +            dod,
 +            project_root=self.context.project_root,
 +        ):
 +            self.context.queue_steering_message(
 +                f"Confirmed progress: {current_label} is now recorded."
 +                + _missing_artifact_resume_suffix(
 +                    missing_artifact,
 +                    project_root=self.context.project_root,
 +                )
 +                + " No TodoWrite, no verification, no rereads until that artifact exists."
 +            )
 +            return
          self.context.queue_steering_message(
              f"Confirmed progress: {current_label} is now recorded."
              " One explicitly planned artifact is still missing."
      return None
 +def _late_stage_missing_artifact_build(
 +    dod: DefinitionOfDone,
 +    *,
 +    project_root: Path,
 +) -> bool:
 +    # Return True when the build is "late stage" with work outstanding: at least
 +    # 7 planned artifact targets are satisfied and at least one is not.
 +    # The caller uses this to queue the shorter "No TodoWrite, no verification,
 +    # no rereads" steering message instead of the default nudge.
 +    completed = 0
 +    missing = 0
 +    # NOTE(review): `max_paths=12` presumably caps how many planned targets are
 +    # collected, so plans with more targets may be only partially counted -- confirm
 +    # against `collect_planned_artifact_targets`.
 +    for target, expect_directory in collect_planned_artifact_targets(
 +        dod,
 +        project_root=project_root,
 +        max_paths=12,
 +    ):
 +        if planned_artifact_target_satisfied(
 +            dod,
 +            target=target,
 +            expect_directory=expect_directory,
 +            project_root=project_root,
 +        ):
 +            completed += 1
 +        else:
 +            missing += 1
 +    # NOTE(review): the `>= 7` threshold duplicates the one in
 +    # `_should_compact_empty_retry_message` in repair.py; keep both in sync.
 +    return completed >= 7 and missing > 0
++
++
  def _missing_artifact_resume_suffix(
      missing_artifact: tuple[Path, bool] | None,
      *,

tests/test_repair.py modified

      assert "Follow the same one-file-at-a-time mutation pattern" in decision.retry_message
 +def test_empty_response_retry_uses_compact_prompt_after_substantial_progress(
 +    temp_dir: Path,
 +) -> None:
 +    # Checks that an empty assistant response late in a multi-file build gets the
 +    # compact retry prompt rather than the longer one with working-note,
 +    # completed-work and pending-item sections.
 +    context = build_context(
 +        temp_dir=temp_dir,
 +        use_react=False,
 +    )
 +    # Seed a working-note observation so the test can assert that the compact
 +    # prompt does NOT echo a "Latest working note:" section.
 +    context.session.messages.append(
 +        SimpleNamespace(
 +            content=(
 +                "Observation [notepad_write_working]: Result: "
 +                "- [2026-04-23T19:00:00Z] Creating fifth chapter file: Advanced features"
 +            )
 +        )
 +    )
 +    repairer = ResponseRepairer(context)
++
 +    # On-disk state: two directories, the index and chapters 01-04 exist
 +    # (7 planned paths); chapter 05 is deliberately never written.
 +    guide_root = temp_dir / "guides" / "nginx"
 +    chapters = guide_root / "chapters"
 +    chapters.mkdir(parents=True)
 +    index_path = guide_root / "index.html"
 +    chapter_one = chapters / "01-getting-started.html"
 +    chapter_two = chapters / "02-installation.html"
 +    chapter_three = chapters / "03-first-website.html"
 +    chapter_four = chapters / "04-configuration-basics.html"
 +    chapter_five = chapters / "05-advanced-features.html"
 +    index_path.write_text("<html></html>\n")
 +    chapter_one.write_text("<h1>One</h1>\n")
 +    chapter_two.write_text("<h1>Two</h1>\n")
 +    chapter_three.write_text("<h1>Three</h1>\n")
 +    chapter_four.write_text("<h1>Four</h1>\n")
++
 +    # The implementation plan lists all 8 paths, including the missing chapter 05.
 +    implementation_plan = temp_dir / "implementation.md"
 +    implementation_plan.write_text(
 +        "\n".join(
 +            [
 +                "# Implementation Plan",
 +                "",
 +                "## File Changes",
 +                f"- `{guide_root}/`",
 +                f"- `{chapters}/`",
 +                f"- `{index_path}`",
 +                f"- `{chapter_one}`",
 +                f"- `{chapter_two}`",
 +                f"- `{chapter_three}`",
 +                f"- `{chapter_four}`",
 +                f"- `{chapter_five}`",
 +                "",
 +            ]
 +        )
 +    )
++
 +    dod = create_definition_of_done("Create a multi-file nginx guide.")
 +    dod.implementation_plan = str(implementation_plan)
 +    # NOTE(review): chapter_four exists on disk but is not listed in touched_files;
 +    # presumably the artifact count relies on disk state here -- confirm intent.
 +    dod.touched_files.extend(
 +        [str(index_path), str(chapter_one), str(chapter_two), str(chapter_three)]
 +    )
 +    dod.completed_items.extend(
 +        [
 +            "Create the directory structure for the new nginx guide",
 +            "Create the main index.html file with proper structure",
 +        ]
 +    )
 +    dod.pending_items.append("Create each chapter file in sequence")
++
 +    # empty_retry_count (3) exceeds max_empty_retries (2) while the test still
 +    # expects should_continue; presumably the late-stage extra retry allowance
 +    # applies -- TODO confirm against handle_empty_response.
 +    decision = repairer.handle_empty_response(
 +        task="Create a multi-file nginx guide.",
 +        original_task=None,
 +        empty_retry_count=3,
 +        max_empty_retries=2,
 +        dod=dod,
 +    )
++
 +    assert decision.should_continue is True
 +    assert decision.retry_message is not None
 +    # The compact prompt carries the short instruction and omits the verbose sections.
 +    assert "Continue from the exact next step below." in decision.retry_message
 +    assert "Latest working note:" not in decision.retry_message
 +    assert "Confirmed completed work:" not in decision.retry_message
 +    assert "Next pending item:" not in decision.retry_message
++
++
  def test_empty_response_retry_points_at_next_output_file_when_planned_directory_is_empty(
      temp_dir: Path,
  ) -> None:

tests/test_tool_batches.py modified

+     )
 +@pytest.mark.asyncio
 +async def test_tool_batch_runner_uses_compact_missing_artifact_nudge_after_substantial_progress(
 +    temp_dir: Path,
 +) -> None:
 +    # Checks that after a successful write late in a multi-file build, with one
 +    # planned file still missing, the runner queues the compact steering message
 +    # naming the missing file and forbidding TodoWrite/verification/rereads.
 +    async def assess_confidence(
 +        tool_name: str,
 +        tool_args: dict,
 +        context: str,
 +    ) -> ConfidenceAssessment:
 +        # Guard: fails the test if confidence scoring is invoked.
 +        raise AssertionError("Confidence scoring should not run in this scenario")
++
 +    async def verify_action(
 +        tool_name: str,
 +        tool_args: dict,
 +        result: str,
 +        expected: str = "",
 +    ) -> ActionVerification:
 +        # Guard: fails the test if action verification is invoked.
 +        raise AssertionError("Verification should not run in this scenario")
++
 +    guide_root = temp_dir / "guides" / "nginx"
 +    chapters = guide_root / "chapters"
 +    guide_root.mkdir(parents=True)
 +    chapters.mkdir()
 +    index_path = guide_root / "index.html"
 +    chapter_paths = [
 +        chapters / "01-introduction.html",
 +        chapters / "02-installation.html",
 +        chapters / "03-configuration.html",
 +        chapters / "04-basic-usage.html",
 +        chapters / "05-advanced-features.html",
 +    ]
 +    # Only the index and chapters 01-04 are written; chapter 05 stays missing, so
 +    # 7 planned paths (2 directories + 5 files) exist and 1 does not.
 +    for path in (index_path, *chapter_paths[:4]):
 +        path.write_text("<html></html>\n")
++
 +    # The implementation plan lists both directories, the index and all five chapters.
 +    implementation_plan = temp_dir / "implementation.md"
 +    implementation_plan.write_text(
 +        "\n".join(
 +            [
 +                "# Implementation Plan",
 +                "",
 +                "## File Changes",
 +                f"- `{guide_root}/`",
 +                f"- `{chapters}/`",
 +                f"- `{index_path}`",
 +                *[f"- `{path}`" for path in chapter_paths],
 +                "",
 +            ]
 +        )
 +    )
++
 +    context = build_context(
 +        temp_dir=temp_dir,
 +        messages=[],
 +        safeguards=FakeSafeguards(),
 +        assess_confidence=assess_confidence,
 +        verify_action=verify_action,
 +        auto_recover=False,
 +    )
 +    # Capture queued steering messages so the final nudge text can be inspected.
 +    queued_messages: list[str] = []
 +    context.queue_steering_message_callback = queued_messages.append
 +    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
 +    dod = create_definition_of_done("Create a thorough nginx guide.")
 +    dod.implementation_plan = str(implementation_plan)
 +    dod.touched_files.extend(str(path) for path in (index_path, *chapter_paths[:4]))
 +    dod.completed_items.extend(
 +        [
 +            "Create the nginx directory structure",
 +            "Create the main index.html file with proper structure",
 +        ]
 +    )
 +    sync_todos_to_definition_of_done(
 +        dod,
 +        [
 +            {
 +                "content": "Create each chapter file with appropriate content",
 +                "active_form": "Creating each chapter file with appropriate content",
 +                "status": "pending",
 +            }
 +        ],
 +    )
 +    # The batch under test rewrites chapter 04 (chapter_paths[3]), which already
 +    # exists, so chapter 05 is still the missing artifact afterwards.
 +    tool_call = ToolCall(
 +        id="write-chapter-04",
 +        name="write",
 +        arguments={
 +            "file_path": str(chapter_paths[3]),
 +            "content": "<html>updated</html>\n",
 +        },
 +    )
 +    executor = FakeExecutor(
 +        [
 +            tool_outcome(
 +                tool_call=tool_call,
 +                output=f"Successfully wrote {chapter_paths[3]}",
 +                is_error=False,
 +            )
 +        ]
 +    )
++
 +    summary = TurnSummary(final_response="")
 +    await runner.execute_batch(
 +        tool_calls=[tool_call],
 +        tool_source="assistant",
 +        pending_tool_calls_seen=set(),
 +        emit=_noop_emit,
 +        summary=summary,
 +        dod=dod,
 +        executor=executor,  # type: ignore[arg-type]
 +        on_confirmation=None,
 +        on_user_question=None,
 +        emit_confirmation=None,
 +        consecutive_errors=0,
 +    )
++
 +    assert queued_messages
 +    message = queued_messages[-1]
 +    # The compact nudge names the missing file and carries the restriction text.
 +    assert "Resume by creating `05-advanced-features.html` now." in message
 +    assert "No TodoWrite, no verification, no rereads until that artifact exists." in message
 +    # Presumably the non-compact nudge asks for a TodoWrite refresh; it must be absent here.
 +    assert "refresh `TodoWrite`" not in message
++
++
  @pytest.mark.asyncio
  async def test_tool_batch_runner_todowrite_with_missing_artifact_requeues_exact_resume_step(
      temp_dir: Path,