`6ee06b1`

Soften recovered handoffs

Authored by mfwolffe <wolffemf@dukes.jmu.edu> 2 weeks ago

SHA: 6ee06b156ec0a550b3976c82b11dc279d956b65d
Parents: 1403a6a
Tree: 6dfed4b

2 changed files

Status	File	+	-
M	`src/loader/runtime/tool_batches.py`	26	6
M	`tests/test_tool_batches.py`	117	0

src/loader/runtime/tool_batches.py — modified

              dod,
              project_root=self.context.project_root,
+         )
 +        session_messages = list(getattr(self.context.session, "messages", []) or [])
 +        if use_persistent_handoff and _recent_recovery_prompt(session_messages):
 +            use_persistent_handoff = False
          queue_message = (
              self.context.queue_steering_message
              if use_persistent_handoff
              compact_resume = _compact_missing_artifact_handoff(
                  (resume_target, False),
                  project_root=self.context.project_root,
 -                messages=list(getattr(self.context.session, "messages", []) or []),
 +                messages=session_messages,
+             )
              if compact_resume:
                  queue_message(
              compact_handoff = _compact_missing_artifact_handoff(
                  missing_artifact,
                  project_root=self.context.project_root,
 -                messages=list(getattr(self.context.session, "messages", []) or []),
 +                messages=session_messages,
+             )
              if compact_handoff:
                  queue_message(
          *,
          dod: DefinitionOfDone,
      ) -> None:
 +        session_messages = list(getattr(self.context.session, "messages", []) or [])
          missing_artifact = _next_missing_planned_artifact(
              dod,
              project_root=self.context.project_root,
 -            messages=list(getattr(self.context.session, "messages", []) or []),
 +            messages=session_messages,
+         )
          next_pending = preferred_pending_todo_item(
              dod,
              + _missing_artifact_resume_suffix(
                  missing_artifact,
                  project_root=self.context.project_root,
 -                messages=list(getattr(self.context.session, "messages", []) or []),
 +                messages=session_messages,
+             )
              + todo_refresh
              + " Do not spend the next turn on TodoWrite alone, bookkeeping notes, "
          if tool_call.name not in _BOOKKEEPING_NOTE_TOOL_NAMES:
              return
 +        session_messages = list(getattr(self.context.session, "messages", []) or [])
          missing_artifact = _next_missing_planned_artifact(
              dod,
              project_root=self.context.project_root,
 -            messages=list(getattr(self.context.session, "messages", []) or []),
 +            messages=session_messages,
+         )
          if missing_artifact is None:
              return
              + _missing_artifact_resume_suffix(
                  missing_artifact,
                  project_root=self.context.project_root,
 -                messages=list(getattr(self.context.session, "messages", []) or []),
 +                messages=session_messages,
+             )
              + todo_refresh
              + " Do not spend the next turn on additional notes, rediscovery, "
      return bool(parts) and parts[0] == "mkdir"
 +# Prefixes that mark a user message as an injected recovery prompt.
 +_RECOVERY_PROMPT_PREFIXES = (
 +    "[EMPTY ASSISTANT RESPONSE]",
 +    "[CONTINUE CURRENT STEP]",
 +)
 +
 +
 +def _recent_recovery_prompt(messages: list[Any]) -> bool:
 +    """Return True if one of the last four messages is a recovery prompt.
 +
 +    A message counts when its role is ``"user"`` (either the plain string or
 +    an enum-like object whose ``value`` is ``"user"``) and its string content
 +    starts with one of ``_RECOVERY_PROMPT_PREFIXES``. Messages with any other
 +    role, or whose content is not a ``str``, are ignored.
 +
 +    Args:
 +        messages: Session messages, oldest first. Only the final four entries
 +            are inspected.
 +
 +    Returns:
 +        True when a qualifying recovery prompt is found, otherwise False.
 +    """
 +    # Walk newest-first so the most recent prompt short-circuits the scan.
 +    for message in reversed(messages[-4:]):
 +        role = getattr(message, "role", None)
 +        # Role may be an enum member or a bare string; compare on its value.
 +        if getattr(role, "value", role) != "user":
 +            continue
 +        content = getattr(message, "content", "")
 +        # str.startswith accepts a tuple, so one call covers every marker.
 +        if isinstance(content, str) and content.startswith(_RECOVERY_PROMPT_PREFIXES):
 +            return True
 +    return False
++
++
  def _tool_call_label(tool_call: ToolCall) -> str:
      """Human-readable label for one tool call."""
      name = tool_call.name

tests/test_tool_batches.py — modified

      assert ephemeral_messages == []
 +@pytest.mark.asyncio
 +async def test_tool_batch_runner_softens_first_file_handoff_after_recovery_prompt(
 +    temp_dir: Path,
 +) -> None:
 +    """A recovery prompt in the session keeps the handoff off the persistent queue.
 +
 +    The session holds a single ``[EMPTY ASSISTANT RESPONSE]`` user message.
 +    After a successful write of ``index.html``, nothing may be queued on the
 +    persistent steering callback, and the last ephemeral steering message must
 +    tell the model to resume by creating ``01-introduction.html``.
 +    """
 +
 +    async def fail_on_confidence(
 +        tool_name: str,
 +        tool_args: dict,
 +        context: str,
 +    ) -> ConfidenceAssessment:
 +        raise AssertionError("Confidence scoring should be disabled in this scenario")
 +
 +    async def fail_on_verification(
 +        tool_name: str,
 +        tool_args: dict,
 +        result: str,
 +        expected: str = "",
 +    ) -> ActionVerification:
 +        raise AssertionError("Verification should not run for this scenario")
 +
 +    # On-disk layout: the chapters directory exists, the planned files do not.
 +    guide_root = temp_dir / "guides" / "nginx"
 +    chapters_dir = guide_root / "chapters"
 +    chapters_dir.mkdir(parents=True)
 +    index_file = guide_root / "index.html"
 +    first_chapter = chapters_dir / "01-introduction.html"
 +
 +    # The trailing empty entry makes the plan text end with a newline.
 +    plan_lines = [
 +        "# Implementation Plan",
 +        "",
 +        "## File Changes",
 +        f"- `{chapters_dir}/`",
 +        f"- `{index_file}`",
 +        f"- `{first_chapter}`",
 +        "",
 +    ]
 +    plan_path = temp_dir / "implementation.md"
 +    plan_path.write_text("\n".join(plan_lines))
 +
 +    recovery_prompt = Message(
 +        role=Role.USER,
 +        content=(
 +            "[EMPTY ASSISTANT RESPONSE]\n"
 +            "Respond with that concrete mutation tool call now. Do not return an empty response."
 +        ),
 +    )
 +    runner_context = build_context(
 +        temp_dir=temp_dir,
 +        messages=[recovery_prompt],
 +        safeguards=FakeSafeguards(),
 +        assess_confidence=fail_on_confidence,
 +        verify_action=fail_on_verification,
 +        auto_recover=False,
 +    )
 +
 +    # Capture both steering channels so the test can tell them apart.
 +    persistent_queue: list[str] = []
 +    ephemeral_queue: list[str] = []
 +    runner_context.queue_steering_message_callback = persistent_queue.append
 +    runner_context.queue_ephemeral_steering_message_callback = ephemeral_queue.append
 +    runner = ToolBatchRunner(runner_context, DefinitionOfDoneStore(temp_dir))
 +
 +    pending_todos = [
 +        {
 +            "content": "Create the main index.html file with proper structure",
 +            "active_form": "Creating the main index.html file with proper structure",
 +            "status": "pending",
 +        },
 +        {
 +            "content": "Create each chapter file with appropriate content",
 +            "active_form": "Creating each chapter file with appropriate content",
 +            "status": "pending",
 +        },
 +    ]
 +    dod = create_definition_of_done("Create a multi-file nginx guide.")
 +    dod.implementation_plan = str(plan_path)
 +    sync_todos_to_definition_of_done(dod, pending_todos)
 +
 +    write_index = ToolCall(
 +        id="write-index-recovered",
 +        name="write",
 +        arguments={
 +            "file_path": str(index_file),
 +            "content": "<html></html>\n",
 +        },
 +    )
 +    write_outcome = tool_outcome(
 +        tool_call=write_index,
 +        output=f"Successfully wrote 14 bytes to {index_file}",
 +        is_error=False,
 +    )
 +    executor = FakeExecutor([write_outcome])
 +
 +    turn_summary = TurnSummary(final_response="")
 +    await runner.execute_batch(
 +        tool_calls=[write_index],
 +        tool_source="assistant",
 +        pending_tool_calls_seen=set(),
 +        emit=_noop_emit,
 +        summary=turn_summary,
 +        dod=dod,
 +        executor=executor,  # type: ignore[arg-type]
 +        on_confirmation=None,
 +        on_user_question=None,
 +        emit_confirmation=None,
 +        consecutive_errors=0,
 +    )
 +
 +    assert persistent_queue == []
 +    assert ephemeral_queue
 +    assert "Resume by creating `01-introduction.html` now." in ephemeral_queue[-1]
++
++
  @pytest.mark.asyncio
  async def test_duplicate_observation_nudge_prioritizes_missing_artifact_over_review(
      temp_dir: Path,