Lighten steering handoffs

Status	File	+	-
M	`src/loader/agent/loop.py`	7	1
M	`src/loader/runtime/bootstrap.py`	15	3
M	`src/loader/runtime/context.py`	13	2
M	`src/loader/runtime/finalization.py`	40	0
M	`src/loader/runtime/public_shell.py`	13	4
M	`src/loader/runtime/runtime_handle.py`	7	1
A	`src/loader/runtime/steering.py`	13	0
M	`src/loader/runtime/tool_batches.py`	14	14
M	`src/loader/runtime/turn_preamble.py`	5	3
M	`tests/test_finalization.py`	37	0
M	`tests/test_runtime_context.py`	4	1
M	`tests/test_runtime_handle.py`	4	1
M	`tests/test_runtime_public_shell.py`	5	2
M	`tests/test_tool_batches.py`	10	10
M	`tests/test_turn_preamble.py`	41	0

src/loader/agent/loop.py (modified)

      stream_runtime_shell,
+ )
  from ..runtime.safeguards import RuntimeSafeguards
 +from ..runtime.steering import SteeringDirective
  from ..runtime.workflow import WorkflowMode
  from ..tools.base import ToolRegistry, create_default_registry
          self.steering.queue(message)
 -    def drain_steering_messages(self) -> list[str]:
 +    def queue_ephemeral_steering_message(self, message: str) -> None:
 +        """Queue one UI-only runtime steering message."""
++
 +        self.steering.queue_ephemeral(message)
++
 +    def drain_steering_messages(self) -> list[SteeringDirective]:
          """Drain queued runtime steering messages."""
          return self.steering.drain()

src/loader/runtime/bootstrap.py (modified)

  from .permissions import PermissionConfigStatus, PermissionPolicy
  from .reasoning_service import RuntimeReasoningService
  from .session import ConversationSession
 +from .steering import SteeringDirective
  class RuntimeBootstrapSource(Protocol):
      def queue_steering_message(self, message: str) -> None:
          """Queue one steering message for the runtime."""
 -    def drain_steering_messages(self) -> list[str]:
 +    def queue_ephemeral_steering_message(self, message: str) -> None:
 +        """Queue one UI-only steering message for the runtime."""
++
 +    def drain_steering_messages(self) -> list[SteeringDirective]:
          """Drain queued steering messages."""
      def refresh_capability_profile(self) -> None:
      _get_prompt_format: Callable[[], str | None]
      _get_prompt_sections: Callable[[], list[str]]
      _queue_steering_message: Callable[[str], None]
 -    _drain_steering_messages: Callable[[], list[str]]
 +    _queue_ephemeral_steering_message: Callable[[str], None]
 +    _drain_steering_messages: Callable[[], list[SteeringDirective]]
      _refresh_capability_profile: Callable[[], None]
      metadata: dict[str, Any] = field(default_factory=dict)
          self._queue_steering_message(message)
 -    def drain_steering_messages(self) -> list[str]:
 +    def queue_ephemeral_steering_message(self, message: str) -> None:
 +        """Queue one UI-only steering message through the public shell callback."""
++
 +        self._queue_ephemeral_steering_message(message)
++
 +    def drain_steering_messages(self) -> list[SteeringDirective]:
          """Drain steering messages through the public shell callback."""
          return self._drain_steering_messages()
          _get_prompt_format=lambda: source.prompt_format,
          _get_prompt_sections=lambda: list(source.prompt_sections),
          _queue_steering_message=source.queue_steering_message,
 +        _queue_ephemeral_steering_message=source.queue_ephemeral_steering_message,
          _drain_steering_messages=source.drain_steering_messages,
          _refresh_capability_profile=source.refresh_capability_profile,
          metadata=build_runtime_owner_metadata(source),
          set_workflow_mode_callback=_set_workflow_mode,
          drain_steering_messages_callback=source.drain_steering_messages,
          queue_steering_message_callback=source.queue_steering_message,
 +        queue_ephemeral_steering_message_callback=source.queue_ephemeral_steering_message,
          refresh_capability_profile_callback=_refresh_capability_profile,
+     )
      return context

src/loader/runtime/context.py (modified)

  from .reasoning_types import ActionVerification, ConfidenceAssessment
  from .recovery import RecoveryContext
  from .session import ConversationSession
 +from .steering import SteeringDirective
  class ReasoningConfigProtocol(Protocol):
      prompt_format: str | None = None
      prompt_sections: list[str] = field(default_factory=list)
      set_workflow_mode_callback: Callable[[str], None] | None = None
 -    drain_steering_messages_callback: Callable[[], list[str]] | None = None
 +    drain_steering_messages_callback: Callable[[], list[SteeringDirective]] | None = None
      queue_steering_message_callback: Callable[[str], None] | None = None
 +    queue_ephemeral_steering_message_callback: Callable[[str], None] | None = None
      refresh_capability_profile_callback: Callable[[], None] | None = None
      @property
          self.set_workflow_mode_callback(workflow_mode)
          self.workflow_mode = workflow_mode
 -    def drain_steering_messages(self) -> list[str]:
 +    def drain_steering_messages(self) -> list[SteeringDirective]:
          """Drain pending steering messages through the runtime control seam."""
          if self.drain_steering_messages_callback is None:
              return
          self.queue_steering_message_callback(message)
 +    def queue_ephemeral_steering_message(self, message: str) -> None:
 +        """Queue a UI-visible steering message without forcing model persistence."""
++
 +        if self.queue_ephemeral_steering_message_callback is not None:
 +            self.queue_ephemeral_steering_message_callback(message)
 +            return
 +        if self.queue_steering_message_callback is not None:
 +            self.queue_steering_message_callback(message)
++
      def refresh_capability_profile(self) -> None:
          """Refresh the resolved capability profile through the runtime control seam."""

src/loader/runtime/finalization.py (modified)

          mutating_paths = [path for path in dod.touched_files if path]
          requires_verification = bool(mutating_paths or dod.mutating_actions)
 +        if (
 +            tracked_pending_items
 +            and not requires_verification
 +            and _response_declares_no_mutation_needed(candidate_response)
 +        ):
 +            tracked_pending_items = [
 +                item
 +                for item in tracked_pending_items
 +                if not _is_task_restatement_pending_item(item, dod.task_statement)
 +            ]
          rlog = get_runtime_logger()
          rlog.completion_check(
              "dod_gate",
+     )
 +def _normalize_pending_statement(value: str) -> str:
 +    return " ".join(value.strip().lower().split())
++
++
 +def _is_task_restatement_pending_item(item: str, task_statement: str) -> bool:
 +    normalized_item = _normalize_pending_statement(item)
 +    normalized_task = _normalize_pending_statement(task_statement)
 +    return bool(normalized_item and normalized_item == normalized_task)
++
++
 +def _response_declares_no_mutation_needed(candidate_response: str) -> bool:
 +    lowered = candidate_response.lower()
 +    return any(
 +        phrase in lowered
 +        for phrase in (
 +            "already correct",
 +            "already up to date",
 +            "already matches",
 +            "already complete",
 +            "no edit is needed",
 +            "no edits are needed",
 +            "no change is needed",
 +            "no changes are needed",
 +            "nothing to change",
 +            "no update is needed",
 +            "no updates are needed",
 +        )
 +    )
++
++
  def _build_verification_repair_guidance(
      dod: DefinitionOfDone,
      *,

src/loader/runtime/public_shell.py (modified)

  from .prompt_history import PromptSnapshot
  from .prompting import build_system_prompt_result
  from .session import ConversationSession
 +from .steering import SteeringDirective
  @dataclass(slots=True)
      """Small public-shell owner for steering and running-state bookkeeping."""
      def __init__(self) -> None:
 -        self._pending: deque[str] = deque()
 +        self._pending: deque[SteeringDirective] = deque()
          self._is_running = False
      @property
      def queue(self, message: str) -> None:
          """Queue one steering message regardless of running state."""
 -        self._pending.append(message)
 +        self._pending.append(SteeringDirective(content=message, persist_to_model=True))
 -    def drain(self) -> list[str]:
 +    def queue_ephemeral(self, message: str) -> None:
 +        """Queue one UI-only steering message regardless of running state."""
++
 +        self._pending.append(SteeringDirective(content=message, persist_to_model=False))
++
 +    def drain(self) -> list[SteeringDirective]:
          """Drain all pending steering messages in FIFO order."""
          drained = list(self._pending)
      def queue_steering_message(self, message: str) -> None:
          """Queue one steering message for the runtime."""
 -    def drain_steering_messages(self) -> list[str]:
 +    def queue_ephemeral_steering_message(self, message: str) -> None:
 +        """Queue one UI-only steering message for the runtime."""
++
 +    def drain_steering_messages(self) -> list[SteeringDirective]:
          """Drain queued steering messages."""
      def refresh_capability_profile(self) -> None:

src/loader/runtime/runtime_handle.py (modified)

      set_runtime_shell_workflow_mode,
      stream_runtime_shell,
+ )
 +from .steering import SteeringDirective
  from .workflow import WorkflowMode
          self.steering.queue(message)
 -    def drain_steering_messages(self) -> list[str]:
 +    def queue_ephemeral_steering_message(self, message: str) -> None:
 +        """Queue one UI-only runtime steering message."""
++
 +        self.steering.queue_ephemeral(message)
++
 +    def drain_steering_messages(self) -> list[SteeringDirective]:
          """Drain queued runtime steering messages."""
          return self.steering.drain()

src/loader/runtime/steering.py (added)

 +"""Steering-message payloads shared across runtime seams."""
++
 +from __future__ import annotations
++
 +from dataclasses import dataclass
++
++
 +@dataclass(frozen=True, slots=True)
 +class SteeringDirective:
 +    """One queued steering message plus persistence policy."""
++
 +    content: str
 +    persist_to_model: bool = True

src/loader/runtime/tool_batches.py (modified)

                      messages=list(getattr(self.context.session, "messages", []) or []),
+                 )
                  if compact_handoff:
 -                    self.context.queue_steering_message(
 +                    self.context.queue_ephemeral_steering_message(
                          f"Confirmed progress: `{completed_label}` is now satisfied by the successful "
                          f"`{tool_call.name}` result. {compact_handoff}"
                          " Do not reread reference material or spend the next turn on bookkeeping."
+                     )
                      return
 -            self.context.queue_steering_message(
 +            self.context.queue_ephemeral_steering_message(
                  f"Confirmed progress: `{completed_label}` is now satisfied by the successful "
                  f"`{tool_call.name}` result. One declared output artifact is still missing."
                  + _missing_artifact_resume_suffix(
                      "more reference material and perform the change now."
+                 )
 -        self.context.queue_steering_message(
 +        self.context.queue_ephemeral_steering_message(
              f"Confirmed progress: `{completed_label}` is now satisfied by the successful "
              f"`{tool_call.name}` result. Continue with the next pending item: "
              f"`{next_pending}` instead of rereading the same evidence.{mutation_suffix}"
                  messages=list(getattr(self.context.session, "messages", []) or []),
+             )
              if compact_handoff:
 -                self.context.queue_steering_message(
 +                self.context.queue_ephemeral_steering_message(
                      f"Confirmed progress: {current_label} is now recorded. "
                      + compact_handoff
                      + " Do not reread reference material or spend the next turn on bookkeeping."
              dod,
              project_root=self.context.project_root,
          ):
 -            self.context.queue_steering_message(
 +            self.context.queue_ephemeral_steering_message(
                  f"Confirmed progress: {current_label} is now recorded."
                  + _missing_artifact_resume_suffix(
                      missing_artifact,
                  + " No TodoWrite, no verification, no rereads until that artifact exists."
+             )
              return
 -        self.context.queue_steering_message(
 +        self.context.queue_ephemeral_steering_message(
              f"Confirmed progress: {current_label} is now recorded."
              " One declared output artifact is still missing."
              + _missing_artifact_resume_suffix(
                          "Perform the mutation now instead of spending another turn on "
                          "planning, rereads, or verification."
+                     )
 -                    self.context.queue_steering_message(concrete_message)
 +                    self.context.queue_ephemeral_steering_message(concrete_message)
                      return
 -                self.context.queue_steering_message(
 +                self.context.queue_ephemeral_steering_message(
                      "Todo tracking is updated. Continue with the next pending item: "
                      f"`{next_pending}`. Use the current output files as the source of "
                      "truth, and do not reopen reference materials unless one specific "
                      project_root=self.context.project_root,
+                 )
              ):
 -                self.context.queue_steering_message(
 +                self.context.queue_ephemeral_steering_message(
                      "Todo tracking is updated. Continue with the next pending item: "
                      f"`{next_pending}`. Use the current output files as the source of "
                      "truth, and do not reopen reference materials unless one specific "
                      if verification_commands
                      else " Finish the targeted consistency pass without reopening reference materials."
+                 )
 -                self.context.queue_steering_message(
 +                self.context.queue_ephemeral_steering_message(
                      "Todo tracking is updated. All explicitly planned artifacts now exist. "
                      f"Continue with the next pending item: `{next_pending}`. "
                      "Use the current output files as the source of truth, and do not restart "
                  if verification_commands
                  else " Finish the task using the files already on disk."
+             )
 -            self.context.queue_steering_message(
 +            self.context.queue_ephemeral_steering_message(
                  "Todo tracking is updated. All explicitly planned artifacts now exist. "
                  "Do not restart discovery, reopen reference materials, or spend another turn "
                  "on TodoWrite alone."
              if next_pending
              else ""
+         )
 -        self.context.queue_steering_message(
 +        self.context.queue_ephemeral_steering_message(
              "Todo tracking is updated. A declared output artifact is still missing."
              + next_pending_suffix
              + _missing_artifact_resume_suffix(
                  project_root=self.context.project_root,
+             )
          ):
 -            self.context.queue_steering_message(
 +            self.context.queue_ephemeral_steering_message(
                  "Bookkeeping note is recorded. Continue with the next pending item: "
                  f"`{next_pending}`. Make your next response one concrete evidence-gathering "
                  "tool call that advances that step, not another bookkeeping-only turn."
+             )
              return
 -        self.context.queue_steering_message(
 +        self.context.queue_ephemeral_steering_message(
              "Bookkeeping note is recorded. A declared output artifact is still missing."
              + _missing_artifact_resume_suffix(
                  missing_artifact,

src/loader/runtime/turn_preamble.py (modified)

          self.tracer.record("turn.iteration_started", iteration=iterations)
          steering_messages = self.context.drain_steering_messages()
 -        for steering_message in steering_messages:
 -            await emit(AgentEvent(type="steering", content=steering_message))
 +        for directive in steering_messages:
 +            await emit(AgentEvent(type="steering", content=directive.content))
 +            if not directive.persist_to_model:
 +                continue
              self.context.session.append(
                  Message(
                      role=Role.USER,
 -                    content=f"[USER INTERRUPTION]: {steering_message}",
 +                    content=f"[USER INTERRUPTION]: {directive.content}",
+                 )
+             )

tests/test_finalization.py (modified)

      assert any(event.type == "dod_status" and event.dod_status == "done" for event in events)
 +@pytest.mark.asyncio
 +async def test_turn_finalizer_accepts_noop_completion_with_task_restatement_todo(
 +    temp_dir: Path,
 +) -> None:
 +    session = FakeSession()
 +    context = build_context(temp_dir, session)
 +    finalizer = TurnFinalizer(
 +        context,
 +        RuntimeTracer(),
 +        DefinitionOfDoneStore(temp_dir),
 +        set_workflow_mode=_noop_set_workflow_mode,
 +    )
 +    task = (
 +        "Have a look at ~/Loader/guides/fortran/index.html, then "
 +        "~/Loader/guides/fortran/chapters. The table of contents links in "
 +        "index.html are inaccurate and the href’s are wrong. Let’s update the "
 +        "links and their link texts to be correct."
 +    )
 +    dod = create_definition_of_done(task)
 +    dod.pending_items = [task, "Complete the requested work"]
 +    summary = TurnSummary(final_response="")
++
 +    async def capture(event) -> None:
 +        return None
++
 +    result = await finalizer.run_definition_of_done_gate(
 +        dod=dod,
 +        candidate_response="The table of contents is already correct, so no edit is needed.",
 +        emit=capture,
 +        summary=summary,
 +        executor=FakeExecutor([]),  # type: ignore[arg-type]
 +    )
++
 +    assert result.should_continue is False
 +    assert result.reason_code == "non_mutating_response_accepted"
++
++
  @pytest.mark.asyncio
  async def test_turn_finalizer_records_passed_verification_observation(
      temp_dir: Path,

tests/test_runtime_context.py (modified)

  from loader.runtime.bootstrap import build_runtime_bootstrap_source, build_runtime_context
  from loader.runtime.context import RuntimeContext
  from loader.runtime.recovery import RecoveryContext
 +from loader.runtime.steering import SteeringDirective
  from tests.helpers.runtime_harness import ScriptedBackend
      context = build_runtime_context(source)
      context.queue_steering_message("Re-check the current task.")
 -    assert context.drain_steering_messages() == ["Re-check the current task."]
 +    assert context.drain_steering_messages() == [
 +        SteeringDirective(content="Re-check the current task.")
 +    ]
      context.set_workflow_mode("clarify")
      assert agent.workflow_mode == "clarify"

tests/test_runtime_handle.py (modified)

  from loader.runtime.conversation import ConversationRuntime
  from loader.runtime.launcher import RuntimeLauncher, build_runtime_launcher
  from loader.runtime.runtime_handle import RuntimeHandle
 +from loader.runtime.steering import SteeringDirective
  from tests.helpers.runtime_harness import ScriptedBackend, run_explore_scenario, run_scenario
      assert handle.is_running is True
      assert handle.steer("stay in runtime") is True
 -    assert handle.drain_steering_messages() == ["stay in runtime"]
 +    assert handle.drain_steering_messages() == [
 +        SteeringDirective(content="stay in runtime")
 +    ]

tests/test_runtime_public_shell.py (modified)

+ )
  from loader.runtime.runtime_handle import RuntimeHandle
  from loader.runtime.session import ConversationSession
 +from loader.runtime.steering import SteeringDirective
  from tests.helpers.runtime_harness import ScriptedBackend
      assert mailbox.steer("stay in runtime") is True
      mailbox.queue("double-check the current task")
 +    mailbox.queue_ephemeral("show a lighter nudge")
      assert mailbox.drain() == [
 -        "stay in runtime",
 -        "double-check the current task",
 +        SteeringDirective(content="stay in runtime"),
 +        SteeringDirective(content="double-check the current task"),
 +        SteeringDirective(content="show a lighter nudge", persist_to_model=False),
+     ]
      mailbox.mark_idle()

tests/test_tool_batches.py (modified)

      ) -> ActionVerification:
          raise AssertionError("Verification should not run for this scenario")
 -    reference = temp_dir / "fortran" / "index.html"
 +    reference = temp_dir / "fortran" / "chapters" / "01-introduction.html"
      reference.parent.mkdir(parents=True)
 -    reference.write_text("<h1>Fortran Beginner's Guide</h1>\n")
 +    reference.write_text("<h1>Introduction</h1>\n<p>Guide cadence.</p>\n")
      context = build_context(
          temp_dir=temp_dir,
+         [
              tool_outcome(
                  tool_call=tool_call,
 -                output="<h1>Fortran Beginner's Guide</h1>\n",
 +                output="<h1>Introduction</h1>\n<p>Guide cadence.</p>\n",
                  is_error=False,
+             )
+         ]
      chapter_one.write_text("<html></html>\n")
      index_path = guide_root / "index.html"
 -    reference = temp_dir / "Loader" / "guides" / "fortran" / "index.html"
 +    reference = temp_dir / "Loader" / "guides" / "fortran" / "chapters" / "01-introduction.html"
      reference.parent.mkdir(parents=True, exist_ok=True)
 -    reference.write_text("<h1>Fortran Beginner's Guide</h1>\n")
 +    reference.write_text("<h1>Introduction</h1>\n<p>Guide cadence.</p>\n")
      implementation_plan = temp_dir / "implementation.md"
      implementation_plan.write_text(
          ],
+     )
      tool_call = ToolCall(
 -        id="read-reference-index",
 +        id="read-reference-chapter",
          name="read",
          arguments={"file_path": str(reference)},
+     )
 -    read_output = "Observation [read]: Result: <h1>Fortran Beginner's Guide</h1>\n"
 +    read_output = "Observation [read]: Result: <h1>Introduction</h1>\n<p>Guide cadence.</p>\n"
      executor = FakeExecutor(
+         [
              ToolExecutionOutcome(
      ) -> ActionVerification:
          raise AssertionError("Verification should not run for this scenario")
 -    reference = temp_dir / "fortran" / "index.html"
 +    reference = temp_dir / "fortran" / "chapters" / "01-introduction.html"
      reference.parent.mkdir(parents=True)
 -    reference.write_text("<h1>Fortran Beginner's Guide</h1>\n")
 +    reference.write_text("<h1>Introduction</h1>\n<p>Guide cadence.</p>\n")
      context = build_context(
          temp_dir=temp_dir,
+         [
              tool_outcome(
                  tool_call=tool_call,
 -                output="<h1>Fortran Beginner's Guide</h1>\n",
 +                output="<h1>Introduction</h1>\n<p>Guide cadence.</p>\n",
                  is_error=False,
+             )
+         ]

tests/test_turn_preamble.py (modified)

+     )
 +@pytest.mark.asyncio
 +async def test_turn_preamble_keeps_ephemeral_steering_out_of_model_history(
 +    temp_dir: Path,
 +) -> None:
 +    backend = ScriptedBackend()
 +    agent = Agent(
 +        backend=backend,
 +        config=non_streaming_config(),
 +        project_root=temp_dir,
 +    )
 +    runtime = ConversationRuntime(agent)
++
 +    prepared, events, capture = await _prepare_runtime(
 +        runtime,
 +        task="Create a README for the runtime controller.",
 +    )
 +    agent.messages.append(Message(role=Role.USER, content=prepared.task))
 +    agent.queue_ephemeral_steering_message("Create 01-introduction.html now.")
++
 +    decision = await runtime.turn_preamble.prepare_iteration(
 +        task=prepared.task,
 +        original_task=None,
 +        iterations=1,
 +        dod=prepared.definition_of_done,
 +        emit=capture,
 +        summary=prepared.summary,
 +        on_user_question=None,
 +        executor=prepared.executor,
 +    )
++
 +    assert not decision.should_continue
 +    assert not any(
 +        message.content == "[USER INTERRUPTION]: Create 01-introduction.html now."
 +        for message in agent.session.messages
 +    )
 +    assert any(
 +        event.type == "steering" and event.content == "Create 01-introduction.html now."
 +        for event in events
 +    )
++
++
  @pytest.mark.asyncio
  async def test_turn_preamble_skips_iteration_when_recovery_refreshes(
      temp_dir: Path,

tenseleyflow/loader / `e0ebfab`

15 changed files