tenseleyflow/loader / e0ebfab

Browse files

Lighten steering handoffs

Authored by espadonne
SHA
e0ebfab39e1deecba3bf571f8ac4ce94df997642
Parents
c3b0cbc
Tree
f28534a

15 changed files

Status File + -
M src/loader/agent/loop.py 7 1
M src/loader/runtime/bootstrap.py 15 3
M src/loader/runtime/context.py 13 2
M src/loader/runtime/finalization.py 40 0
M src/loader/runtime/public_shell.py 13 4
M src/loader/runtime/runtime_handle.py 7 1
A src/loader/runtime/steering.py 13 0
M src/loader/runtime/tool_batches.py 14 14
M src/loader/runtime/turn_preamble.py 5 3
M tests/test_finalization.py 37 0
M tests/test_runtime_context.py 4 1
M tests/test_runtime_handle.py 4 1
M tests/test_runtime_public_shell.py 5 2
M tests/test_tool_batches.py 10 10
M tests/test_turn_preamble.py 41 0
src/loader/agent/loop.py modified
@@ -39,6 +39,7 @@ from ..runtime.public_shell import (
3939
     stream_runtime_shell,
4040
 )
4141
 from ..runtime.safeguards import RuntimeSafeguards
42
+from ..runtime.steering import SteeringDirective
4243
 from ..runtime.workflow import WorkflowMode
4344
 from ..tools.base import ToolRegistry, create_default_registry
4445
 
@@ -217,7 +218,12 @@ class Agent:
217218
 
218219
         self.steering.queue(message)
219220
 
220
-    def drain_steering_messages(self) -> list[str]:
221
+    def queue_ephemeral_steering_message(self, message: str) -> None:
222
+        """Queue one UI-only runtime steering message."""
223
+
224
+        self.steering.queue_ephemeral(message)
225
+
226
+    def drain_steering_messages(self) -> list[SteeringDirective]:
221227
         """Drain queued runtime steering messages."""
222228
 
223229
         return self.steering.drain()
src/loader/runtime/bootstrap.py modified
@@ -21,6 +21,7 @@ from .owner_metadata import build_runtime_owner_metadata
2121
 from .permissions import PermissionConfigStatus, PermissionPolicy
2222
 from .reasoning_service import RuntimeReasoningService
2323
 from .session import ConversationSession
24
+from .steering import SteeringDirective
2425
 
2526
 
2627
 class RuntimeBootstrapSource(Protocol):
@@ -48,7 +49,10 @@ class RuntimeBootstrapSource(Protocol):
4849
     def queue_steering_message(self, message: str) -> None:
4950
         """Queue one steering message for the runtime."""
5051
 
51
-    def drain_steering_messages(self) -> list[str]:
52
+    def queue_ephemeral_steering_message(self, message: str) -> None:
53
+        """Queue one UI-only steering message for the runtime."""
54
+
55
+    def drain_steering_messages(self) -> list[SteeringDirective]:
5256
         """Drain queued steering messages."""
5357
 
5458
     def refresh_capability_profile(self) -> None:
@@ -78,7 +82,8 @@ class RuntimeBootstrapView:
7882
     _get_prompt_format: Callable[[], str | None]
7983
     _get_prompt_sections: Callable[[], list[str]]
8084
     _queue_steering_message: Callable[[str], None]
81
-    _drain_steering_messages: Callable[[], list[str]]
85
+    _queue_ephemeral_steering_message: Callable[[str], None]
86
+    _drain_steering_messages: Callable[[], list[SteeringDirective]]
8287
     _refresh_capability_profile: Callable[[], None]
8388
     metadata: dict[str, Any] = field(default_factory=dict)
8489
 
@@ -136,7 +141,12 @@ class RuntimeBootstrapView:
136141
 
137142
         self._queue_steering_message(message)
138143
 
139
-    def drain_steering_messages(self) -> list[str]:
144
+    def queue_ephemeral_steering_message(self, message: str) -> None:
145
+        """Queue one UI-only steering message through the public shell callback."""
146
+
147
+        self._queue_ephemeral_steering_message(message)
148
+
149
+    def drain_steering_messages(self) -> list[SteeringDirective]:
140150
         """Drain steering messages through the public shell callback."""
141151
 
142152
         return self._drain_steering_messages()
@@ -173,6 +183,7 @@ def build_runtime_bootstrap_source(source: RuntimeBootstrapSource | Any) -> Runt
173183
         _get_prompt_format=lambda: source.prompt_format,
174184
         _get_prompt_sections=lambda: list(source.prompt_sections),
175185
         _queue_steering_message=source.queue_steering_message,
186
+        _queue_ephemeral_steering_message=source.queue_ephemeral_steering_message,
176187
         _drain_steering_messages=source.drain_steering_messages,
177188
         _refresh_capability_profile=source.refresh_capability_profile,
178189
         metadata=build_runtime_owner_metadata(source),
@@ -234,6 +245,7 @@ def build_runtime_context(source: RuntimeBootstrapSource) -> RuntimeContext:
234245
         set_workflow_mode_callback=_set_workflow_mode,
235246
         drain_steering_messages_callback=source.drain_steering_messages,
236247
         queue_steering_message_callback=source.queue_steering_message,
248
+        queue_ephemeral_steering_message_callback=source.queue_ephemeral_steering_message,
237249
         refresh_capability_profile_callback=_refresh_capability_profile,
238250
     )
239251
     return context
src/loader/runtime/context.py modified
@@ -15,6 +15,7 @@ from .permissions import PermissionConfigStatus, PermissionPolicy
1515
 from .reasoning_types import ActionVerification, ConfidenceAssessment
1616
 from .recovery import RecoveryContext
1717
 from .session import ConversationSession
18
+from .steering import SteeringDirective
1819
 
1920
 
2021
 class ReasoningConfigProtocol(Protocol):
@@ -120,8 +121,9 @@ class RuntimeContext:
120121
     prompt_format: str | None = None
121122
     prompt_sections: list[str] = field(default_factory=list)
122123
     set_workflow_mode_callback: Callable[[str], None] | None = None
123
-    drain_steering_messages_callback: Callable[[], list[str]] | None = None
124
+    drain_steering_messages_callback: Callable[[], list[SteeringDirective]] | None = None
124125
     queue_steering_message_callback: Callable[[str], None] | None = None
126
+    queue_ephemeral_steering_message_callback: Callable[[str], None] | None = None
125127
     refresh_capability_profile_callback: Callable[[], None] | None = None
126128
 
127129
     @property
@@ -157,7 +159,7 @@ class RuntimeContext:
157159
         self.set_workflow_mode_callback(workflow_mode)
158160
         self.workflow_mode = workflow_mode
159161
 
160
-    def drain_steering_messages(self) -> list[str]:
162
+    def drain_steering_messages(self) -> list[SteeringDirective]:
161163
         """Drain pending steering messages through the runtime control seam."""
162164
 
163165
         if self.drain_steering_messages_callback is None:
@@ -171,6 +173,15 @@ class RuntimeContext:
171173
             return
172174
         self.queue_steering_message_callback(message)
173175
 
176
+    def queue_ephemeral_steering_message(self, message: str) -> None:
177
+        """Queue a UI-visible steering message without forcing model persistence."""
178
+
179
+        if self.queue_ephemeral_steering_message_callback is not None:
180
+            self.queue_ephemeral_steering_message_callback(message)
181
+            return
182
+        if self.queue_steering_message_callback is not None:
183
+            self.queue_steering_message_callback(message)
184
+
174185
     def refresh_capability_profile(self) -> None:
175186
         """Refresh the resolved capability profile through the runtime control seam."""
176187
 
src/loader/runtime/finalization.py modified
@@ -118,6 +118,16 @@ class TurnFinalizer:
118118
 
119119
         mutating_paths = [path for path in dod.touched_files if path]
120120
         requires_verification = bool(mutating_paths or dod.mutating_actions)
121
+        if (
122
+            tracked_pending_items
123
+            and not requires_verification
124
+            and _response_declares_no_mutation_needed(candidate_response)
125
+        ):
126
+            tracked_pending_items = [
127
+                item
128
+                for item in tracked_pending_items
129
+                if not _is_task_restatement_pending_item(item, dod.task_statement)
130
+            ]
121131
         rlog = get_runtime_logger()
122132
         rlog.completion_check(
123133
             "dod_gate",
@@ -1084,6 +1094,36 @@ def _verification_state_signature(dod: DefinitionOfDone) -> str:
10841094
     )
10851095
 
10861096
 
1097
+def _normalize_pending_statement(value: str) -> str:
1098
+    return " ".join(value.strip().lower().split())
1099
+
1100
+
1101
+def _is_task_restatement_pending_item(item: str, task_statement: str) -> bool:
1102
+    normalized_item = _normalize_pending_statement(item)
1103
+    normalized_task = _normalize_pending_statement(task_statement)
1104
+    return bool(normalized_item and normalized_item == normalized_task)
1105
+
1106
+
1107
+def _response_declares_no_mutation_needed(candidate_response: str) -> bool:
1108
+    lowered = candidate_response.lower()
1109
+    return any(
1110
+        phrase in lowered
1111
+        for phrase in (
1112
+            "already correct",
1113
+            "already up to date",
1114
+            "already matches",
1115
+            "already complete",
1116
+            "no edit is needed",
1117
+            "no edits are needed",
1118
+            "no change is needed",
1119
+            "no changes are needed",
1120
+            "nothing to change",
1121
+            "no update is needed",
1122
+            "no updates are needed",
1123
+        )
1124
+    )
1125
+
1126
+
10871127
 def _build_verification_repair_guidance(
10881128
     dod: DefinitionOfDone,
10891129
     *,
src/loader/runtime/public_shell.py modified
@@ -24,6 +24,7 @@ from .permissions import PermissionConfigStatus, PermissionMode, PermissionPolic
2424
 from .prompt_history import PromptSnapshot
2525
 from .prompting import build_system_prompt_result
2626
 from .session import ConversationSession
27
+from .steering import SteeringDirective
2728
 
2829
 
2930
 @dataclass(slots=True)
@@ -70,7 +71,7 @@ class SteeringMailbox:
7071
     """Small public-shell owner for steering and running-state bookkeeping."""
7172
 
7273
     def __init__(self) -> None:
73
-        self._pending: deque[str] = deque()
74
+        self._pending: deque[SteeringDirective] = deque()
7475
         self._is_running = False
7576
 
7677
     @property
@@ -100,9 +101,14 @@ class SteeringMailbox:
100101
     def queue(self, message: str) -> None:
101102
         """Queue one steering message regardless of running state."""
102103
 
103
-        self._pending.append(message)
104
+        self._pending.append(SteeringDirective(content=message, persist_to_model=True))
104105
 
105
-    def drain(self) -> list[str]:
106
+    def queue_ephemeral(self, message: str) -> None:
107
+        """Queue one UI-only steering message regardless of running state."""
108
+
109
+        self._pending.append(SteeringDirective(content=message, persist_to_model=False))
110
+
111
+    def drain(self) -> list[SteeringDirective]:
106112
         """Drain all pending steering messages in FIFO order."""
107113
 
108114
         drained = list(self._pending)
@@ -154,7 +160,10 @@ class RuntimeShellOwner(Protocol):
154160
     def queue_steering_message(self, message: str) -> None:
155161
         """Queue one steering message for the runtime."""
156162
 
157
-    def drain_steering_messages(self) -> list[str]:
163
+    def queue_ephemeral_steering_message(self, message: str) -> None:
164
+        """Queue one UI-only steering message for the runtime."""
165
+
166
+    def drain_steering_messages(self) -> list[SteeringDirective]:
158167
         """Drain queued steering messages."""
159168
 
160169
     def refresh_capability_profile(self) -> None:
src/loader/runtime/runtime_handle.py modified
@@ -32,6 +32,7 @@ from .public_shell import (
3232
     set_runtime_shell_workflow_mode,
3333
     stream_runtime_shell,
3434
 )
35
+from .steering import SteeringDirective
3536
 from .workflow import WorkflowMode
3637
 
3738
 
@@ -214,7 +215,12 @@ class RuntimeHandle:
214215
 
215216
         self.steering.queue(message)
216217
 
217
-    def drain_steering_messages(self) -> list[str]:
218
+    def queue_ephemeral_steering_message(self, message: str) -> None:
219
+        """Queue one UI-only runtime steering message."""
220
+
221
+        self.steering.queue_ephemeral(message)
222
+
223
+    def drain_steering_messages(self) -> list[SteeringDirective]:
218224
         """Drain queued runtime steering messages."""
219225
 
220226
         return self.steering.drain()
src/loader/runtime/steering.py added
@@ -0,0 +1,13 @@
1
+"""Steering-message payloads shared across runtime seams."""
2
+
3
+from __future__ import annotations
4
+
5
+from dataclasses import dataclass
6
+
7
+
8
+@dataclass(frozen=True, slots=True)
9
+class SteeringDirective:
10
+    """One queued steering message plus persistence policy."""
11
+
12
+    content: str
13
+    persist_to_model: bool = True
src/loader/runtime/tool_batches.py modified
@@ -820,13 +820,13 @@ class ToolBatchRunner:
820820
                     messages=list(getattr(self.context.session, "messages", []) or []),
821821
                 )
822822
                 if compact_handoff:
823
-                    self.context.queue_steering_message(
823
+                    self.context.queue_ephemeral_steering_message(
824824
                         f"Confirmed progress: `{completed_label}` is now satisfied by the successful "
825825
                         f"`{tool_call.name}` result. {compact_handoff}"
826826
                         " Do not reread reference material or spend the next turn on bookkeeping."
827827
                     )
828828
                     return
829
-            self.context.queue_steering_message(
829
+            self.context.queue_ephemeral_steering_message(
830830
                 f"Confirmed progress: `{completed_label}` is now satisfied by the successful "
831831
                 f"`{tool_call.name}` result. One declared output artifact is still missing."
832832
                 + _missing_artifact_resume_suffix(
@@ -851,7 +851,7 @@ class ToolBatchRunner:
851851
                     "more reference material and perform the change now."
852852
                 )
853853
 
854
-        self.context.queue_steering_message(
854
+        self.context.queue_ephemeral_steering_message(
855855
             f"Confirmed progress: `{completed_label}` is now satisfied by the successful "
856856
             f"`{tool_call.name}` result. Continue with the next pending item: "
857857
             f"`{next_pending}` instead of rereading the same evidence.{mutation_suffix}"
@@ -944,7 +944,7 @@ class ToolBatchRunner:
944944
                 messages=list(getattr(self.context.session, "messages", []) or []),
945945
             )
946946
             if compact_handoff:
947
-                self.context.queue_steering_message(
947
+                self.context.queue_ephemeral_steering_message(
948948
                     f"Confirmed progress: {current_label} is now recorded. "
949949
                     + compact_handoff
950950
                     + " Do not reread reference material or spend the next turn on bookkeeping."
@@ -954,7 +954,7 @@ class ToolBatchRunner:
954954
             dod,
955955
             project_root=self.context.project_root,
956956
         ):
957
-            self.context.queue_steering_message(
957
+            self.context.queue_ephemeral_steering_message(
958958
                 f"Confirmed progress: {current_label} is now recorded."
959959
                 + _missing_artifact_resume_suffix(
960960
                     missing_artifact,
@@ -964,7 +964,7 @@ class ToolBatchRunner:
964964
                 + " No TodoWrite, no verification, no rereads until that artifact exists."
965965
             )
966966
             return
967
-        self.context.queue_steering_message(
967
+        self.context.queue_ephemeral_steering_message(
968968
             f"Confirmed progress: {current_label} is now recorded."
969969
             " One declared output artifact is still missing."
970970
             + _missing_artifact_resume_suffix(
@@ -1020,9 +1020,9 @@ class ToolBatchRunner:
10201020
                         "Perform the mutation now instead of spending another turn on "
10211021
                         "planning, rereads, or verification."
10221022
                     )
1023
-                    self.context.queue_steering_message(concrete_message)
1023
+                    self.context.queue_ephemeral_steering_message(concrete_message)
10241024
                     return
1025
-                self.context.queue_steering_message(
1025
+                self.context.queue_ephemeral_steering_message(
10261026
                     "Todo tracking is updated. Continue with the next pending item: "
10271027
                     f"`{next_pending}`. Use the current output files as the source of "
10281028
                     "truth, and do not reopen reference materials unless one specific "
@@ -1040,7 +1040,7 @@ class ToolBatchRunner:
10401040
                     project_root=self.context.project_root,
10411041
                 )
10421042
             ):
1043
-                self.context.queue_steering_message(
1043
+                self.context.queue_ephemeral_steering_message(
10441044
                     "Todo tracking is updated. Continue with the next pending item: "
10451045
                     f"`{next_pending}`. Use the current output files as the source of "
10461046
                     "truth, and do not reopen reference materials unless one specific "
@@ -1063,7 +1063,7 @@ class ToolBatchRunner:
10631063
                     if verification_commands
10641064
                     else " Finish the targeted consistency pass without reopening reference materials."
10651065
                 )
1066
-                self.context.queue_steering_message(
1066
+                self.context.queue_ephemeral_steering_message(
10671067
                     "Todo tracking is updated. All explicitly planned artifacts now exist. "
10681068
                     f"Continue with the next pending item: `{next_pending}`. "
10691069
                     "Use the current output files as the source of truth, and do not restart "
@@ -1077,7 +1077,7 @@ class ToolBatchRunner:
10771077
                 if verification_commands
10781078
                 else " Finish the task using the files already on disk."
10791079
             )
1080
-            self.context.queue_steering_message(
1080
+            self.context.queue_ephemeral_steering_message(
10811081
                 "Todo tracking is updated. All explicitly planned artifacts now exist. "
10821082
                 "Do not restart discovery, reopen reference materials, or spend another turn "
10831083
                 "on TodoWrite alone."
@@ -1094,7 +1094,7 @@ class ToolBatchRunner:
10941094
             if next_pending
10951095
             else ""
10961096
         )
1097
-        self.context.queue_steering_message(
1097
+        self.context.queue_ephemeral_steering_message(
10981098
             "Todo tracking is updated. A declared output artifact is still missing."
10991099
             + next_pending_suffix
11001100
             + _missing_artifact_resume_suffix(
@@ -1151,7 +1151,7 @@ class ToolBatchRunner:
11511151
                 project_root=self.context.project_root,
11521152
             )
11531153
         ):
1154
-            self.context.queue_steering_message(
1154
+            self.context.queue_ephemeral_steering_message(
11551155
                 "Bookkeeping note is recorded. Continue with the next pending item: "
11561156
                 f"`{next_pending}`. Make your next response one concrete evidence-gathering "
11571157
                 "tool call that advances that step, not another bookkeeping-only turn."
@@ -1161,7 +1161,7 @@ class ToolBatchRunner:
11611161
             )
11621162
             return
11631163
 
1164
-        self.context.queue_steering_message(
1164
+        self.context.queue_ephemeral_steering_message(
11651165
             "Bookkeeping note is recorded. A declared output artifact is still missing."
11661166
             + _missing_artifact_resume_suffix(
11671167
                 missing_artifact,
src/loader/runtime/turn_preamble.py modified
@@ -55,12 +55,14 @@ class TurnPreludeController:
5555
         self.tracer.record("turn.iteration_started", iteration=iterations)
5656
 
5757
         steering_messages = self.context.drain_steering_messages()
58
-        for steering_message in steering_messages:
59
-            await emit(AgentEvent(type="steering", content=steering_message))
58
+        for directive in steering_messages:
59
+            await emit(AgentEvent(type="steering", content=directive.content))
60
+            if not directive.persist_to_model:
61
+                continue
6062
             self.context.session.append(
6163
                 Message(
6264
                     role=Role.USER,
63
-                    content=f"[USER INTERRUPTION]: {steering_message}",
65
+                    content=f"[USER INTERRUPTION]: {directive.content}",
6466
                 )
6567
             )
6668
 
tests/test_finalization.py modified
@@ -390,6 +390,43 @@ async def test_turn_finalizer_records_skipped_verification_observation(
390390
     assert any(event.type == "dod_status" and event.dod_status == "done" for event in events)
391391
 
392392
 
393
+@pytest.mark.asyncio
394
+async def test_turn_finalizer_accepts_noop_completion_with_task_restatement_todo(
395
+    temp_dir: Path,
396
+) -> None:
397
+    session = FakeSession()
398
+    context = build_context(temp_dir, session)
399
+    finalizer = TurnFinalizer(
400
+        context,
401
+        RuntimeTracer(),
402
+        DefinitionOfDoneStore(temp_dir),
403
+        set_workflow_mode=_noop_set_workflow_mode,
404
+    )
405
+    task = (
406
+        "Have a look at ~/Loader/guides/fortran/index.html, then "
407
+        "~/Loader/guides/fortran/chapters. The table of contents links in "
408
+        "index.html are inaccurate and the href’s are wrong. Let’s update the "
409
+        "links and their link texts to be correct."
410
+    )
411
+    dod = create_definition_of_done(task)
412
+    dod.pending_items = [task, "Complete the requested work"]
413
+    summary = TurnSummary(final_response="")
414
+
415
+    async def capture(event) -> None:
416
+        return None
417
+
418
+    result = await finalizer.run_definition_of_done_gate(
419
+        dod=dod,
420
+        candidate_response="The table of contents is already correct, so no edit is needed.",
421
+        emit=capture,
422
+        summary=summary,
423
+        executor=FakeExecutor([]),  # type: ignore[arg-type]
424
+    )
425
+
426
+    assert result.should_continue is False
427
+    assert result.reason_code == "non_mutating_response_accepted"
428
+
429
+
393430
 @pytest.mark.asyncio
394431
 async def test_turn_finalizer_records_passed_verification_observation(
395432
     temp_dir: Path,
tests/test_runtime_context.py modified
@@ -8,6 +8,7 @@ from loader.agent.loop import Agent, AgentConfig
88
 from loader.runtime.bootstrap import build_runtime_bootstrap_source, build_runtime_context
99
 from loader.runtime.context import RuntimeContext
1010
 from loader.runtime.recovery import RecoveryContext
11
+from loader.runtime.steering import SteeringDirective
1112
 from tests.helpers.runtime_harness import ScriptedBackend
1213
 
1314
 
@@ -53,7 +54,9 @@ def test_runtime_context_control_callbacks_stay_in_sync(temp_dir: Path) -> None:
5354
     context = build_runtime_context(source)
5455
     context.queue_steering_message("Re-check the current task.")
5556
 
56
-    assert context.drain_steering_messages() == ["Re-check the current task."]
57
+    assert context.drain_steering_messages() == [
58
+        SteeringDirective(content="Re-check the current task.")
59
+    ]
5760
 
5861
     context.set_workflow_mode("clarify")
5962
     assert agent.workflow_mode == "clarify"
tests/test_runtime_handle.py modified
@@ -12,6 +12,7 @@ from loader.runtime.bootstrap import RuntimeBootstrapView, build_runtime_context
1212
 from loader.runtime.conversation import ConversationRuntime
1313
 from loader.runtime.launcher import RuntimeLauncher, build_runtime_launcher
1414
 from loader.runtime.runtime_handle import RuntimeHandle
15
+from loader.runtime.steering import SteeringDirective
1516
 from tests.helpers.runtime_harness import ScriptedBackend, run_explore_scenario, run_scenario
1617
 
1718
 
@@ -180,4 +181,6 @@ def test_runtime_handle_exposes_public_shell_steering_contract(
180181
 
181182
     assert handle.is_running is True
182183
     assert handle.steer("stay in runtime") is True
183
-    assert handle.drain_steering_messages() == ["stay in runtime"]
184
+    assert handle.drain_steering_messages() == [
185
+        SteeringDirective(content="stay in runtime")
186
+    ]
tests/test_runtime_public_shell.py modified
@@ -37,6 +37,7 @@ from loader.runtime.public_shell import (
3737
 )
3838
 from loader.runtime.runtime_handle import RuntimeHandle
3939
 from loader.runtime.session import ConversationSession
40
+from loader.runtime.steering import SteeringDirective
4041
 from tests.helpers.runtime_harness import ScriptedBackend
4142
 
4243
 
@@ -320,10 +321,12 @@ def test_steering_mailbox_tracks_running_state_and_fifo_messages() -> None:
320321
     assert mailbox.steer("stay in runtime") is True
321322
 
322323
     mailbox.queue("double-check the current task")
324
+    mailbox.queue_ephemeral("show a lighter nudge")
323325
 
324326
     assert mailbox.drain() == [
325
-        "stay in runtime",
326
-        "double-check the current task",
327
+        SteeringDirective(content="stay in runtime"),
328
+        SteeringDirective(content="double-check the current task"),
329
+        SteeringDirective(content="show a lighter nudge", persist_to_model=False),
327330
     ]
328331
 
329332
     mailbox.mark_idle()
tests/test_tool_batches.py modified
@@ -1104,9 +1104,9 @@ async def test_tool_batch_runner_queues_next_pending_todo_after_discovery_progre
11041104
     ) -> ActionVerification:
11051105
         raise AssertionError("Verification should not run for this scenario")
11061106
 
1107
-    reference = temp_dir / "fortran" / "index.html"
1107
+    reference = temp_dir / "fortran" / "chapters" / "01-introduction.html"
11081108
     reference.parent.mkdir(parents=True)
1109
-    reference.write_text("<h1>Fortran Beginner's Guide</h1>\n")
1109
+    reference.write_text("<h1>Introduction</h1>\n<p>Guide cadence.</p>\n")
11101110
 
11111111
     context = build_context(
11121112
         temp_dir=temp_dir,
@@ -1149,7 +1149,7 @@ async def test_tool_batch_runner_queues_next_pending_todo_after_discovery_progre
11491149
         [
11501150
             tool_outcome(
11511151
                 tool_call=tool_call,
1152
-                output="<h1>Fortran Beginner's Guide</h1>\n",
1152
+                output="<h1>Introduction</h1>\n<p>Guide cadence.</p>\n",
11531153
                 is_error=False,
11541154
             )
11551155
         ]
@@ -1329,9 +1329,9 @@ async def test_tool_batch_runner_successful_reference_read_prioritizes_concrete_
13291329
     chapter_one.write_text("<html></html>\n")
13301330
     index_path = guide_root / "index.html"
13311331
 
1332
-    reference = temp_dir / "Loader" / "guides" / "fortran" / "index.html"
1332
+    reference = temp_dir / "Loader" / "guides" / "fortran" / "chapters" / "01-introduction.html"
13331333
     reference.parent.mkdir(parents=True, exist_ok=True)
1334
-    reference.write_text("<h1>Fortran Beginner's Guide</h1>\n")
1334
+    reference.write_text("<h1>Introduction</h1>\n<p>Guide cadence.</p>\n")
13351335
 
13361336
     implementation_plan = temp_dir / "implementation.md"
13371337
     implementation_plan.write_text(
@@ -1385,11 +1385,11 @@ async def test_tool_batch_runner_successful_reference_read_prioritizes_concrete_
13851385
         ],
13861386
     )
13871387
     tool_call = ToolCall(
1388
-        id="read-reference-index",
1388
+        id="read-reference-chapter",
13891389
         name="read",
13901390
         arguments={"file_path": str(reference)},
13911391
     )
1392
-    read_output = "Observation [read]: Result: <h1>Fortran Beginner's Guide</h1>\n"
1392
+    read_output = "Observation [read]: Result: <h1>Introduction</h1>\n<p>Guide cadence.</p>\n"
13931393
     executor = FakeExecutor(
13941394
         [
13951395
             ToolExecutionOutcome(
@@ -1798,9 +1798,9 @@ async def test_tool_batch_runner_observation_handoff_pushes_mutation_step(
17981798
     ) -> ActionVerification:
17991799
         raise AssertionError("Verification should not run for this scenario")
18001800
 
1801
-    reference = temp_dir / "fortran" / "index.html"
1801
+    reference = temp_dir / "fortran" / "chapters" / "01-introduction.html"
18021802
     reference.parent.mkdir(parents=True)
1803
-    reference.write_text("<h1>Fortran Beginner's Guide</h1>\n")
1803
+    reference.write_text("<h1>Introduction</h1>\n<p>Guide cadence.</p>\n")
18041804
 
18051805
     context = build_context(
18061806
         temp_dir=temp_dir,
@@ -1838,7 +1838,7 @@ async def test_tool_batch_runner_observation_handoff_pushes_mutation_step(
18381838
         [
18391839
             tool_outcome(
18401840
                 tool_call=tool_call,
1841
-                output="<h1>Fortran Beginner's Guide</h1>\n",
1841
+                output="<h1>Introduction</h1>\n<p>Guide cadence.</p>\n",
18421842
                 is_error=False,
18431843
             )
18441844
         ]
tests/test_turn_preamble.py modified
@@ -84,6 +84,47 @@ async def test_turn_preamble_drains_steering_without_prefill_hint(
8484
     )
8585
 
8686
 
87
+@pytest.mark.asyncio
88
+async def test_turn_preamble_keeps_ephemeral_steering_out_of_model_history(
89
+    temp_dir: Path,
90
+) -> None:
91
+    backend = ScriptedBackend()
92
+    agent = Agent(
93
+        backend=backend,
94
+        config=non_streaming_config(),
95
+        project_root=temp_dir,
96
+    )
97
+    runtime = ConversationRuntime(agent)
98
+
99
+    prepared, events, capture = await _prepare_runtime(
100
+        runtime,
101
+        task="Create a README for the runtime controller.",
102
+    )
103
+    agent.messages.append(Message(role=Role.USER, content=prepared.task))
104
+    agent.queue_ephemeral_steering_message("Create 01-introduction.html now.")
105
+
106
+    decision = await runtime.turn_preamble.prepare_iteration(
107
+        task=prepared.task,
108
+        original_task=None,
109
+        iterations=1,
110
+        dod=prepared.definition_of_done,
111
+        emit=capture,
112
+        summary=prepared.summary,
113
+        on_user_question=None,
114
+        executor=prepared.executor,
115
+    )
116
+
117
+    assert not decision.should_continue
118
+    assert not any(
119
+        message.content == "[USER INTERRUPTION]: Create 01-introduction.html now."
120
+        for message in agent.session.messages
121
+    )
122
+    assert any(
123
+        event.type == "steering" and event.content == "Create 01-introduction.html now."
124
+        for event in events
125
+    )
126
+
127
+
87128
 @pytest.mark.asyncio
88129
 async def test_turn_preamble_skips_iteration_when_recovery_refreshes(
89130
     temp_dir: Path,