Lighten steering handoffs
- SHA: e0ebfab39e1deecba3bf571f8ac4ce94df997642 (short: e0ebfab)
- Parents: c3b0cbc
- Tree: f28534a

| Status | File | + | - |
|---|---|---|---|
| M | src/loader/agent/loop.py | 7 | 1 |
| M | src/loader/runtime/bootstrap.py | 15 | 3 |
| M | src/loader/runtime/context.py | 13 | 2 |
| M | src/loader/runtime/finalization.py | 40 | 0 |
| M | src/loader/runtime/public_shell.py | 13 | 4 |
| M | src/loader/runtime/runtime_handle.py | 7 | 1 |
| A | src/loader/runtime/steering.py | 13 | 0 |
| M | src/loader/runtime/tool_batches.py | 14 | 14 |
| M | src/loader/runtime/turn_preamble.py | 5 | 3 |
| M | tests/test_finalization.py | 37 | 0 |
| M | tests/test_runtime_context.py | 4 | 1 |
| M | tests/test_runtime_handle.py | 4 | 1 |
| M | tests/test_runtime_public_shell.py | 5 | 2 |
| M | tests/test_tool_batches.py | 10 | 10 |
| M | tests/test_turn_preamble.py | 41 | 0 |

src/loader/agent/loop.py modified @@ -39,6 +39,7 @@ from ..runtime.public_shell import ( | ||
| 39 | 39 | stream_runtime_shell, |
| 40 | 40 | ) |
| 41 | 41 | from ..runtime.safeguards import RuntimeSafeguards |
| 42 | +from ..runtime.steering import SteeringDirective | |
| 42 | 43 | from ..runtime.workflow import WorkflowMode |
| 43 | 44 | from ..tools.base import ToolRegistry, create_default_registry |
| 44 | 45 | |
@@ -217,7 +218,12 @@ class Agent: | ||
| 217 | 218 | |
| 218 | 219 | self.steering.queue(message) |
| 219 | 220 | |
| 220 | - def drain_steering_messages(self) -> list[str]: | |
| 221 | + def queue_ephemeral_steering_message(self, message: str) -> None: | |
| 222 | + """Queue one UI-only runtime steering message.""" | |
| 223 | + | |
| 224 | + self.steering.queue_ephemeral(message) | |
| 225 | + | |
| 226 | + def drain_steering_messages(self) -> list[SteeringDirective]: | |
| 221 | 227 | """Drain queued runtime steering messages.""" |
| 222 | 228 | |
| 223 | 229 | return self.steering.drain() |
src/loader/runtime/bootstrap.py modified @@ -21,6 +21,7 @@ from .owner_metadata import build_runtime_owner_metadata | ||
| 21 | 21 | from .permissions import PermissionConfigStatus, PermissionPolicy |
| 22 | 22 | from .reasoning_service import RuntimeReasoningService |
| 23 | 23 | from .session import ConversationSession |
| 24 | +from .steering import SteeringDirective | |
| 24 | 25 | |
| 25 | 26 | |
| 26 | 27 | class RuntimeBootstrapSource(Protocol): |
@@ -48,7 +49,10 @@ class RuntimeBootstrapSource(Protocol): | ||
| 48 | 49 | def queue_steering_message(self, message: str) -> None: |
| 49 | 50 | """Queue one steering message for the runtime.""" |
| 50 | 51 | |
| 51 | - def drain_steering_messages(self) -> list[str]: | |
| 52 | + def queue_ephemeral_steering_message(self, message: str) -> None: | |
| 53 | + """Queue one UI-only steering message for the runtime.""" | |
| 54 | + | |
| 55 | + def drain_steering_messages(self) -> list[SteeringDirective]: | |
| 52 | 56 | """Drain queued steering messages.""" |
| 53 | 57 | |
| 54 | 58 | def refresh_capability_profile(self) -> None: |
@@ -78,7 +82,8 @@ class RuntimeBootstrapView: | ||
| 78 | 82 | _get_prompt_format: Callable[[], str | None] |
| 79 | 83 | _get_prompt_sections: Callable[[], list[str]] |
| 80 | 84 | _queue_steering_message: Callable[[str], None] |
| 81 | - _drain_steering_messages: Callable[[], list[str]] | |
| 85 | + _queue_ephemeral_steering_message: Callable[[str], None] | |
| 86 | + _drain_steering_messages: Callable[[], list[SteeringDirective]] | |
| 82 | 87 | _refresh_capability_profile: Callable[[], None] |
| 83 | 88 | metadata: dict[str, Any] = field(default_factory=dict) |
| 84 | 89 | |
@@ -136,7 +141,12 @@ class RuntimeBootstrapView: | ||
| 136 | 141 | |
| 137 | 142 | self._queue_steering_message(message) |
| 138 | 143 | |
| 139 | - def drain_steering_messages(self) -> list[str]: | |
| 144 | + def queue_ephemeral_steering_message(self, message: str) -> None: | |
| 145 | + """Queue one UI-only steering message through the public shell callback.""" | |
| 146 | + | |
| 147 | + self._queue_ephemeral_steering_message(message) | |
| 148 | + | |
| 149 | + def drain_steering_messages(self) -> list[SteeringDirective]: | |
| 140 | 150 | """Drain steering messages through the public shell callback.""" |
| 141 | 151 | |
| 142 | 152 | return self._drain_steering_messages() |
@@ -173,6 +183,7 @@ def build_runtime_bootstrap_source(source: RuntimeBootstrapSource | Any) -> Runt | ||
| 173 | 183 | _get_prompt_format=lambda: source.prompt_format, |
| 174 | 184 | _get_prompt_sections=lambda: list(source.prompt_sections), |
| 175 | 185 | _queue_steering_message=source.queue_steering_message, |
| 186 | + _queue_ephemeral_steering_message=source.queue_ephemeral_steering_message, | |
| 176 | 187 | _drain_steering_messages=source.drain_steering_messages, |
| 177 | 188 | _refresh_capability_profile=source.refresh_capability_profile, |
| 178 | 189 | metadata=build_runtime_owner_metadata(source), |
@@ -234,6 +245,7 @@ def build_runtime_context(source: RuntimeBootstrapSource) -> RuntimeContext: | ||
| 234 | 245 | set_workflow_mode_callback=_set_workflow_mode, |
| 235 | 246 | drain_steering_messages_callback=source.drain_steering_messages, |
| 236 | 247 | queue_steering_message_callback=source.queue_steering_message, |
| 248 | + queue_ephemeral_steering_message_callback=source.queue_ephemeral_steering_message, | |
| 237 | 249 | refresh_capability_profile_callback=_refresh_capability_profile, |
| 238 | 250 | ) |
| 239 | 251 | return context |
src/loader/runtime/context.py modified @@ -15,6 +15,7 @@ from .permissions import PermissionConfigStatus, PermissionPolicy | ||
| 15 | 15 | from .reasoning_types import ActionVerification, ConfidenceAssessment |
| 16 | 16 | from .recovery import RecoveryContext |
| 17 | 17 | from .session import ConversationSession |
| 18 | +from .steering import SteeringDirective | |
| 18 | 19 | |
| 19 | 20 | |
| 20 | 21 | class ReasoningConfigProtocol(Protocol): |
@@ -120,8 +121,9 @@ class RuntimeContext: | ||
| 120 | 121 | prompt_format: str | None = None |
| 121 | 122 | prompt_sections: list[str] = field(default_factory=list) |
| 122 | 123 | set_workflow_mode_callback: Callable[[str], None] | None = None |
| 123 | - drain_steering_messages_callback: Callable[[], list[str]] | None = None | |
| 124 | + drain_steering_messages_callback: Callable[[], list[SteeringDirective]] | None = None | |
| 124 | 125 | queue_steering_message_callback: Callable[[str], None] | None = None |
| 126 | + queue_ephemeral_steering_message_callback: Callable[[str], None] | None = None | |
| 125 | 127 | refresh_capability_profile_callback: Callable[[], None] | None = None |
| 126 | 128 | |
| 127 | 129 | @property |
@@ -157,7 +159,7 @@ class RuntimeContext: | ||
| 157 | 159 | self.set_workflow_mode_callback(workflow_mode) |
| 158 | 160 | self.workflow_mode = workflow_mode |
| 159 | 161 | |
| 160 | - def drain_steering_messages(self) -> list[str]: | |
| 162 | + def drain_steering_messages(self) -> list[SteeringDirective]: | |
| 161 | 163 | """Drain pending steering messages through the runtime control seam.""" |
| 162 | 164 | |
| 163 | 165 | if self.drain_steering_messages_callback is None: |
@@ -171,6 +173,15 @@ class RuntimeContext: | ||
| 171 | 173 | return |
| 172 | 174 | self.queue_steering_message_callback(message) |
| 173 | 175 | |
| 176 | + def queue_ephemeral_steering_message(self, message: str) -> None: | |
| 177 | + """Queue a UI-visible steering message without forcing model persistence.""" | |
| 178 | + | |
| 179 | + if self.queue_ephemeral_steering_message_callback is not None: | |
| 180 | + self.queue_ephemeral_steering_message_callback(message) | |
| 181 | + return | |
| 182 | + if self.queue_steering_message_callback is not None: | |
| 183 | + self.queue_steering_message_callback(message) | |
| 184 | + | |
| 174 | 185 | def refresh_capability_profile(self) -> None: |
| 175 | 186 | """Refresh the resolved capability profile through the runtime control seam.""" |
| 176 | 187 | |
src/loader/runtime/finalization.py modified @@ -118,6 +118,16 @@ class TurnFinalizer: | ||
| 118 | 118 | |
| 119 | 119 | mutating_paths = [path for path in dod.touched_files if path] |
| 120 | 120 | requires_verification = bool(mutating_paths or dod.mutating_actions) |
| 121 | + if ( | |
| 122 | + tracked_pending_items | |
| 123 | + and not requires_verification | |
| 124 | + and _response_declares_no_mutation_needed(candidate_response) | |
| 125 | + ): | |
| 126 | + tracked_pending_items = [ | |
| 127 | + item | |
| 128 | + for item in tracked_pending_items | |
| 129 | + if not _is_task_restatement_pending_item(item, dod.task_statement) | |
| 130 | + ] | |
| 121 | 131 | rlog = get_runtime_logger() |
| 122 | 132 | rlog.completion_check( |
| 123 | 133 | "dod_gate", |
@@ -1084,6 +1094,36 @@ def _verification_state_signature(dod: DefinitionOfDone) -> str: | ||
| 1084 | 1094 | ) |
| 1085 | 1095 | |
| 1086 | 1096 | |
| 1097 | +def _normalize_pending_statement(value: str) -> str: | |
| 1098 | + return " ".join(value.strip().lower().split()) | |
| 1099 | + | |
| 1100 | + | |
| 1101 | +def _is_task_restatement_pending_item(item: str, task_statement: str) -> bool: | |
| 1102 | + normalized_item = _normalize_pending_statement(item) | |
| 1103 | + normalized_task = _normalize_pending_statement(task_statement) | |
| 1104 | + return bool(normalized_item and normalized_item == normalized_task) | |
| 1105 | + | |
| 1106 | + | |
| 1107 | +def _response_declares_no_mutation_needed(candidate_response: str) -> bool: | |
| 1108 | + lowered = candidate_response.lower() | |
| 1109 | + return any( | |
| 1110 | + phrase in lowered | |
| 1111 | + for phrase in ( | |
| 1112 | + "already correct", | |
| 1113 | + "already up to date", | |
| 1114 | + "already matches", | |
| 1115 | + "already complete", | |
| 1116 | + "no edit is needed", | |
| 1117 | + "no edits are needed", | |
| 1118 | + "no change is needed", | |
| 1119 | + "no changes are needed", | |
| 1120 | + "nothing to change", | |
| 1121 | + "no update is needed", | |
| 1122 | + "no updates are needed", | |
| 1123 | + ) | |
| 1124 | + ) | |
| 1125 | + | |
| 1126 | + | |
| 1087 | 1127 | def _build_verification_repair_guidance( |
| 1088 | 1128 | dod: DefinitionOfDone, |
| 1089 | 1129 | *, |
src/loader/runtime/public_shell.py modified @@ -24,6 +24,7 @@ from .permissions import PermissionConfigStatus, PermissionMode, PermissionPolic | ||
| 24 | 24 | from .prompt_history import PromptSnapshot |
| 25 | 25 | from .prompting import build_system_prompt_result |
| 26 | 26 | from .session import ConversationSession |
| 27 | +from .steering import SteeringDirective | |
| 27 | 28 | |
| 28 | 29 | |
| 29 | 30 | @dataclass(slots=True) |
@@ -70,7 +71,7 @@ class SteeringMailbox: | ||
| 70 | 71 | """Small public-shell owner for steering and running-state bookkeeping.""" |
| 71 | 72 | |
| 72 | 73 | def __init__(self) -> None: |
| 73 | - self._pending: deque[str] = deque() | |
| 74 | + self._pending: deque[SteeringDirective] = deque() | |
| 74 | 75 | self._is_running = False |
| 75 | 76 | |
| 76 | 77 | @property |
@@ -100,9 +101,14 @@ class SteeringMailbox: | ||
| 100 | 101 | def queue(self, message: str) -> None: |
| 101 | 102 | """Queue one steering message regardless of running state.""" |
| 102 | 103 | |
| 103 | - self._pending.append(message) | |
| 104 | + self._pending.append(SteeringDirective(content=message, persist_to_model=True)) | |
| 104 | 105 | |
| 105 | - def drain(self) -> list[str]: | |
| 106 | + def queue_ephemeral(self, message: str) -> None: | |
| 107 | + """Queue one UI-only steering message regardless of running state.""" | |
| 108 | + | |
| 109 | + self._pending.append(SteeringDirective(content=message, persist_to_model=False)) | |
| 110 | + | |
| 111 | + def drain(self) -> list[SteeringDirective]: | |
| 106 | 112 | """Drain all pending steering messages in FIFO order.""" |
| 107 | 113 | |
| 108 | 114 | drained = list(self._pending) |
@@ -154,7 +160,10 @@ class RuntimeShellOwner(Protocol): | ||
| 154 | 160 | def queue_steering_message(self, message: str) -> None: |
| 155 | 161 | """Queue one steering message for the runtime.""" |
| 156 | 162 | |
| 157 | - def drain_steering_messages(self) -> list[str]: | |
| 163 | + def queue_ephemeral_steering_message(self, message: str) -> None: | |
| 164 | + """Queue one UI-only steering message for the runtime.""" | |
| 165 | + | |
| 166 | + def drain_steering_messages(self) -> list[SteeringDirective]: | |
| 158 | 167 | """Drain queued steering messages.""" |
| 159 | 168 | |
| 160 | 169 | def refresh_capability_profile(self) -> None: |
src/loader/runtime/runtime_handle.py modified @@ -32,6 +32,7 @@ from .public_shell import ( | ||
| 32 | 32 | set_runtime_shell_workflow_mode, |
| 33 | 33 | stream_runtime_shell, |
| 34 | 34 | ) |
| 35 | +from .steering import SteeringDirective | |
| 35 | 36 | from .workflow import WorkflowMode |
| 36 | 37 | |
| 37 | 38 | |
@@ -214,7 +215,12 @@ class RuntimeHandle: | ||
| 214 | 215 | |
| 215 | 216 | self.steering.queue(message) |
| 216 | 217 | |
| 217 | - def drain_steering_messages(self) -> list[str]: | |
| 218 | + def queue_ephemeral_steering_message(self, message: str) -> None: | |
| 219 | + """Queue one UI-only runtime steering message.""" | |
| 220 | + | |
| 221 | + self.steering.queue_ephemeral(message) | |
| 222 | + | |
| 223 | + def drain_steering_messages(self) -> list[SteeringDirective]: | |
| 218 | 224 | """Drain queued runtime steering messages.""" |
| 219 | 225 | |
| 220 | 226 | return self.steering.drain() |
src/loader/runtime/steering.py added @@ -0,0 +1,13 @@ | ||
| 1 | +"""Steering-message payloads shared across runtime seams.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +from dataclasses import dataclass | |
| 6 | + | |
| 7 | + | |
| 8 | +@dataclass(frozen=True, slots=True) | |
| 9 | +class SteeringDirective: | |
| 10 | + """One queued steering message plus persistence policy.""" | |
| 11 | + | |
| 12 | + content: str | |
| 13 | + persist_to_model: bool = True | |
src/loader/runtime/tool_batches.py modified @@ -820,13 +820,13 @@ class ToolBatchRunner: | ||
| 820 | 820 | messages=list(getattr(self.context.session, "messages", []) or []), |
| 821 | 821 | ) |
| 822 | 822 | if compact_handoff: |
| 823 | - self.context.queue_steering_message( | |
| 823 | + self.context.queue_ephemeral_steering_message( | |
| 824 | 824 | f"Confirmed progress: `{completed_label}` is now satisfied by the successful " |
| 825 | 825 | f"`{tool_call.name}` result. {compact_handoff}" |
| 826 | 826 | " Do not reread reference material or spend the next turn on bookkeeping." |
| 827 | 827 | ) |
| 828 | 828 | return |
| 829 | - self.context.queue_steering_message( | |
| 829 | + self.context.queue_ephemeral_steering_message( | |
| 830 | 830 | f"Confirmed progress: `{completed_label}` is now satisfied by the successful " |
| 831 | 831 | f"`{tool_call.name}` result. One declared output artifact is still missing." |
| 832 | 832 | + _missing_artifact_resume_suffix( |
@@ -851,7 +851,7 @@ class ToolBatchRunner: | ||
| 851 | 851 | "more reference material and perform the change now." |
| 852 | 852 | ) |
| 853 | 853 | |
| 854 | - self.context.queue_steering_message( | |
| 854 | + self.context.queue_ephemeral_steering_message( | |
| 855 | 855 | f"Confirmed progress: `{completed_label}` is now satisfied by the successful " |
| 856 | 856 | f"`{tool_call.name}` result. Continue with the next pending item: " |
| 857 | 857 | f"`{next_pending}` instead of rereading the same evidence.{mutation_suffix}" |
@@ -944,7 +944,7 @@ class ToolBatchRunner: | ||
| 944 | 944 | messages=list(getattr(self.context.session, "messages", []) or []), |
| 945 | 945 | ) |
| 946 | 946 | if compact_handoff: |
| 947 | - self.context.queue_steering_message( | |
| 947 | + self.context.queue_ephemeral_steering_message( | |
| 948 | 948 | f"Confirmed progress: {current_label} is now recorded. " |
| 949 | 949 | + compact_handoff |
| 950 | 950 | + " Do not reread reference material or spend the next turn on bookkeeping." |
@@ -954,7 +954,7 @@ class ToolBatchRunner: | ||
| 954 | 954 | dod, |
| 955 | 955 | project_root=self.context.project_root, |
| 956 | 956 | ): |
| 957 | - self.context.queue_steering_message( | |
| 957 | + self.context.queue_ephemeral_steering_message( | |
| 958 | 958 | f"Confirmed progress: {current_label} is now recorded." |
| 959 | 959 | + _missing_artifact_resume_suffix( |
| 960 | 960 | missing_artifact, |
@@ -964,7 +964,7 @@ class ToolBatchRunner: | ||
| 964 | 964 | + " No TodoWrite, no verification, no rereads until that artifact exists." |
| 965 | 965 | ) |
| 966 | 966 | return |
| 967 | - self.context.queue_steering_message( | |
| 967 | + self.context.queue_ephemeral_steering_message( | |
| 968 | 968 | f"Confirmed progress: {current_label} is now recorded." |
| 969 | 969 | " One declared output artifact is still missing." |
| 970 | 970 | + _missing_artifact_resume_suffix( |
@@ -1020,9 +1020,9 @@ class ToolBatchRunner: | ||
| 1020 | 1020 | "Perform the mutation now instead of spending another turn on " |
| 1021 | 1021 | "planning, rereads, or verification." |
| 1022 | 1022 | ) |
| 1023 | - self.context.queue_steering_message(concrete_message) | |
| 1023 | + self.context.queue_ephemeral_steering_message(concrete_message) | |
| 1024 | 1024 | return |
| 1025 | - self.context.queue_steering_message( | |
| 1025 | + self.context.queue_ephemeral_steering_message( | |
| 1026 | 1026 | "Todo tracking is updated. Continue with the next pending item: " |
| 1027 | 1027 | f"`{next_pending}`. Use the current output files as the source of " |
| 1028 | 1028 | "truth, and do not reopen reference materials unless one specific " |
@@ -1040,7 +1040,7 @@ class ToolBatchRunner: | ||
| 1040 | 1040 | project_root=self.context.project_root, |
| 1041 | 1041 | ) |
| 1042 | 1042 | ): |
| 1043 | - self.context.queue_steering_message( | |
| 1043 | + self.context.queue_ephemeral_steering_message( | |
| 1044 | 1044 | "Todo tracking is updated. Continue with the next pending item: " |
| 1045 | 1045 | f"`{next_pending}`. Use the current output files as the source of " |
| 1046 | 1046 | "truth, and do not reopen reference materials unless one specific " |
@@ -1063,7 +1063,7 @@ class ToolBatchRunner: | ||
| 1063 | 1063 | if verification_commands |
| 1064 | 1064 | else " Finish the targeted consistency pass without reopening reference materials." |
| 1065 | 1065 | ) |
| 1066 | - self.context.queue_steering_message( | |
| 1066 | + self.context.queue_ephemeral_steering_message( | |
| 1067 | 1067 | "Todo tracking is updated. All explicitly planned artifacts now exist. " |
| 1068 | 1068 | f"Continue with the next pending item: `{next_pending}`. " |
| 1069 | 1069 | "Use the current output files as the source of truth, and do not restart " |
@@ -1077,7 +1077,7 @@ class ToolBatchRunner: | ||
| 1077 | 1077 | if verification_commands |
| 1078 | 1078 | else " Finish the task using the files already on disk." |
| 1079 | 1079 | ) |
| 1080 | - self.context.queue_steering_message( | |
| 1080 | + self.context.queue_ephemeral_steering_message( | |
| 1081 | 1081 | "Todo tracking is updated. All explicitly planned artifacts now exist. " |
| 1082 | 1082 | "Do not restart discovery, reopen reference materials, or spend another turn " |
| 1083 | 1083 | "on TodoWrite alone." |
@@ -1094,7 +1094,7 @@ class ToolBatchRunner: | ||
| 1094 | 1094 | if next_pending |
| 1095 | 1095 | else "" |
| 1096 | 1096 | ) |
| 1097 | - self.context.queue_steering_message( | |
| 1097 | + self.context.queue_ephemeral_steering_message( | |
| 1098 | 1098 | "Todo tracking is updated. A declared output artifact is still missing." |
| 1099 | 1099 | + next_pending_suffix |
| 1100 | 1100 | + _missing_artifact_resume_suffix( |
@@ -1151,7 +1151,7 @@ class ToolBatchRunner: | ||
| 1151 | 1151 | project_root=self.context.project_root, |
| 1152 | 1152 | ) |
| 1153 | 1153 | ): |
| 1154 | - self.context.queue_steering_message( | |
| 1154 | + self.context.queue_ephemeral_steering_message( | |
| 1155 | 1155 | "Bookkeeping note is recorded. Continue with the next pending item: " |
| 1156 | 1156 | f"`{next_pending}`. Make your next response one concrete evidence-gathering " |
| 1157 | 1157 | "tool call that advances that step, not another bookkeeping-only turn." |
@@ -1161,7 +1161,7 @@ class ToolBatchRunner: | ||
| 1161 | 1161 | ) |
| 1162 | 1162 | return |
| 1163 | 1163 | |
| 1164 | - self.context.queue_steering_message( | |
| 1164 | + self.context.queue_ephemeral_steering_message( | |
| 1165 | 1165 | "Bookkeeping note is recorded. A declared output artifact is still missing." |
| 1166 | 1166 | + _missing_artifact_resume_suffix( |
| 1167 | 1167 | missing_artifact, |
src/loader/runtime/turn_preamble.py modified @@ -55,12 +55,14 @@ class TurnPreludeController: | ||
| 55 | 55 | self.tracer.record("turn.iteration_started", iteration=iterations) |
| 56 | 56 | |
| 57 | 57 | steering_messages = self.context.drain_steering_messages() |
| 58 | - for steering_message in steering_messages: | |
| 59 | - await emit(AgentEvent(type="steering", content=steering_message)) | |
| 58 | + for directive in steering_messages: | |
| 59 | + await emit(AgentEvent(type="steering", content=directive.content)) | |
| 60 | + if not directive.persist_to_model: | |
| 61 | + continue | |
| 60 | 62 | self.context.session.append( |
| 61 | 63 | Message( |
| 62 | 64 | role=Role.USER, |
| 63 | - content=f"[USER INTERRUPTION]: {steering_message}", | |
| 65 | + content=f"[USER INTERRUPTION]: {directive.content}", | |
| 64 | 66 | ) |
| 65 | 67 | ) |
| 66 | 68 | |
tests/test_finalization.py modified @@ -390,6 +390,43 @@ async def test_turn_finalizer_records_skipped_verification_observation( | ||
| 390 | 390 | assert any(event.type == "dod_status" and event.dod_status == "done" for event in events) |
| 391 | 391 | |
| 392 | 392 | |
| 393 | +@pytest.mark.asyncio | |
| 394 | +async def test_turn_finalizer_accepts_noop_completion_with_task_restatement_todo( | |
| 395 | + temp_dir: Path, | |
| 396 | +) -> None: | |
| 397 | + session = FakeSession() | |
| 398 | + context = build_context(temp_dir, session) | |
| 399 | + finalizer = TurnFinalizer( | |
| 400 | + context, | |
| 401 | + RuntimeTracer(), | |
| 402 | + DefinitionOfDoneStore(temp_dir), | |
| 403 | + set_workflow_mode=_noop_set_workflow_mode, | |
| 404 | + ) | |
| 405 | + task = ( | |
| 406 | + "Have a look at ~/Loader/guides/fortran/index.html, then " | |
| 407 | + "~/Loader/guides/fortran/chapters. The table of contents links in " | |
| 408 | + "index.html are inaccurate and the href’s are wrong. Let’s update the " | |
| 409 | + "links and their link texts to be correct." | |
| 410 | + ) | |
| 411 | + dod = create_definition_of_done(task) | |
| 412 | + dod.pending_items = [task, "Complete the requested work"] | |
| 413 | + summary = TurnSummary(final_response="") | |
| 414 | + | |
| 415 | + async def capture(event) -> None: | |
| 416 | + return None | |
| 417 | + | |
| 418 | + result = await finalizer.run_definition_of_done_gate( | |
| 419 | + dod=dod, | |
| 420 | + candidate_response="The table of contents is already correct, so no edit is needed.", | |
| 421 | + emit=capture, | |
| 422 | + summary=summary, | |
| 423 | + executor=FakeExecutor([]), # type: ignore[arg-type] | |
| 424 | + ) | |
| 425 | + | |
| 426 | + assert result.should_continue is False | |
| 427 | + assert result.reason_code == "non_mutating_response_accepted" | |
| 428 | + | |
| 429 | + | |
| 393 | 430 | @pytest.mark.asyncio |
| 394 | 431 | async def test_turn_finalizer_records_passed_verification_observation( |
| 395 | 432 | temp_dir: Path, |
tests/test_runtime_context.py modified @@ -8,6 +8,7 @@ from loader.agent.loop import Agent, AgentConfig | ||
| 8 | 8 | from loader.runtime.bootstrap import build_runtime_bootstrap_source, build_runtime_context |
| 9 | 9 | from loader.runtime.context import RuntimeContext |
| 10 | 10 | from loader.runtime.recovery import RecoveryContext |
| 11 | +from loader.runtime.steering import SteeringDirective | |
| 11 | 12 | from tests.helpers.runtime_harness import ScriptedBackend |
| 12 | 13 | |
| 13 | 14 | |
@@ -53,7 +54,9 @@ def test_runtime_context_control_callbacks_stay_in_sync(temp_dir: Path) -> None: | ||
| 53 | 54 | context = build_runtime_context(source) |
| 54 | 55 | context.queue_steering_message("Re-check the current task.") |
| 55 | 56 | |
| 56 | - assert context.drain_steering_messages() == ["Re-check the current task."] | |
| 57 | + assert context.drain_steering_messages() == [ | |
| 58 | + SteeringDirective(content="Re-check the current task.") | |
| 59 | + ] | |
| 57 | 60 | |
| 58 | 61 | context.set_workflow_mode("clarify") |
| 59 | 62 | assert agent.workflow_mode == "clarify" |
tests/test_runtime_handle.py modified @@ -12,6 +12,7 @@ from loader.runtime.bootstrap import RuntimeBootstrapView, build_runtime_context | ||
| 12 | 12 | from loader.runtime.conversation import ConversationRuntime |
| 13 | 13 | from loader.runtime.launcher import RuntimeLauncher, build_runtime_launcher |
| 14 | 14 | from loader.runtime.runtime_handle import RuntimeHandle |
| 15 | +from loader.runtime.steering import SteeringDirective | |
| 15 | 16 | from tests.helpers.runtime_harness import ScriptedBackend, run_explore_scenario, run_scenario |
| 16 | 17 | |
| 17 | 18 | |
@@ -180,4 +181,6 @@ def test_runtime_handle_exposes_public_shell_steering_contract( | ||
| 180 | 181 | |
| 181 | 182 | assert handle.is_running is True |
| 182 | 183 | assert handle.steer("stay in runtime") is True |
| 183 | - assert handle.drain_steering_messages() == ["stay in runtime"] | |
| 184 | + assert handle.drain_steering_messages() == [ | |
| 185 | + SteeringDirective(content="stay in runtime") | |
| 186 | + ] | |
tests/test_runtime_public_shell.py modified @@ -37,6 +37,7 @@ from loader.runtime.public_shell import ( | ||
| 37 | 37 | ) |
| 38 | 38 | from loader.runtime.runtime_handle import RuntimeHandle |
| 39 | 39 | from loader.runtime.session import ConversationSession |
| 40 | +from loader.runtime.steering import SteeringDirective | |
| 40 | 41 | from tests.helpers.runtime_harness import ScriptedBackend |
| 41 | 42 | |
| 42 | 43 | |
@@ -320,10 +321,12 @@ def test_steering_mailbox_tracks_running_state_and_fifo_messages() -> None: | ||
| 320 | 321 | assert mailbox.steer("stay in runtime") is True |
| 321 | 322 | |
| 322 | 323 | mailbox.queue("double-check the current task") |
| 324 | + mailbox.queue_ephemeral("show a lighter nudge") | |
| 323 | 325 | |
| 324 | 326 | assert mailbox.drain() == [ |
| 325 | - "stay in runtime", | |
| 326 | - "double-check the current task", | |
| 327 | + SteeringDirective(content="stay in runtime"), | |
| 328 | + SteeringDirective(content="double-check the current task"), | |
| 329 | + SteeringDirective(content="show a lighter nudge", persist_to_model=False), | |
| 327 | 330 | ] |
| 328 | 331 | |
| 329 | 332 | mailbox.mark_idle() |
tests/test_tool_batches.py modified @@ -1104,9 +1104,9 @@ async def test_tool_batch_runner_queues_next_pending_todo_after_discovery_progre | ||
| 1104 | 1104 | ) -> ActionVerification: |
| 1105 | 1105 | raise AssertionError("Verification should not run for this scenario") |
| 1106 | 1106 | |
| 1107 | - reference = temp_dir / "fortran" / "index.html" | |
| 1107 | + reference = temp_dir / "fortran" / "chapters" / "01-introduction.html" | |
| 1108 | 1108 | reference.parent.mkdir(parents=True) |
| 1109 | - reference.write_text("<h1>Fortran Beginner's Guide</h1>\n") | |
| 1109 | + reference.write_text("<h1>Introduction</h1>\n<p>Guide cadence.</p>\n") | |
| 1110 | 1110 | |
| 1111 | 1111 | context = build_context( |
| 1112 | 1112 | temp_dir=temp_dir, |
@@ -1149,7 +1149,7 @@ async def test_tool_batch_runner_queues_next_pending_todo_after_discovery_progre | ||
| 1149 | 1149 | [ |
| 1150 | 1150 | tool_outcome( |
| 1151 | 1151 | tool_call=tool_call, |
| 1152 | - output="<h1>Fortran Beginner's Guide</h1>\n", | |
| 1152 | + output="<h1>Introduction</h1>\n<p>Guide cadence.</p>\n", | |
| 1153 | 1153 | is_error=False, |
| 1154 | 1154 | ) |
| 1155 | 1155 | ] |
@@ -1329,9 +1329,9 @@ async def test_tool_batch_runner_successful_reference_read_prioritizes_concrete_ | ||
| 1329 | 1329 | chapter_one.write_text("<html></html>\n") |
| 1330 | 1330 | index_path = guide_root / "index.html" |
| 1331 | 1331 | |
| 1332 | - reference = temp_dir / "Loader" / "guides" / "fortran" / "index.html" | |
| 1332 | + reference = temp_dir / "Loader" / "guides" / "fortran" / "chapters" / "01-introduction.html" | |
| 1333 | 1333 | reference.parent.mkdir(parents=True, exist_ok=True) |
| 1334 | - reference.write_text("<h1>Fortran Beginner's Guide</h1>\n") | |
| 1334 | + reference.write_text("<h1>Introduction</h1>\n<p>Guide cadence.</p>\n") | |
| 1335 | 1335 | |
| 1336 | 1336 | implementation_plan = temp_dir / "implementation.md" |
| 1337 | 1337 | implementation_plan.write_text( |
@@ -1385,11 +1385,11 @@ async def test_tool_batch_runner_successful_reference_read_prioritizes_concrete_ | ||
| 1385 | 1385 | ], |
| 1386 | 1386 | ) |
| 1387 | 1387 | tool_call = ToolCall( |
| 1388 | - id="read-reference-index", | |
| 1388 | + id="read-reference-chapter", | |
| 1389 | 1389 | name="read", |
| 1390 | 1390 | arguments={"file_path": str(reference)}, |
| 1391 | 1391 | ) |
| 1392 | - read_output = "Observation [read]: Result: <h1>Fortran Beginner's Guide</h1>\n" | |
| 1392 | + read_output = "Observation [read]: Result: <h1>Introduction</h1>\n<p>Guide cadence.</p>\n" | |
| 1393 | 1393 | executor = FakeExecutor( |
| 1394 | 1394 | [ |
| 1395 | 1395 | ToolExecutionOutcome( |
@@ -1798,9 +1798,9 @@ async def test_tool_batch_runner_observation_handoff_pushes_mutation_step( | ||
| 1798 | 1798 | ) -> ActionVerification: |
| 1799 | 1799 | raise AssertionError("Verification should not run for this scenario") |
| 1800 | 1800 | |
| 1801 | - reference = temp_dir / "fortran" / "index.html" | |
| 1801 | + reference = temp_dir / "fortran" / "chapters" / "01-introduction.html" | |
| 1802 | 1802 | reference.parent.mkdir(parents=True) |
| 1803 | - reference.write_text("<h1>Fortran Beginner's Guide</h1>\n") | |
| 1803 | + reference.write_text("<h1>Introduction</h1>\n<p>Guide cadence.</p>\n") | |
| 1804 | 1804 | |
| 1805 | 1805 | context = build_context( |
| 1806 | 1806 | temp_dir=temp_dir, |
@@ -1838,7 +1838,7 @@ async def test_tool_batch_runner_observation_handoff_pushes_mutation_step( | ||
| 1838 | 1838 | [ |
| 1839 | 1839 | tool_outcome( |
| 1840 | 1840 | tool_call=tool_call, |
| 1841 | - output="<h1>Fortran Beginner's Guide</h1>\n", | |
| 1841 | + output="<h1>Introduction</h1>\n<p>Guide cadence.</p>\n", | |
| 1842 | 1842 | is_error=False, |
| 1843 | 1843 | ) |
| 1844 | 1844 | ] |
tests/test_turn_preamble.py modified @@ -84,6 +84,47 @@ async def test_turn_preamble_drains_steering_without_prefill_hint( | ||
| 84 | 84 | ) |
| 85 | 85 | |
| 86 | 86 | |
| 87 | +@pytest.mark.asyncio | |
| 88 | +async def test_turn_preamble_keeps_ephemeral_steering_out_of_model_history( | |
| 89 | + temp_dir: Path, | |
| 90 | +) -> None: | |
| 91 | + backend = ScriptedBackend() | |
| 92 | + agent = Agent( | |
| 93 | + backend=backend, | |
| 94 | + config=non_streaming_config(), | |
| 95 | + project_root=temp_dir, | |
| 96 | + ) | |
| 97 | + runtime = ConversationRuntime(agent) | |
| 98 | + | |
| 99 | + prepared, events, capture = await _prepare_runtime( | |
| 100 | + runtime, | |
| 101 | + task="Create a README for the runtime controller.", | |
| 102 | + ) | |
| 103 | + agent.messages.append(Message(role=Role.USER, content=prepared.task)) | |
| 104 | + agent.queue_ephemeral_steering_message("Create 01-introduction.html now.") | |
| 105 | + | |
| 106 | + decision = await runtime.turn_preamble.prepare_iteration( | |
| 107 | + task=prepared.task, | |
| 108 | + original_task=None, | |
| 109 | + iterations=1, | |
| 110 | + dod=prepared.definition_of_done, | |
| 111 | + emit=capture, | |
| 112 | + summary=prepared.summary, | |
| 113 | + on_user_question=None, | |
| 114 | + executor=prepared.executor, | |
| 115 | + ) | |
| 116 | + | |
| 117 | + assert not decision.should_continue | |
| 118 | + assert not any( | |
| 119 | + message.content == "[USER INTERRUPTION]: Create 01-introduction.html now." | |
| 120 | + for message in agent.session.messages | |
| 121 | + ) | |
| 122 | + assert any( | |
| 123 | + event.type == "steering" and event.content == "Create 01-introduction.html now." | |
| 124 | + for event in events | |
| 125 | + ) | |
| 126 | + | |
| 127 | + | |
| 87 | 128 | @pytest.mark.asyncio |
| 88 | 129 | async def test_turn_preamble_skips_iteration_when_recovery_refreshes( |
| 89 | 130 | temp_dir: Path, |