Python · 22474 bytes Raw Blame History
1 """Direct tests for runtime-owned public shell helpers."""
2
3 from __future__ import annotations
4
5 from pathlib import Path
6 from types import SimpleNamespace
7
8 import pytest
9
10 from loader.agent.loop import AgentConfig
11 from loader.llm.base import CompletionResponse, Message, Role, StreamChunk
12 from loader.runtime.capabilities import CapabilityProfile
13 from loader.runtime.completion_trace import CompletionTraceEntry
14 from loader.runtime.dod import DefinitionOfDoneStore, create_definition_of_done
15 from loader.runtime.public_shell import (
16 SteeringMailbox,
17 apply_runtime_session_install,
18 build_event_emitter,
19 build_fresh_runtime_session_install,
20 build_runtime_few_shot_examples,
21 build_runtime_system_message,
22 clear_runtime_shell_history,
23 create_runtime_session,
24 create_runtime_session_install,
25 get_runtime_shell_few_shot_examples,
26 get_runtime_shell_system_message,
27 load_runtime_session_install,
28 refresh_runtime_capability_state,
29 refresh_runtime_shell_capability_profile,
30 resolve_runtime_shell_use_react,
31 restore_runtime_session_state,
32 resume_runtime_shell_session,
33 run_runtime_shell,
34 run_runtime_shell_explore,
35 set_runtime_shell_workflow_mode,
36 stream_runtime_shell,
37 )
38 from loader.runtime.runtime_handle import RuntimeHandle
39 from loader.runtime.session import ConversationSession
40 from loader.runtime.steering import SteeringDirective
41 from tests.helpers.runtime_harness import ScriptedBackend
42
43
def _dummy_system() -> Message:
    """Return a placeholder system-role message for session factories."""
    placeholder = Message(role=Role.SYSTEM, content="system")
    return placeholder
46
47
48 def _dummy_few_shots() -> list[Message]:
49 return []
50
51
def _runtime_handle(
    temp_dir: Path,
    *,
    backend: ScriptedBackend | None = None,
    config: AgentConfig | None = None,
) -> RuntimeHandle:
    """Build a ``RuntimeHandle`` rooted at *temp_dir* for a test.

    Args:
        temp_dir: Directory passed to the handle as ``project_root``.
        backend: Backend to install; a default ``ScriptedBackend()`` is
            created when this is ``None``.
        config: Agent configuration; ``AgentConfig(auto_context=False)`` is
            used when this is ``None``.

    Returns:
        The constructed ``RuntimeHandle``.
    """
    # Compare against None explicitly so an explicitly supplied backend or
    # config is never replaced by the default merely because it is falsy.
    if backend is None:
        backend = ScriptedBackend()
    if config is None:
        config = AgentConfig(auto_context=False)
    return RuntimeHandle(
        backend=backend,
        config=config,
        project_root=temp_dir,
    )
63
64
def test_create_runtime_session_copies_public_shell_state(temp_dir: Path) -> None:
    """Check that a created session mirrors the handle's permission and prompt state."""
    handle = _runtime_handle(temp_dir)

    # Gather the keyword arguments first; they are forwarded unchanged.
    session_kwargs = dict(
        project_root=handle.project_root,
        messages=handle.messages,
        permission_policy=handle.permission_policy,
        permission_config_status=handle.permission_config_status,
        prompt_format="native",
        prompt_sections=["Runtime Config", "Workflow Context"],
        workflow_mode="execute",
        runtime_owner_type="RuntimeHandle",
        runtime_owner_path="runtime-handle",
        rotate_after_bytes=handle.config.session_rotate_after_bytes,
        auto_compaction_input_tokens_threshold=(
            handle.config.session_auto_compaction_input_tokens_threshold
        ),
        compaction_keep_last_messages=handle.config.session_compaction_keep_last_messages,
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
    )
    created = create_runtime_session(**session_kwargs)

    # Permission state is expected to match what the handle reports.
    assert created.permission_mode == handle.active_permission_mode
    assert created.permission_prompting_enabled is handle.permission_policy.prompting_enabled
    assert created.permission_rule_counts == handle.permission_policy.rule_counts()

    # Owner and prompt metadata are expected to match the supplied arguments.
    assert created.runtime_owner_type == "RuntimeHandle"
    assert created.runtime_owner_path == "runtime-handle"
    assert created.prompt_format == "native"
    assert created.prompt_sections == ["Runtime Config", "Workflow Context"]
94
95
def test_build_runtime_system_message_updates_session_metadata(temp_dir: Path) -> None:
    """Check that building a system message records prompt metadata on the session."""
    task = "Plan the next runtime refactor."
    handle = _runtime_handle(temp_dir)
    session = ConversationSession(
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        project_root=temp_dir,
    )

    prompt_state = build_runtime_system_message(
        registry=handle.registry,
        use_react=False,
        project_context=None,
        workflow_mode="plan",
        permission_mode="workspace-write",
        cwd=temp_dir,
        current_task=task,
        session=session,
    )

    # The returned prompt state describes a native, plan-mode system message.
    assert prompt_state.prompt_format == "native"
    system_message = prompt_state.system_message
    assert system_message.role == Role.SYSTEM
    assert "Plan Mode" in system_message.content

    # The session passed in carries the same metadata plus one history entry.
    assert session.prompt_format == "native"
    assert session.prompt_sections == prompt_state.prompt_sections
    assert len(session.prompt_history) == 1
    recorded = session.prompt_history[0]
    assert recorded.workflow_mode == "plan"
    assert recorded.current_task == task
123
124
def test_build_runtime_few_shot_examples_switches_tool_format() -> None:
    """Check few-shot examples in ReAct mode versus native mode."""
    # Build the ReAct examples first, then the native ones.
    react_examples, native_examples = (
        build_runtime_few_shot_examples(use_react=flag) for flag in (True, False)
    )

    tool_call_example = react_examples[1]
    background_example = react_examples[5]

    assert "<tool_call>" in tool_call_example.content
    assert '"background": true' in background_example.content
    assert native_examples == []
132
133
def test_resolve_runtime_shell_use_react_respects_force_react_and_capabilities(
    temp_dir: Path,
) -> None:
    """Check ReAct resolution with and without ``force_react`` on a native-tools backend."""
    native_backend = ScriptedBackend(supports_native_tools=True)
    native_handle = _runtime_handle(temp_dir, backend=native_backend)

    # Without force_react, the resolved value is False and is stored on the handle.
    assert resolve_runtime_shell_use_react(native_handle) is False
    assert native_handle._use_react is False

    forced_backend = ScriptedBackend(supports_native_tools=True)
    forced_config = AgentConfig(auto_context=False, force_react=True)
    forced_handle = _runtime_handle(
        temp_dir,
        backend=forced_backend,
        config=forced_config,
    )

    # With force_react=True, the resolved value is True despite native tool support.
    assert resolve_runtime_shell_use_react(forced_handle) is True
    assert forced_handle._use_react is True
153
154
def test_get_runtime_shell_system_message_caches_prompt_state_on_owner(
    temp_dir: Path,
) -> None:
    """Check that repeated system-message lookups return the same object."""
    handle = _runtime_handle(temp_dir)

    initial_message = get_runtime_shell_system_message(handle)
    repeated_message = get_runtime_shell_system_message(handle)

    # Identity, not just equality: the second call must reuse the first result.
    assert initial_message is repeated_message
    assert handle.prompt_format in {"native", "react"}
    assert handle.prompt_sections
    # Two lookups are expected to leave exactly one prompt-history entry.
    assert len(handle.session.prompt_history) == 1
167
168
def test_set_runtime_shell_workflow_mode_invalidates_prompt_cache(
    temp_dir: Path,
) -> None:
    """Check that changing workflow mode drops the cached system message."""
    handle = _runtime_handle(temp_dir)
    message_before = get_runtime_shell_system_message(handle)

    set_runtime_shell_workflow_mode(handle, "plan")

    # The mode is updated and the cached message is cleared.
    assert handle.workflow_mode == "plan"
    assert handle._system_message is None

    message_after = get_runtime_shell_system_message(handle)

    # A new message object is produced and the latest history entry is plan mode.
    assert message_after is not message_before
    latest_entry = handle.session.prompt_history[-1]
    assert latest_entry.workflow_mode == "plan"
184
185
def test_get_runtime_shell_few_shot_examples_uses_owner_prompt_mode(
    temp_dir: Path,
) -> None:
    """Check few-shot examples for a native handle versus a forced-ReAct handle."""
    handle_native = _runtime_handle(
        temp_dir,
        backend=ScriptedBackend(supports_native_tools=True),
    )
    handle_react = _runtime_handle(
        temp_dir,
        backend=ScriptedBackend(supports_native_tools=True),
        config=AgentConfig(auto_context=False, force_react=True),
    )

    examples_native = get_runtime_shell_few_shot_examples(handle_native)
    examples_react = get_runtime_shell_few_shot_examples(handle_react)

    assert examples_native == []
    second_react_example = examples_react[1]
    assert "<tool_call>" in second_react_example.content
204
205
@pytest.mark.asyncio
async def test_build_event_emitter_supports_sync_and_async_callbacks() -> None:
    """Check that emitters wrap sync, async, and ``None`` callbacks."""
    observed: list[tuple[str, str]] = []

    def record_sync(event) -> None:
        observed.append(("sync", event.type))

    async def record_async(event) -> None:
        observed.append(("async", event.type))

    emit_sync = build_event_emitter(record_sync)
    emit_async = build_event_emitter(record_async)

    await emit_sync(SimpleNamespace(type="response"))
    await emit_async(SimpleNamespace(type="stream"))

    # An emitter built from None must be awaitable and record nothing.
    emit_nothing = build_event_emitter(None)
    await emit_nothing(SimpleNamespace(type="ignored"))

    expected = [("sync", "response"), ("async", "stream")]
    assert observed == expected
224
225
@pytest.mark.asyncio
async def test_run_runtime_shell_uses_runtime_launcher_entrypoint(
    temp_dir: Path,
) -> None:
    """Check the reply, turn summary, and events from a non-streaming shell run."""
    scripted_reply = CompletionResponse(content="Runtime shell reply.")
    handle = _runtime_handle(
        temp_dir,
        backend=ScriptedBackend(completions=[scripted_reply]),
        config=AgentConfig(auto_context=False, stream=False),
    )
    handle.config.reasoning.completion_check = False
    captured = []

    async def on_event(event) -> None:
        captured.append(event)

    reply = await run_runtime_shell(
        handle,
        "Summarize the runtime shell state.",
        on_event=on_event,
        use_plan=False,
    )

    assert reply == "Runtime shell reply."
    assert handle.last_turn_summary is not None
    assert handle.last_turn_summary.final_response == "Runtime shell reply."
    # The steering mailbox must report idle once the run has returned.
    assert handle.steering.is_running is False
    response_seen = any(item.type == "response" for item in captured)
    assert response_seen
255
256
@pytest.mark.asyncio
async def test_stream_runtime_shell_yields_streamed_events(
    temp_dir: Path,
) -> None:
    """Check that streaming yields a response event with the full content."""
    scripted_stream = [
        StreamChunk(content="Quick ", is_done=False),
        StreamChunk(
            content="reply.",
            full_content="Quick reply.",
            is_done=True,
        ),
    ]
    handle = _runtime_handle(
        temp_dir,
        backend=ScriptedBackend(streams=[scripted_stream]),
        config=AgentConfig(auto_context=False),
    )

    # Consume the async generator to exhaustion, keeping every event.
    collected = []
    async for event in stream_runtime_shell(handle, "thanks"):
        collected.append(event)

    saw_full_reply = any(
        item.type == "response" and item.content == "Quick reply."
        for item in collected
    )
    assert saw_full_reply
    assert handle.steering.is_running is False
282
283
@pytest.mark.asyncio
async def test_run_runtime_shell_explore_updates_last_turn_summary(
    temp_dir: Path,
) -> None:
    """Check that an explore run records an ``explore`` turn summary."""
    scripted_reply = CompletionResponse(content="Quick repo summary.")
    handle = _runtime_handle(
        temp_dir,
        backend=ScriptedBackend(completions=[scripted_reply]),
        config=AgentConfig(auto_context=False, stream=False),
    )
    captured = []

    async def on_event(event) -> None:
        captured.append(event)

    reply = await run_runtime_shell_explore(
        handle,
        "Give me a quick repo summary.",
        on_event=on_event,
    )

    assert reply == "Quick repo summary."
    assert handle.last_turn_summary is not None
    assert handle.last_turn_summary.workflow_mode == "explore"
    response_seen = any(item.type == "response" for item in captured)
    assert response_seen
310
311
def test_steering_mailbox_tracks_running_state_and_fifo_messages() -> None:
    """Check mailbox running state, steer acceptance, drain order, and clearing."""
    box = SteeringMailbox()

    # A new mailbox is idle: steer() returns False and nothing is queued.
    assert box.is_running is False
    assert box.steer("stay in runtime") is False
    assert box.drain() == []

    box.mark_running()

    # While running, steer() returns True.
    assert box.steer("stay in runtime") is True

    box.queue("double-check the current task")
    box.queue_ephemeral("show a lighter nudge")

    # Directives drain in the order they were added.
    expected_directives = [
        SteeringDirective(content="stay in runtime"),
        SteeringDirective(content="double-check the current task"),
        SteeringDirective(content="show a lighter nudge", persist_to_model=False),
    ]
    assert box.drain() == expected_directives

    box.mark_idle()
    assert box.is_running is False

    # clear() on a running mailbox leaves it idle with nothing to drain.
    box.mark_running()
    box.queue("stale message")
    box.clear()
    assert box.is_running is False
    assert box.drain() == []
340
341
def test_refresh_runtime_capability_state_reports_prompt_reset_requirement() -> None:
    """Check that a native-tools mismatch is reported as requiring a prompt reset."""
    # The backend reports no native tools while the current profile claims them.
    stale_profile = SimpleNamespace(supports_native_tools=True)
    scripted = ScriptedBackend(supports_native_tools=False)

    result = refresh_runtime_capability_state(
        backend=scripted,
        current_profile=stale_profile,  # type: ignore[arg-type]
    )

    refreshed_profile = result.capability_profile
    assert refreshed_profile.supports_native_tools is False
    assert result.prompt_reset_required is True
353
354
def test_refresh_runtime_shell_capability_profile_updates_owner_cache_state(
    temp_dir: Path,
) -> None:
    """Check that a capability refresh clears the handle's cached prompt state."""
    scripted = ScriptedBackend(supports_native_tools=True)
    handle = _runtime_handle(temp_dir, backend=scripted)

    # Seed cached prompt state, then flip the backend's native-tools flag.
    handle._system_message = Message(role=Role.SYSTEM, content="cached")
    handle._use_react = True
    scripted._supports_native_tools = False  # type: ignore[attr-defined]

    result = refresh_runtime_shell_capability_profile(handle)

    # The refresh result reports the change.
    assert result.capability_profile.supports_native_tools is False
    assert result.prompt_reset_required is True

    # The handle's profile is updated and both cached values are cleared.
    assert handle.capability_profile.supports_native_tools is False
    assert handle._system_message is None
    assert handle._use_react is None
371
372
def test_refresh_runtime_shell_capability_profile_reclamps_session_threshold(
    temp_dir: Path,
) -> None:
    """Check the session compaction threshold after the context window changes."""

    class ProfiledBackend(ScriptedBackend):
        """Scripted backend whose profile reports a mutable ``context_window``."""

        def __init__(self) -> None:
            super().__init__(supports_native_tools=True)
            self.context_window = 8192

        def capability_profile(self) -> CapabilityProfile:
            profile_fields = {
                "model_name": "qwen3-coder:30b",
                "supports_native_tools": True,
                "supports_streaming": True,
                "context_window": self.context_window,
                "preferred_tool_call_format": "native",
                "verification_strictness": "standard",
                "notes": ["scripted"],
            }
            return CapabilityProfile(**profile_fields)

    scripted = ProfiledBackend()
    handle = _runtime_handle(temp_dir, backend=scripted)

    # 6_144 is 0.75 x 8192 — presumably the clamp ratio; confirm against the
    # session/runtime code.
    assert handle.session.auto_compaction_input_tokens_threshold == 6_144

    scripted.context_window = 131_072
    refresh_runtime_shell_capability_profile(handle)

    # 98_304 is 0.75 x 131_072.
    assert handle.session.auto_compaction_input_tokens_threshold == 98_304
401
402
def test_create_runtime_session_install_builds_restored_shell_state(
    temp_dir: Path,
) -> None:
    """Check the session and restored state carried by a created install."""
    handle = _runtime_handle(temp_dir)

    install = create_runtime_session_install(
        project_root=handle.project_root,
        messages=handle.messages,
        permission_policy=handle.permission_policy,
        permission_config_status=handle.permission_config_status,
        prompt_format="native",
        prompt_sections=["Runtime Config", "Workflow Context"],
        workflow_mode="execute",
        runtime_owner_type="RuntimeHandle",
        runtime_owner_path="runtime-handle",
        rotate_after_bytes=handle.config.session_rotate_after_bytes,
        auto_compaction_input_tokens_threshold=(
            handle.config.session_auto_compaction_input_tokens_threshold
        ),
        compaction_keep_last_messages=handle.config.session_compaction_keep_last_messages,
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
    )

    assert install.session.permission_mode == handle.active_permission_mode

    # The restored state echoes the supplied arguments and has no turn summary.
    restored_state = install.restored
    assert restored_state.workflow_mode == "execute"
    assert restored_state.prompt_format == "native"
    assert restored_state.prompt_sections == ["Runtime Config", "Workflow Context"]
    assert restored_state.last_turn_summary is None
432
433
def test_apply_runtime_session_install_updates_owner_shell_state(
    temp_dir: Path,
) -> None:
    """Check that applying an install copies its state onto the handle."""
    handle = _runtime_handle(temp_dir)
    resume_text = "Resume the runtime session."

    # The install is deliberately created with a different owner
    # ("Agent" / "public-agent") than the handle it is applied to.
    install_kwargs = dict(
        project_root=handle.project_root,
        messages=[Message(role=Role.USER, content=resume_text)],
        permission_policy=handle.permission_policy,
        permission_config_status=handle.permission_config_status,
        prompt_format="native",
        prompt_sections=["Runtime Config", "Workflow Context"],
        workflow_mode="plan",
        runtime_owner_type="Agent",
        runtime_owner_path="public-agent",
        rotate_after_bytes=handle.config.session_rotate_after_bytes,
        auto_compaction_input_tokens_threshold=(
            handle.config.session_auto_compaction_input_tokens_threshold
        ),
        compaction_keep_last_messages=handle.config.session_compaction_keep_last_messages,
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
    )
    install = create_runtime_session_install(**install_kwargs)
    install.session.permission_mode = "prompt"
    install.restored.current_task = resume_text
    install.restored.permission_mode = "prompt"

    apply_runtime_session_install(handle, install)

    # The handle adopts the install's session object and restored values.
    assert handle.session is install.session
    assert handle.messages[-1].content == resume_text
    assert handle.current_task == resume_text
    assert handle.workflow_mode == "plan"
    assert handle.active_permission_mode == "prompt"
    assert handle.prompt_format == "native"
    assert handle.prompt_sections == ["Runtime Config", "Workflow Context"]

    # After applying, the session reports the handle as owner rather than the
    # "Agent" / "public-agent" values the install was created with.
    assert handle.session.runtime_owner_type == "RuntimeHandle"
    assert handle.session.runtime_owner_path == "runtime-handle"
471
472
def test_build_fresh_runtime_session_install_uses_current_owner_shell_state(
    temp_dir: Path,
) -> None:
    """Check that a fresh install reflects the handle's current shell state."""
    handle = _runtime_handle(temp_dir)
    handle.current_task = "Keep the runtime shell tidy."
    handle.prompt_format = "native"
    handle.prompt_sections = ["Runtime Config", "Workflow Context"]
    handle.set_workflow_mode("clarify")

    opening_message = Message(role=Role.USER, content="Fresh runtime task.")
    install = build_fresh_runtime_session_install(
        handle,
        messages=[opening_message],
    )

    # Prompt metadata lands on the new session.
    fresh_session = install.session
    assert fresh_session.prompt_format == "native"
    assert fresh_session.prompt_sections == ["Runtime Config", "Workflow Context"]

    # Workflow mode and messages land on the restored state.
    restored_state = install.restored
    assert restored_state.workflow_mode == "clarify"
    assert restored_state.messages[-1].content == "Fresh runtime task."
491
492
def test_restore_runtime_session_state_recovers_last_turn_summary(
    temp_dir: Path,
) -> None:
    """Check that restoring a populated session rebuilds its last-turn summary."""
    task = "Ship the runtime shell cleanup."
    decision_code = "verification_passed"
    decision_summary = "accepted the response after verification evidence passed"

    session = ConversationSession(
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        project_root=temp_dir,
    )

    # Save a definition of done and point the session at the saved path.
    definition = create_definition_of_done(task)
    saved_path = DefinitionOfDoneStore(temp_dir).save(definition)
    session.active_dod_path = str(saved_path)

    # Populate workflow and completion-decision fields on the session.
    session.current_task = task
    session.workflow_mode = "verify"
    session.workflow_reason_code = "verification_needed"
    session.workflow_reason_summary = "pending verification evidence remains"
    session.workflow_decision_kind = "handoff"
    session.last_completion_decision_code = decision_code
    session.last_completion_decision_summary = decision_summary
    trace_entry = CompletionTraceEntry(
        stage="definition_of_done",
        outcome="complete",
        decision_code=decision_code,
        decision_summary=decision_summary,
    )
    session.append_completion_trace_entry(trace_entry)
    session.usage_totals = {"input_tokens": 10, "output_tokens": 4}

    restored = restore_runtime_session_state(
        project_root=temp_dir,
        session=session,
    )

    assert restored.current_task == task
    assert restored.workflow_mode == "verify"
    assert restored.last_turn_summary is not None
    assert restored.last_turn_summary.definition_of_done.task_statement == task
    assert restored.last_turn_summary.workflow_reason_code == "verification_needed"
    assert restored.last_completion_decision_code == decision_code
    assert restored.last_completion_decision_summary == decision_summary
    assert restored.last_turn_summary.completion_decision_code == decision_code
    first_trace = restored.last_turn_summary.completion_trace[0]
    assert first_trace.decision_code == decision_code
544
545
def test_load_runtime_session_install_reconstructs_saved_shell_state(
    temp_dir: Path,
) -> None:
    """Check that a persisted session can be loaded back as an install."""
    task = "Resume the saved runtime session."
    saved = ConversationSession(
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        project_root=temp_dir,
    )
    saved.current_task = task
    saved.workflow_mode = "execute"
    saved.permission_mode = "prompt"
    saved.prompt_format = "native"
    saved.prompt_sections = ["Runtime Config", "Workflow Context"]
    saved.append(Message(role=Role.USER, content=task))
    saved.persist()

    loaded = load_runtime_session_install(
        project_root=temp_dir,
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        session_id=saved.session_id,
        rotate_after_bytes=256 * 1024,
        auto_compaction_input_tokens_threshold=100_000,
        compaction_keep_last_messages=4,
    )

    assert loaded is not None
    assert loaded.session.session_id == saved.session_id
    restored_state = loaded.restored
    assert restored_state.current_task == task
    assert restored_state.permission_mode == "prompt"
    assert restored_state.messages[-1].content == task
577
578
def test_resume_runtime_shell_session_restores_saved_owner_state(
    temp_dir: Path,
) -> None:
    """Check that resuming by session id restores saved state onto a new handle."""
    task = "Resume the saved runtime session."
    saved = ConversationSession(
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        project_root=temp_dir,
    )
    saved.current_task = task
    saved.workflow_mode = "plan"
    saved.permission_mode = "prompt"
    saved.prompt_format = "native"
    saved.prompt_sections = ["Runtime Config", "Workflow Context"]
    saved.append(Message(role=Role.USER, content=task))
    saved.persist()

    # The handle is created only after the session has been persisted.
    handle = _runtime_handle(temp_dir)

    resumed = resume_runtime_shell_session(handle, session_id=saved.session_id)
    assert resumed is True
    assert handle.session.session_id == saved.session_id
    assert handle.current_task == task
    assert handle.workflow_mode == "plan"
    assert handle.active_permission_mode == "prompt"
    assert handle.prompt_format == "native"
603
604
def test_clear_runtime_shell_history_resets_owner_shell_state(
    temp_dir: Path,
) -> None:
    """Check that clearing history resets the handle's shell state."""
    handle = _runtime_handle(temp_dir)
    session_id_before = handle.session.session_id

    # Put the handle into a non-default state so each reset is observable.
    handle.current_task = "Keep runtime state tidy."
    handle.prompt_format = "native"
    handle.prompt_sections = ["Runtime Config", "Workflow Context"]
    handle.set_workflow_mode("clarify")
    handle.queue_steering_message("Stay in runtime.")

    clear_runtime_shell_history(handle)

    # A different session id is in place and the task, mode, and prompt state are reset.
    assert handle.session.session_id != session_id_before
    assert handle.current_task is None
    assert handle.workflow_mode == "execute"
    assert handle.prompt_format is None
    assert handle.prompt_sections == []
    assert handle.messages == []
    assert handle.last_turn_summary is None
    # The steering message queued before the clear is gone as well.
    assert handle.drain_steering_messages() == []