Python · 22023 bytes Raw Blame History
1 """Tests for persisted session state and resume support."""
2
3 from __future__ import annotations
4
5 import json
6 from pathlib import Path
7
8 import pytest
9
10 from loader.agent.loop import Agent, AgentConfig, ReasoningConfig
11 from loader.llm.base import CompletionResponse, Message, Role, ToolCall
12 from loader.runtime.completion_trace import CompletionTraceEntry
13 from loader.runtime.evidence_provenance import EvidenceProvenance
14 from loader.runtime.prompt_history import PromptSnapshot
15 from loader.runtime.runtime_handle import RuntimeHandle
16 from loader.runtime.session import ConversationSession
17 from loader.runtime.workflow_ledger import WorkflowLedger, WorkflowLedgerItem
18 from loader.runtime.workflow_policy import WorkflowTimelineEntry
19 from tests.helpers.runtime_harness import ScriptedBackend
20
21
def _dummy_system() -> Message:
    """Build the placeholder system message used as a session's system factory."""
    placeholder = Message(content="system", role=Role.SYSTEM)
    return placeholder
24
25
26 def _dummy_few_shots() -> list[Message]:
27 return []
28
29
@pytest.mark.asyncio
async def test_session_persists_and_resumes_across_agent_restart(temp_dir: Path) -> None:
    """A finished turn is persisted and can be resumed by a brand-new agent.

    The first agent runs a scripted two-step turn: a ``write`` tool call
    followed by a plain completion. A second agent built against the same
    project root then resumes that session by id, and the assertions check
    the task, permission mode, workflow mode, turn summary and user message
    it exposes afterwards.
    """
    task = "Create hello.txt in the workspace root."
    write_call = ToolCall(
        id="write-1",
        name="write",
        arguments={
            "file_path": str(temp_dir / "hello.txt"),
            "content": "hello\n",
        },
    )
    scripted_turn = [
        CompletionResponse(
            content="I'll create the file.",
            tool_calls=[write_call],
            usage={"prompt_tokens": 12, "completion_tokens": 5},
        ),
        CompletionResponse(
            content="The file is written.",
            usage={"prompt_tokens": 10, "completion_tokens": 4},
        ),
    ]
    config = AgentConfig(auto_context=False, stream=False)
    first_agent = Agent(
        backend=ScriptedBackend(completions=scripted_turn),
        config=config,
        project_root=temp_dir,
    )

    response = await first_agent.run(task)

    assert response.startswith("The file is written.")
    session_id = first_agent.session.session_id
    assert first_agent.session.storage_path.exists()

    # The second agent has nothing scripted: everything asserted below must
    # come from the resumed session rather than from a new model call.
    empty_backend = ScriptedBackend(completions=[])
    resumed_agent = Agent(backend=empty_backend, config=config, project_root=temp_dir)

    assert resumed_agent.resume_session(session_id) is True
    assert resumed_agent.session.session_id == session_id
    assert resumed_agent._current_task == task
    assert resumed_agent.active_permission_mode == "workspace-write"
    assert resumed_agent.workflow_mode == first_agent.workflow_mode

    summary = resumed_agent.last_turn_summary
    assert summary is not None
    assert summary.definition_of_done is not None
    assert summary.definition_of_done.task_statement == task

    # The original user prompt must still be part of the restored transcript.
    assert any(
        message.content == task
        for message in resumed_agent.messages
        if message.role == Role.USER
    )
84
85
def test_agent_clear_history_rebuilds_a_fresh_runtime_session(temp_dir: Path) -> None:
    """``clear_history`` swaps in a new session id and resets runtime state."""
    backend = ScriptedBackend()
    config = AgentConfig(auto_context=False, stream=False)
    agent = Agent(backend=backend, config=config, project_root=temp_dir)
    stale_session_id = agent.session.session_id

    # Dirty each piece of state that the assertions below expect to be reset.
    agent.current_task = "Keep runtime state tidy."
    agent.prompt_format = "native"
    agent.prompt_sections = ["Runtime Config", "Workflow Context"]
    agent.set_workflow_mode("clarify")
    agent.queue_steering_message("Stay in runtime.")

    agent.clear_history()

    assert agent.session.session_id != stale_session_id
    assert agent.current_task is None
    assert agent.workflow_mode == "execute"
    assert agent.prompt_format is None
    assert agent.prompt_sections == []
    assert agent.messages == []
    assert agent.last_turn_summary is None
    # Checked last: draining is the only call here that consumes state.
    assert agent.drain_steering_messages() == []
109
110
def test_session_rotation_kicks_in_at_size_cap(temp_dir: Path) -> None:
    """Appending past ``rotate_after_bytes`` leaves a sibling ``.1.json`` file on disk."""
    session = ConversationSession(
        project_root=temp_dir,
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        rotate_after_bytes=250,
    )

    # Each message carries 120 filler characters against a 250-byte cap.
    padding = "x" * 120
    for index in range(6):
        bulky = Message(role=Role.USER, content=f"Message {index}: {padding}")
        session.append(bulky)

    storage_file = session.storage_path
    assert storage_file.exists()
    assert storage_file.with_suffix(".1.json").exists()
129
130
def test_session_compaction_persists_summary_and_recent_messages(temp_dir: Path) -> None:
    """Compaction keeps the newest four messages behind a ``[COMPACTED CONTEXT]`` lead."""
    transcript = [
        (Role.USER, "Kick off runtime audit"),
        (Role.ASSISTANT, "Initial findings"),
        (Role.USER, "Focus on sessions"),
        (Role.ASSISTANT, "Compaction design drafted"),
        (Role.USER, "Preserve the latest four messages"),
        (Role.ASSISTANT, "Ready to compact"),
    ]
    keep_last = 4
    session = ConversationSession(
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        project_root=temp_dir,
        messages=[Message(role=role, content=text) for role, text in transcript],
        # A threshold of 1 is used so that the call below yields a result.
        auto_compaction_input_tokens_threshold=1,
        compaction_keep_last_messages=keep_last,
    )

    result = session.maybe_compact()

    assert result is not None
    assert session.compaction is not None
    assert session.storage_path.exists()
    assert session.messages[0].content.startswith("[COMPACTED CONTEXT]")

    expected_tail = [text for _, text in transcript[-keep_last:]]
    actual_tail = [message.content for message in session.messages[-keep_last:]]
    assert actual_tail == expected_tail
160
161
def test_session_persists_permission_policy_metadata(temp_dir: Path) -> None:
    """Permission, prompt and workflow metadata survive a save/load round trip.

    The session is created with one set of permission and prompt settings,
    overwritten through ``update_runtime_state``, and given one workflow
    timeline entry plus one completion trace entry. The reloaded session is
    expected to expose the updated values rather than the constructor ones.
    """
    rules_source = str(temp_dir / ".loader" / "permission-rules.json")
    updated_sections = ["Runtime Config", "Workflow Context", "Project Context"]
    reason_summary = "task looks complex enough to benefit from a persisted plan"
    decision_summary = (
        "continued after verification failed and the runtime re-entered execute mode"
    )
    transition_summary = "completion -> finalize [terminal] Finalizing completed turn"
    verification_evidence = "verification contradiction: pytest still failed"

    session = ConversationSession(
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        project_root=temp_dir,
        permission_mode="prompt",
        permission_prompting_enabled=True,
        permission_rule_counts={"allow": 1, "deny": 2, "ask": 3},
        permission_rules_source=rules_source,
        prompt_format="react",
        prompt_sections=["Runtime Config", "Workflow Context"],
    )

    # Copies of the section list are passed so no two consumers share one object.
    session.update_runtime_state(
        current_task="Inspect permission history",
        runtime_owner_type="RuntimeHandle",
        permission_mode="allow",
        permission_prompting_enabled=True,
        permission_rule_counts={"allow": 2, "deny": 1, "ask": 4},
        permission_rules_source=rules_source,
        prompt_format="native",
        prompt_sections=list(updated_sections),
        workflow_reason_code="task_is_complex",
        workflow_reason_summary=reason_summary,
        workflow_decision_kind="initial_route",
        workflow_ambiguity_score=0.2,
        workflow_complexity_score=0.6,
        workflow_scheduled_next_mode="execute",
        last_completion_decision_code="verification_failed_reentry",
        last_completion_decision_summary=decision_summary,
        last_turn_transition_summary=transition_summary,
        last_turn_transition_kind="terminal",
        last_turn_transition_reason_code="turn_complete",
    )

    route_entry = WorkflowTimelineEntry(
        timestamp="2026-04-07T12:00:00Z",
        kind="route",
        mode="plan",
        reason_code="task_is_complex",
        summary="plan: workflow pressure favors a persisted plan before execution",
        decision_kind="initial_route",
        route_score=0.72,
        runner_up_mode="clarify",
        runner_up_score=0.61,
        scheduled_next_mode="execute",
        unresolved_questions=["Scope is still broad."],
        prompt_format="native",
        prompt_sections=list(updated_sections),
    )
    session.append_workflow_timeline_entry(route_entry)

    trace_entry = CompletionTraceEntry(
        stage="definition_of_done",
        outcome="continue",
        decision_code="verification_failed_reentry",
        decision_summary=decision_summary,
        evidence_summary=[verification_evidence],
    )
    session.append_completion_trace_entry(trace_entry)

    reloaded = ConversationSession.load(
        session_id=session.session_id,
        project_root=temp_dir,
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
    )

    assert reloaded is not None

    # Permission policy and runtime owner.
    assert reloaded.permission_mode == "allow"
    assert reloaded.permission_prompting_enabled is True
    assert reloaded.permission_rule_counts == {"allow": 2, "deny": 1, "ask": 4}
    assert reloaded.permission_rules_source == rules_source
    assert reloaded.runtime_owner_type == "RuntimeHandle"
    assert reloaded.runtime_owner_path == "runtime-handle"

    # Prompt shape.
    assert reloaded.prompt_format == "native"
    assert reloaded.prompt_sections == updated_sections

    # Workflow routing decision.
    assert reloaded.workflow_reason_code == "task_is_complex"
    assert reloaded.workflow_reason_summary == reason_summary
    assert reloaded.workflow_decision_kind == "initial_route"
    assert reloaded.workflow_ambiguity_score == pytest.approx(0.2)
    assert reloaded.workflow_complexity_score == pytest.approx(0.6)
    assert reloaded.workflow_scheduled_next_mode == "execute"

    # Completion decision and its trace.
    assert reloaded.last_completion_decision_code == "verification_failed_reentry"
    assert reloaded.last_completion_decision_summary == decision_summary
    trace_codes = [entry.decision_code for entry in reloaded.completion_trace]
    assert trace_codes == ["verification_failed_reentry"]
    assert reloaded.completion_trace[0].evidence_summary == [verification_evidence]

    # Last turn transition.
    assert reloaded.last_turn_transition_summary == transition_summary
    assert reloaded.last_turn_transition_kind == "terminal"
    assert reloaded.last_turn_transition_reason_code == "turn_complete"

    # Workflow timeline.
    assert len(reloaded.workflow_timeline) == 1
    restored_route = reloaded.workflow_timeline[0]
    assert restored_route.mode == "plan"
    assert restored_route.route_score == pytest.approx(0.72)
    assert restored_route.unresolved_questions == ["Scope is still broad."]
279
280
def test_resume_session_updates_runtime_owner_metadata(temp_dir: Path) -> None:
    """A session resumed through ``RuntimeHandle`` is expected to record that owner on disk."""
    # Persist a session created by a plain Agent so there is something to resume.
    agent_backend = ScriptedBackend()
    agent_config = AgentConfig(auto_context=False, stream=False)
    agent = Agent(backend=agent_backend, config=agent_config, project_root=temp_dir)
    agent.session.persist()
    session_id = agent.session.session_id

    # The handle gets its own backend and config, independent of the agent's.
    handle_backend = ScriptedBackend()
    handle_config = AgentConfig(auto_context=False, stream=False)
    handle = RuntimeHandle(
        backend=handle_backend,
        config=handle_config,
        project_root=temp_dir,
    )

    assert handle.resume_session(session_id) is True

    # Load from storage again so the check covers what was written, not the
    # handle's in-memory session.
    reloaded = ConversationSession.load(
        session_id=session_id,
        project_root=temp_dir,
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
    )

    assert reloaded is not None
    assert reloaded.runtime_owner_type == "RuntimeHandle"
    assert reloaded.runtime_owner_path == "runtime-handle"
308
309
def test_session_prefers_canonical_workflow_timeline_for_completion_trace(
    temp_dir: Path,
) -> None:
    """The reloaded completion trace is expected to mirror the workflow timeline.

    A trace entry with the code ``stale_completion_trace`` is appended
    directly, followed by two completion-related timeline entries. The test
    then checks that the persisted JSON has no top-level ``completion_trace``
    key and that the reloaded trace carries only the two timeline reason
    codes, in order.
    """
    followthrough_evidence = "showing the requested work was actually carried out"

    session = ConversationSession(
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        project_root=temp_dir,
    )

    session.update_runtime_state(
        current_task="Explain why the turn stopped",
        last_completion_decision_code="continuation_budget_exhausted",
        last_completion_decision_summary=(
            "stopped because the continuation budget was exhausted while "
            "follow-through evidence was still missing"
        ),
    )
    # This directly appended entry must not show up after the reload.
    session.append_completion_trace_entry(
        CompletionTraceEntry(
            stage="definition_of_done",
            outcome="complete",
            decision_code="stale_completion_trace",
            decision_summary="this legacy trace entry should be ignored",
        )
    )
    session.append_workflow_timeline_entry(
        WorkflowTimelineEntry(
            timestamp="2026-04-09T12:00:00Z",
            kind="completion_check",
            mode="execute",
            reason_code="premature_completion_nudge",
            summary=(
                "completion: requested one continuation because the non-mutating "
                "response looked incomplete"
            ),
            decision_kind="forced",
            policy_stage="continuation_check",
            policy_outcome="continue",
            evidence_summary=[followthrough_evidence],
        )
    )
    session.append_workflow_timeline_entry(
        WorkflowTimelineEntry(
            timestamp="2026-04-09T12:01:00Z",
            kind="completion_finalize",
            mode="execute",
            reason_code="continuation_budget_exhausted",
            summary=(
                "completion: stopped because the continuation budget was exhausted "
                "while follow-through evidence was still missing"
            ),
            decision_kind="forced",
            policy_stage="continuation_check",
            policy_outcome="finalize",
            evidence_summary=[followthrough_evidence],
        )
    )

    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # default, so the raw-file check behaves the same on every machine.
    persisted = json.loads(session.storage_path.read_text(encoding="utf-8"))
    assert "completion_trace" not in persisted

    reloaded = ConversationSession.load(
        project_root=temp_dir,
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        session_id=session.session_id,
    )

    assert reloaded is not None
    assert [entry.decision_code for entry in reloaded.completion_trace] == [
        "premature_completion_nudge",
        "continuation_budget_exhausted",
    ]
    final_entry = reloaded.completion_trace[-1]
    assert final_entry.stage == "continuation_check"
    assert final_entry.outcome == "finalize"
    assert final_entry.evidence_summary == [followthrough_evidence]
388
389
def test_session_projects_live_completion_trace_from_workflow_timeline(
    temp_dir: Path,
) -> None:
    """Timeline entries appear in ``completion_trace`` on the same session object.

    No reload happens here: the trace is read from the session the timeline
    entries were appended to, including evidence summary and provenance.
    """
    session = ConversationSession(
        project_root=temp_dir,
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
    )

    accepted_entry = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:00:00Z",
        kind="completion_check",
        mode="execute",
        reason_code="completion_response_accepted",
        summary="completion: accepted the response because follow-through evidence was present",
        decision_kind="forced",
        policy_stage="continuation_check",
        policy_outcome="accept",
    )
    session.append_workflow_timeline_entry(accepted_entry)

    missing_verification = EvidenceProvenance(
        category="verification",
        source="dod.verification_commands",
        summary="verification evidence was still missing for `pytest -q`",
        status="missing",
        subject="pytest -q",
    )
    exhausted_entry = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:01:00Z",
        kind="completion_finalize",
        mode="execute",
        reason_code="continuation_budget_exhausted",
        summary="completion: stopped because verification evidence was still missing",
        decision_kind="forced",
        policy_stage="continuation_check",
        policy_outcome="finalize",
        evidence_summary=["a passing verification result from `pytest -q`"],
        evidence_provenance=[missing_verification],
    )
    session.append_workflow_timeline_entry(exhausted_entry)

    session.update_runtime_state(
        last_completion_decision_code="continuation_budget_exhausted",
        last_completion_decision_summary=(
            "stopped because verification evidence was still missing"
        ),
    )

    trace_codes = [entry.decision_code for entry in session.completion_trace]
    assert trace_codes == [
        "completion_response_accepted",
        "continuation_budget_exhausted",
    ]
    assert session.completion_trace[-1].stage == "continuation_check"
    assert session.completion_trace[-1].outcome == "finalize"
    assert session.completion_trace[-1].evidence_summary == [
        "a passing verification result from `pytest -q`"
    ]
    provenance_summaries = [
        item.summary for item in session.completion_trace[-1].evidence_provenance
    ]
    assert provenance_summaries == [
        "verification evidence was still missing for `pytest -q`"
    ]
452
453
def test_session_persists_workflow_ledger_state(temp_dir: Path) -> None:
    """Ledger items stored via ``update_workflow_ledger`` are present after a reload."""
    session = ConversationSession(
        project_root=temp_dir,
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
    )

    # One item per ledger section, each with a different status.
    assumption = WorkflowLedgerItem(
        text="notes.txt stays out of scope unless clarified otherwise.",
        status="contradicted",
        introduced_phase="clarify",
        updated_phase="recovery",
        evidence=["Clarify scope assumed `notes.txt` stayed out of scope."],
    )
    anchor = WorkflowLedgerItem(
        text="notes.txt exists in the workspace root.",
        status="changed",
        introduced_phase="clarify",
        updated_phase="recovery",
    )
    boundary = WorkflowLedgerItem(
        text="Escalate before broad UX changes.",
        status="tracked",
        introduced_phase="clarify",
    )
    ledger = WorkflowLedger(
        assumptions=[assumption],
        acceptance_anchors=[anchor],
        decision_boundaries=[boundary],
    )
    session.update_workflow_ledger(ledger)

    reloaded = ConversationSession.load(
        session_id=session.session_id,
        project_root=temp_dir,
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
    )

    assert reloaded is not None
    restored = reloaded.workflow_ledger
    assert restored.assumptions[0].status == "contradicted"
    assert restored.assumptions[0].updated_phase == "recovery"
    assert restored.acceptance_anchors[0].status == "changed"
    assert restored.decision_boundaries[0].text == "Escalate before broad UX changes."
504
505
def test_session_persists_prompt_history_state(temp_dir: Path) -> None:
    """Two appended prompt snapshots come back in order, with content, after a reload."""
    session = ConversationSession(
        project_root=temp_dir,
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
    )

    task = "Tighten Loader workflow behavior"

    plan_snapshot = PromptSnapshot(
        timestamp="2026-04-07T14:00:00Z",
        workflow_mode="plan",
        permission_mode="prompt",
        current_task=task,
        prompt_format="native",
        prompt_sections=["Runtime Config", "Workflow Context", "Mode Guidance"],
        content="# Introduction\nplan around planned.txt\n",
    )
    session.append_prompt_snapshot(plan_snapshot)

    execute_snapshot = PromptSnapshot(
        timestamp="2026-04-07T14:02:00Z",
        workflow_mode="execute",
        permission_mode="prompt",
        current_task=task,
        prompt_format="native",
        prompt_sections=[
            "Runtime Config",
            "Workflow Context",
            "Mode Guidance",
            "Project Context",
        ],
        content="# Introduction\nexecute around notes.txt\n# Project Context\npython\n",
    )
    session.append_prompt_snapshot(execute_snapshot)

    reloaded = ConversationSession.load(
        session_id=session.session_id,
        project_root=temp_dir,
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
    )

    assert reloaded is not None
    history = reloaded.prompt_history
    assert len(history) == 2
    assert history[0].workflow_mode == "plan"
    assert history[-1].workflow_mode == "execute"
    assert "notes.txt" in history[-1].content
553
554
@pytest.mark.asyncio
async def test_turn_summary_usage_rolls_up_into_session_totals(temp_dir: Path) -> None:
    """One turn's token usage is expected in both per-turn and cumulative totals.

    The backend reports ``prompt_tokens`` / ``completion_tokens``; the turn
    summary is checked under the ``input_tokens`` / ``output_tokens`` keys.
    """
    only_reply = CompletionResponse(
        content="Here's the answer.",
        usage={"prompt_tokens": 9, "completion_tokens": 3},
    )
    backend = ScriptedBackend(completions=[only_reply])
    config = AgentConfig(
        auto_context=False,
        stream=False,
        reasoning=ReasoningConfig(completion_check=False),
    )
    agent = Agent(backend=backend, config=config, project_root=temp_dir)

    await agent.run("Write a short release-note style summary of what Loader does well.")

    summary = agent.last_turn_summary
    assert summary is not None

    assert summary.usage["input_tokens"] == 9
    assert summary.usage["output_tokens"] == 3

    assert summary.cumulative_usage["input_tokens"] == 9
    assert summary.cumulative_usage["output_tokens"] == 3
    assert summary.cumulative_usage["turns"] == 1