| 1 | """Tests for persisted session state and resume support.""" |
| 2 | |
| 3 | from __future__ import annotations |
| 4 | |
| 5 | import json |
| 6 | from pathlib import Path |
| 7 | |
| 8 | import pytest |
| 9 | |
| 10 | from loader.agent.loop import Agent, AgentConfig, ReasoningConfig |
| 11 | from loader.llm.base import CompletionResponse, Message, Role, ToolCall |
| 12 | from loader.runtime.completion_trace import CompletionTraceEntry |
| 13 | from loader.runtime.dod import ( |
| 14 | DefinitionOfDoneStore, |
| 15 | VerificationEvidence, |
| 16 | create_definition_of_done, |
| 17 | ) |
| 18 | from loader.runtime.evidence_provenance import EvidenceProvenance |
| 19 | from loader.runtime.prompt_history import PromptSnapshot |
| 20 | from loader.runtime.runtime_handle import RuntimeHandle |
| 21 | from loader.runtime.session import ConversationSession |
| 22 | from loader.runtime.workflow_ledger import WorkflowLedger, WorkflowLedgerItem |
| 23 | from loader.runtime.workflow_policy import WorkflowTimelineEntry |
| 24 | from tests.helpers.runtime_harness import ScriptedBackend |
| 25 | |
| 26 | |
def _dummy_system() -> Message:
    """Build the placeholder system message handed to sessions under test."""
    placeholder = Message(role=Role.SYSTEM, content="system")
    return placeholder
| 29 | |
| 30 | |
| 31 | def _dummy_few_shots() -> list[Message]: |
| 32 | return [] |
| 33 | |
| 34 | |
@pytest.mark.asyncio
async def test_session_persists_and_resumes_across_agent_restart(temp_dir: Path) -> None:
    """Run one scripted turn, then resume the stored session from a second agent.

    The first agent performs a ``write`` tool call followed by a final answer.
    A second agent (with an empty scripted backend) resumes the same session id
    and is expected to expose the task, modes, turn summary, and user message.
    """
    task = "Create hello.txt in the workspace root."

    write_call = ToolCall(
        id="write-1",
        name="write",
        arguments={
            "file_path": str(temp_dir / "hello.txt"),
            "content": "hello\n",
        },
    )
    scripted_turns = [
        CompletionResponse(
            content="I'll create the file.",
            tool_calls=[write_call],
            usage={"prompt_tokens": 12, "completion_tokens": 5},
        ),
        CompletionResponse(
            content="The file is written.",
            usage={"prompt_tokens": 10, "completion_tokens": 4},
        ),
    ]
    backend = ScriptedBackend(completions=scripted_turns)
    config = AgentConfig(auto_context=False, stream=False)
    first_agent = Agent(backend=backend, config=config, project_root=temp_dir)

    final_text = await first_agent.run(task)

    assert final_text.startswith("The file is written.")
    session_id = first_agent.session.session_id
    assert first_agent.session.storage_path.exists()

    # The resumed agent gets no scripted completions: it only reloads state.
    resumed_agent = Agent(
        backend=ScriptedBackend(completions=[]),
        config=config,
        project_root=temp_dir,
    )

    assert resumed_agent.resume_session(session_id) is True
    assert resumed_agent.session.session_id == session_id
    assert resumed_agent._current_task == task
    assert resumed_agent.active_permission_mode == "workspace-write"
    assert resumed_agent.workflow_mode == first_agent.workflow_mode

    assert resumed_agent.last_turn_summary is not None
    assert resumed_agent.last_turn_summary.definition_of_done is not None
    restored_dod = resumed_agent.last_turn_summary.definition_of_done
    assert restored_dod.task_statement == task

    user_prompts = [
        entry.content for entry in resumed_agent.messages if entry.role == Role.USER
    ]
    assert task in user_prompts
| 89 | |
| 90 | |
def test_agent_clear_history_rebuilds_a_fresh_runtime_session(temp_dir: Path) -> None:
    """``clear_history`` swaps in a new session id and resets the dirtied state."""
    quiet_config = AgentConfig(auto_context=False, stream=False)
    agent = Agent(backend=ScriptedBackend(), config=quiet_config, project_root=temp_dir)
    session_id_before = agent.session.session_id

    # Dirty every piece of runtime state the assertions below inspect.
    agent.current_task = "Keep runtime state tidy."
    agent.prompt_format = "native"
    agent.prompt_sections = ["Runtime Config", "Workflow Context"]
    agent.set_workflow_mode("clarify")
    agent.queue_steering_message("Stay in runtime.")

    agent.clear_history()

    session_id_after = agent.session.session_id
    assert session_id_after != session_id_before
    assert agent.current_task is None
    assert agent.workflow_mode == "execute"
    assert agent.prompt_format is None
    assert agent.prompt_sections == []
    assert agent.messages == []
    assert agent.last_turn_summary is None
    assert agent.drain_steering_messages() == []
| 114 | |
| 115 | |
def test_session_rotation_kicks_in_at_size_cap(temp_dir: Path) -> None:
    """Appending messages past ``rotate_after_bytes`` leaves a ``.1.json`` file."""
    session = ConversationSession(
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        project_root=temp_dir,
        rotate_after_bytes=250,
    )

    filler = "x" * 120
    for number in range(6):
        text = f"Message {number}: " + filler
        session.append(Message(role=Role.USER, content=text))

    live_file = session.storage_path
    assert live_file.exists()
    rotated_file = session.storage_path.with_suffix(".1.json")
    assert rotated_file.exists()
| 134 | |
| 135 | |
def test_session_compaction_persists_summary_and_recent_messages(temp_dir: Path) -> None:
    """Compaction puts a summary message first and keeps the last four messages."""
    transcript = [
        (Role.USER, "Kick off runtime audit"),
        (Role.ASSISTANT, "Initial findings"),
        (Role.USER, "Focus on sessions"),
        (Role.ASSISTANT, "Compaction design drafted"),
        (Role.USER, "Preserve the latest four messages"),
        (Role.ASSISTANT, "Ready to compact"),
    ]
    session = ConversationSession(
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        project_root=temp_dir,
        messages=[Message(role=role, content=text) for role, text in transcript],
        # A threshold of 1 is used so that maybe_compact() returns a result here.
        auto_compaction_input_tokens_threshold=1,
        compaction_keep_last_messages=4,
    )

    outcome = session.maybe_compact()

    assert outcome is not None
    assert session.compaction is not None
    assert session.storage_path.exists()

    leading = session.messages[0]
    assert leading.content.startswith("[COMPACTED CONTEXT]")

    kept_tail = [entry.content for entry in session.messages[-4:]]
    expected_tail = [text for _, text in transcript[-4:]]
    assert kept_tail == expected_tail
| 165 | |
| 166 | |
def test_session_compaction_summarizes_active_dod_failure(temp_dir: Path) -> None:
    """The compaction summary reports the active DoD's failed verification.

    A definition of done in ``fixing`` state with one failed evidence record is
    saved to disk and referenced via ``active_dod_path``; the summary returned
    by ``maybe_compact`` must mention its status and the failing file.
    """
    store = DefinitionOfDoneStore(temp_dir)
    failing_dod = create_definition_of_done("Create a generated guide.")
    failing_dod.status = "fixing"
    failing_dod.last_verification_result = "failed"
    failing_dod.pending_items = [
        "Expand generated chapters to satisfy quality verification"
    ]

    thin_chapter = temp_dir / "guide" / "chapters" / "05-load-balancing.html"
    verifier_output = (
        "Exit code 1\n"
        "HTML guide content quality issues:\n"
        f"{thin_chapter}: "
        "thin content (1500 text chars, expected at least 1758)\n"
    )
    failed_check = VerificationEvidence(
        command="python3 verify_html_quality.py",
        passed=False,
        output=verifier_output,
    )
    failing_dod.evidence.append(failed_check)
    saved_path = store.save(failing_dod)

    history = [
        Message(role=Role.USER, content="Create the guide."),
        Message(role=Role.ASSISTANT, content="Created draft files."),
        Message(
            role=Role.TOOL,
            content="Observation [notepad_read]: Result: guide complete",
        ),
        Message(role=Role.ASSISTANT, content="Trying to finish."),
        Message(role=Role.USER, content="Continue repairing."),
    ]
    session = ConversationSession(
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        project_root=temp_dir,
        messages=history,
        active_dod_path=str(saved_path),
        auto_compaction_input_tokens_threshold=1,
        compaction_keep_last_messages=2,
    )

    outcome = session.maybe_compact()

    assert outcome is not None
    assert session.messages[0].content.startswith("[COMPACTED CONTEXT]")

    summary_text = outcome.summary
    assert "- Active DoD: status=fixing; last verification=failed" in summary_text
    assert "05-load-balancing.html" in summary_text
    assert "thin content" in summary_text
    assert "authoritative over older summaries or durable memory notes" in summary_text
| 213 | |
| 214 | |
def test_build_request_messages_omits_large_mutation_tool_calls_from_history(
    temp_dir: Path,
) -> None:
    """Large ``write``/``edit`` calls are absent from the request, not from storage.

    The assistant message in the built request keeps its text but carries no
    tool calls, while the session's own message still holds the full arguments.
    """
    page_body = "<html>" + ("x" * 400) + "</html>"
    before_text = "old\n" * 120
    after_text = "new\n" * 120

    big_write = ToolCall(
        id="write-1",
        name="write",
        arguments={
            "file_path": str(temp_dir / "guides" / "nginx" / "index.html"),
            "content": page_body,
        },
    )
    big_edit = ToolCall(
        id="edit-1",
        name="edit",
        arguments={
            "file_path": str(temp_dir / "README.md"),
            "old_string": before_text,
            "new_string": after_text,
        },
    )
    assistant_turn = Message(
        role=Role.ASSISTANT,
        content="I'll write the first files now.",
        tool_calls=[big_write, big_edit],
    )
    session = ConversationSession(
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        project_root=temp_dir,
        messages=[
            Message(role=Role.USER, content="Create the guide."),
            assistant_turn,
        ],
    )

    outgoing = session.build_request_messages()

    # Index 2 of the built request is the assistant turn checked here.
    assert outgoing[2].tool_calls == []
    assert outgoing[2].content == "I'll write the first files now."

    # The stored history must be left untouched by request building.
    stored_calls = session.messages[1].tool_calls
    assert stored_calls[0].arguments["content"] == page_body
    assert stored_calls[1].arguments["old_string"] == before_text
    assert stored_calls[1].arguments["new_string"] == after_text
| 260 | |
| 261 | |
def test_session_persists_permission_policy_metadata(temp_dir: Path) -> None:
    """Runtime-state, timeline, and completion-trace metadata survive a reload.

    The session starts with one set of permission/prompt values, which are then
    overwritten through ``update_runtime_state``. The session loaded back from
    disk is expected to report the updated values.
    """
    rules_source = str(temp_dir / ".loader" / "permission-rules.json")
    complexity_summary = "task looks complex enough to benefit from a persisted plan"
    reentry_summary = (
        "continued after verification failed and the runtime re-entered execute mode"
    )
    finalize_summary = "completion -> finalize [terminal] Finalizing completed turn"

    original = ConversationSession(
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        project_root=temp_dir,
        permission_mode="prompt",
        permission_prompting_enabled=True,
        permission_rule_counts={"allow": 1, "deny": 2, "ask": 3},
        permission_rules_source=rules_source,
        prompt_format="react",
        prompt_sections=["Runtime Config", "Workflow Context"],
    )

    original.update_runtime_state(
        current_task="Inspect permission history",
        runtime_owner_type="RuntimeHandle",
        permission_mode="allow",
        permission_prompting_enabled=True,
        permission_rule_counts={"allow": 2, "deny": 1, "ask": 4},
        permission_rules_source=rules_source,
        prompt_format="native",
        prompt_sections=["Runtime Config", "Workflow Context", "Project Context"],
        workflow_reason_code="task_is_complex",
        workflow_reason_summary=complexity_summary,
        workflow_decision_kind="initial_route",
        workflow_ambiguity_score=0.2,
        workflow_complexity_score=0.6,
        workflow_scheduled_next_mode="execute",
        last_completion_decision_code="verification_failed_reentry",
        last_completion_decision_summary=reentry_summary,
        last_turn_transition_summary=finalize_summary,
        last_turn_transition_kind="terminal",
        last_turn_transition_reason_code="turn_complete",
    )

    route_entry = WorkflowTimelineEntry(
        timestamp="2026-04-07T12:00:00Z",
        kind="route",
        mode="plan",
        reason_code="task_is_complex",
        summary="plan: workflow pressure favors a persisted plan before execution",
        decision_kind="initial_route",
        route_score=0.72,
        runner_up_mode="clarify",
        runner_up_score=0.61,
        scheduled_next_mode="execute",
        unresolved_questions=["Scope is still broad."],
        prompt_format="native",
        prompt_sections=["Runtime Config", "Workflow Context", "Project Context"],
    )
    original.append_workflow_timeline_entry(route_entry)

    trace_entry = CompletionTraceEntry(
        stage="definition_of_done",
        outcome="continue",
        decision_code="verification_failed_reentry",
        decision_summary=reentry_summary,
        evidence_summary=["verification contradiction: pytest still failed"],
    )
    original.append_completion_trace_entry(trace_entry)

    restored = ConversationSession.load(
        project_root=temp_dir,
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        session_id=original.session_id,
    )

    assert restored is not None

    # Permission policy.
    assert restored.permission_mode == "allow"
    assert restored.permission_prompting_enabled is True
    assert restored.permission_rule_counts == {"allow": 2, "deny": 1, "ask": 4}
    assert restored.permission_rules_source == rules_source

    # Runtime ownership and prompt shape.
    assert restored.runtime_owner_type == "RuntimeHandle"
    assert restored.runtime_owner_path == "runtime-handle"
    assert restored.prompt_format == "native"
    assert restored.prompt_sections == [
        "Runtime Config",
        "Workflow Context",
        "Project Context",
    ]

    # Workflow routing decision.
    assert restored.workflow_reason_code == "task_is_complex"
    assert restored.workflow_reason_summary == complexity_summary
    assert restored.workflow_decision_kind == "initial_route"
    assert restored.workflow_ambiguity_score == pytest.approx(0.2)
    assert restored.workflow_complexity_score == pytest.approx(0.6)
    assert restored.workflow_scheduled_next_mode == "execute"

    # Completion decision and trace.
    assert restored.last_completion_decision_code == "verification_failed_reentry"
    assert restored.last_completion_decision_summary == reentry_summary
    trace_codes = [item.decision_code for item in restored.completion_trace]
    assert trace_codes == ["verification_failed_reentry"]
    assert restored.completion_trace[0].evidence_summary == [
        "verification contradiction: pytest still failed"
    ]

    # Last turn transition.
    assert restored.last_turn_transition_summary == finalize_summary
    assert restored.last_turn_transition_kind == "terminal"
    assert restored.last_turn_transition_reason_code == "turn_complete"

    # Workflow timeline.
    assert len(restored.workflow_timeline) == 1
    restored_route = restored.workflow_timeline[0]
    assert restored_route.mode == "plan"
    assert restored_route.route_score == pytest.approx(0.72)
    assert restored_route.unresolved_questions == ["Scope is still broad."]
| 379 | |
| 380 | |
def test_resume_session_updates_runtime_owner_metadata(temp_dir: Path) -> None:
    """Resuming via ``RuntimeHandle`` is reflected in the stored owner metadata."""
    seed_agent = Agent(
        backend=ScriptedBackend(),
        config=AgentConfig(auto_context=False, stream=False),
        project_root=temp_dir,
    )
    seed_agent.session.persist()
    persisted_id = seed_agent.session.session_id

    runtime = RuntimeHandle(
        backend=ScriptedBackend(),
        config=AgentConfig(auto_context=False, stream=False),
        project_root=temp_dir,
    )
    resumed = runtime.resume_session(persisted_id)
    assert resumed is True

    # Read the session back from storage instead of inspecting the handle.
    on_disk = ConversationSession.load(
        project_root=temp_dir,
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        session_id=persisted_id,
    )

    assert on_disk is not None
    assert on_disk.runtime_owner_type == "RuntimeHandle"
    assert on_disk.runtime_owner_path == "runtime-handle"
| 408 | |
| 409 | |
def test_session_prefers_canonical_workflow_timeline_for_completion_trace(
    temp_dir: Path,
) -> None:
    """The reloaded completion trace comes from the workflow timeline entries.

    A completion-trace entry with a different decision code is appended before
    the two timeline entries. The persisted JSON must have no
    ``completion_trace`` key, and the reloaded trace must list only the two
    timeline reason codes.
    """
    follow_through = "showing the requested work was actually carried out"

    session = ConversationSession(
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        project_root=temp_dir,
    )

    session.update_runtime_state(
        current_task="Explain why the turn stopped",
        last_completion_decision_code="continuation_budget_exhausted",
        last_completion_decision_summary=(
            "stopped because the continuation budget was exhausted while "
            "follow-through evidence was still missing"
        ),
    )

    stale_entry = CompletionTraceEntry(
        stage="definition_of_done",
        outcome="complete",
        decision_code="stale_completion_trace",
        decision_summary="this legacy trace entry should be ignored",
    )
    session.append_completion_trace_entry(stale_entry)

    nudge_entry = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:00:00Z",
        kind="completion_check",
        mode="execute",
        reason_code="premature_completion_nudge",
        summary=(
            "completion: requested one continuation because the non-mutating "
            "response looked incomplete"
        ),
        decision_kind="forced",
        policy_stage="continuation_check",
        policy_outcome="continue",
        evidence_summary=[follow_through],
    )
    session.append_workflow_timeline_entry(nudge_entry)

    exhausted_entry = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:01:00Z",
        kind="completion_finalize",
        mode="execute",
        reason_code="continuation_budget_exhausted",
        summary=(
            "completion: stopped because the continuation budget was exhausted "
            "while follow-through evidence was still missing"
        ),
        decision_kind="forced",
        policy_stage="continuation_check",
        policy_outcome="finalize",
        evidence_summary=[follow_through],
    )
    session.append_workflow_timeline_entry(exhausted_entry)

    raw_payload = session.storage_path.read_text()
    stored = json.loads(raw_payload)
    assert "completion_trace" not in stored

    restored = ConversationSession.load(
        project_root=temp_dir,
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        session_id=session.session_id,
    )

    assert restored is not None
    restored_codes = [item.decision_code for item in restored.completion_trace]
    assert restored_codes == [
        "premature_completion_nudge",
        "continuation_budget_exhausted",
    ]
    assert restored.completion_trace[-1].stage == "continuation_check"
    assert restored.completion_trace[-1].outcome == "finalize"
    assert restored.completion_trace[-1].evidence_summary == [follow_through]
| 488 | |
| 489 | |
def test_session_projects_live_completion_trace_from_workflow_timeline(
    temp_dir: Path,
) -> None:
    """Without reloading, ``completion_trace`` mirrors the timeline entries.

    Only workflow timeline entries are appended (no completion-trace entries),
    yet the in-memory session's trace must expose their reason codes, stage,
    outcome, evidence summary, and evidence provenance.
    """
    session = ConversationSession(
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        project_root=temp_dir,
    )

    accepted_entry = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:00:00Z",
        kind="completion_check",
        mode="execute",
        reason_code="completion_response_accepted",
        summary="completion: accepted the response because follow-through evidence was present",
        decision_kind="forced",
        policy_stage="continuation_check",
        policy_outcome="accept",
    )
    session.append_workflow_timeline_entry(accepted_entry)

    missing_verification = EvidenceProvenance(
        category="verification",
        source="dod.verification_commands",
        summary="verification evidence was still missing for `pytest -q`",
        status="missing",
        subject="pytest -q",
    )
    finalize_entry = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:01:00Z",
        kind="completion_finalize",
        mode="execute",
        reason_code="continuation_budget_exhausted",
        summary="completion: stopped because verification evidence was still missing",
        decision_kind="forced",
        policy_stage="continuation_check",
        policy_outcome="finalize",
        evidence_summary=["a passing verification result from `pytest -q`"],
        evidence_provenance=[missing_verification],
    )
    session.append_workflow_timeline_entry(finalize_entry)

    session.update_runtime_state(
        last_completion_decision_code="continuation_budget_exhausted",
        last_completion_decision_summary=(
            "stopped because verification evidence was still missing"
        ),
    )

    live_codes = [item.decision_code for item in session.completion_trace]
    assert live_codes == [
        "completion_response_accepted",
        "continuation_budget_exhausted",
    ]
    assert session.completion_trace[-1].stage == "continuation_check"
    assert session.completion_trace[-1].outcome == "finalize"
    assert session.completion_trace[-1].evidence_summary == [
        "a passing verification result from `pytest -q`"
    ]
    provenance_summaries = [
        record.summary for record in session.completion_trace[-1].evidence_provenance
    ]
    assert provenance_summaries == [
        "verification evidence was still missing for `pytest -q`"
    ]
| 552 | |
| 553 | |
def test_session_persists_workflow_ledger_state(temp_dir: Path) -> None:
    """A workflow ledger stored via ``update_workflow_ledger`` survives a reload."""
    session = ConversationSession(
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        project_root=temp_dir,
    )

    scope_assumption = WorkflowLedgerItem(
        text="notes.txt stays out of scope unless clarified otherwise.",
        status="contradicted",
        introduced_phase="clarify",
        updated_phase="recovery",
        evidence=["Clarify scope assumed `notes.txt` stayed out of scope."],
    )
    file_anchor = WorkflowLedgerItem(
        text="notes.txt exists in the workspace root.",
        status="changed",
        introduced_phase="clarify",
        updated_phase="recovery",
    )
    ux_boundary = WorkflowLedgerItem(
        text="Escalate before broad UX changes.",
        status="tracked",
        introduced_phase="clarify",
    )
    ledger = WorkflowLedger(
        assumptions=[scope_assumption],
        acceptance_anchors=[file_anchor],
        decision_boundaries=[ux_boundary],
    )
    session.update_workflow_ledger(ledger)

    restored = ConversationSession.load(
        project_root=temp_dir,
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        session_id=session.session_id,
    )

    assert restored is not None
    assert restored.workflow_ledger.assumptions[0].status == "contradicted"
    assert restored.workflow_ledger.assumptions[0].updated_phase == "recovery"
    assert restored.workflow_ledger.acceptance_anchors[0].status == "changed"
    restored_boundary = restored.workflow_ledger.decision_boundaries[0]
    assert restored_boundary.text == "Escalate before broad UX changes."
| 604 | |
| 605 | |
def test_session_persists_prompt_history_state(temp_dir: Path) -> None:
    """Two appended prompt snapshots are both present, in order, after a reload."""
    session = ConversationSession(
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        project_root=temp_dir,
    )

    plan_snapshot = PromptSnapshot(
        timestamp="2026-04-07T14:00:00Z",
        workflow_mode="plan",
        permission_mode="prompt",
        current_task="Tighten Loader workflow behavior",
        prompt_format="native",
        prompt_sections=["Runtime Config", "Workflow Context", "Mode Guidance"],
        content="# Introduction\nplan around planned.txt\n",
    )
    session.append_prompt_snapshot(plan_snapshot)

    execute_snapshot = PromptSnapshot(
        timestamp="2026-04-07T14:02:00Z",
        workflow_mode="execute",
        permission_mode="prompt",
        current_task="Tighten Loader workflow behavior",
        prompt_format="native",
        prompt_sections=[
            "Runtime Config",
            "Workflow Context",
            "Mode Guidance",
            "Project Context",
        ],
        content="# Introduction\nexecute around notes.txt\n# Project Context\npython\n",
    )
    session.append_prompt_snapshot(execute_snapshot)

    restored = ConversationSession.load(
        project_root=temp_dir,
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        session_id=session.session_id,
    )

    assert restored is not None
    assert len(restored.prompt_history) == 2
    assert restored.prompt_history[0].workflow_mode == "plan"
    assert restored.prompt_history[-1].workflow_mode == "execute"
    assert "notes.txt" in restored.prompt_history[-1].content
| 653 | |
| 654 | |
@pytest.mark.asyncio
async def test_turn_summary_usage_rolls_up_into_session_totals(temp_dir: Path) -> None:
    """One scripted turn's token usage appears in per-turn and cumulative totals.

    The backend reports ``prompt_tokens``/``completion_tokens``; the turn
    summary is expected to expose them as ``input_tokens``/``output_tokens``.
    """
    only_reply = CompletionResponse(
        content="Here's the answer.",
        usage={"prompt_tokens": 9, "completion_tokens": 3},
    )
    backend = ScriptedBackend(completions=[only_reply])
    agent_config = AgentConfig(
        auto_context=False,
        stream=False,
        reasoning=ReasoningConfig(completion_check=False),
    )
    agent = Agent(backend=backend, config=agent_config, project_root=temp_dir)

    await agent.run("Write a short release-note style summary of what Loader does well.")

    assert agent.last_turn_summary is not None

    turn_usage = agent.last_turn_summary.usage
    assert turn_usage["input_tokens"] == 9
    assert turn_usage["output_tokens"] == 3

    running_totals = agent.last_turn_summary.cumulative_usage
    assert running_totals["input_tokens"] == 9
    assert running_totals["output_tokens"] == 3
    assert running_totals["turns"] == 1