| 1 | """Tests for persisted session state and resume support.""" |
| 2 | |
| 3 | from __future__ import annotations |
| 4 | |
| 5 | import json |
| 6 | from pathlib import Path |
| 7 | |
| 8 | import pytest |
| 9 | |
| 10 | from loader.agent.loop import Agent, AgentConfig, ReasoningConfig |
| 11 | from loader.llm.base import CompletionResponse, Message, Role, ToolCall |
| 12 | from loader.runtime.completion_trace import CompletionTraceEntry |
| 13 | from loader.runtime.evidence_provenance import EvidenceProvenance |
| 14 | from loader.runtime.prompt_history import PromptSnapshot |
| 15 | from loader.runtime.runtime_handle import RuntimeHandle |
| 16 | from loader.runtime.session import ConversationSession |
| 17 | from loader.runtime.workflow_ledger import WorkflowLedger, WorkflowLedgerItem |
| 18 | from loader.runtime.workflow_policy import WorkflowTimelineEntry |
| 19 | from tests.helpers.runtime_harness import ScriptedBackend |
| 20 | |
| 21 | |
def _dummy_system() -> Message:
    """Build the placeholder system message passed as `system_message_factory`."""
    placeholder = Message(content="system", role=Role.SYSTEM)
    return placeholder
| 24 | |
| 25 | |
def _dummy_few_shots() -> list[Message]:
    """Return a new, empty few-shot list; passed as `few_shot_factory`."""
    no_examples: list[Message] = []
    return no_examples
| 28 | |
| 29 | |
@pytest.mark.asyncio
async def test_session_persists_and_resumes_across_agent_restart(temp_dir: Path) -> None:
    """Run one scripted turn, then resume its session from a second agent.

    The first agent plays a two-step script (a `write` tool call followed by a
    plain reply). A second agent with an empty script must then be able to
    resume the same session id and expose the original task, modes, turn
    summary and user message.
    """
    task = "Create hello.txt in the workspace root."
    write_call = ToolCall(
        id="write-1",
        name="write",
        arguments={
            "file_path": str(temp_dir / "hello.txt"),
            "content": "hello\n",
        },
    )
    scripted_turns = [
        CompletionResponse(
            content="I'll create the file.",
            tool_calls=[write_call],
            usage={"prompt_tokens": 12, "completion_tokens": 5},
        ),
        CompletionResponse(
            content="The file is written.",
            usage={"prompt_tokens": 10, "completion_tokens": 4},
        ),
    ]
    config = AgentConfig(auto_context=False, stream=False)
    first_agent = Agent(
        backend=ScriptedBackend(completions=scripted_turns),
        config=config,
        project_root=temp_dir,
    )

    response = await first_agent.run(task)

    assert response.startswith("The file is written.")
    session_id = first_agent.session.session_id
    assert first_agent.session.storage_path.exists()

    # The second agent gets no scripted completions: everything asserted
    # below has to come from the resumed session.
    resumed_agent = Agent(
        backend=ScriptedBackend(completions=[]),
        config=config,
        project_root=temp_dir,
    )

    assert resumed_agent.resume_session(session_id) is True
    assert resumed_agent.session.session_id == session_id
    assert resumed_agent._current_task == task
    assert resumed_agent.active_permission_mode == "workspace-write"
    assert resumed_agent.workflow_mode == first_agent.workflow_mode

    summary = resumed_agent.last_turn_summary
    assert summary is not None
    definition_of_done = summary.definition_of_done
    assert definition_of_done is not None
    assert definition_of_done.task_statement == task

    user_contents = [
        message.content
        for message in resumed_agent.messages
        if message.role == Role.USER
    ]
    assert task in user_contents
| 84 | |
| 85 | |
def test_agent_clear_history_rebuilds_a_fresh_runtime_session(temp_dir: Path) -> None:
    """`clear_history` must yield a new session id and reset the runtime state."""
    config = AgentConfig(auto_context=False, stream=False)
    agent = Agent(backend=ScriptedBackend(), config=config, project_root=temp_dir)
    stale_session_id = agent.session.session_id

    # Dirty each piece of state that the assertions below inspect.
    agent.current_task = "Keep runtime state tidy."
    agent.prompt_format = "native"
    agent.prompt_sections = ["Runtime Config", "Workflow Context"]
    agent.set_workflow_mode("clarify")
    agent.queue_steering_message("Stay in runtime.")

    agent.clear_history()

    assert agent.session.session_id != stale_session_id
    assert agent.current_task is None
    assert agent.workflow_mode == "execute"
    assert agent.prompt_format is None
    assert agent.prompt_sections == []
    assert agent.messages == []
    assert agent.last_turn_summary is None
    assert agent.drain_steering_messages() == []
| 109 | |
| 110 | |
def test_session_rotation_kicks_in_at_size_cap(temp_dir: Path) -> None:
    """With `rotate_after_bytes=250`, six appends must leave a `.1.json` sibling file."""
    session = ConversationSession(
        project_root=temp_dir,
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        rotate_after_bytes=250,
    )
    filler = "x" * 120

    for index in range(6):
        message = Message(role=Role.USER, content=f"Message {index}: {filler}")
        session.append(message)

    live_file = session.storage_path
    assert live_file.exists()
    assert live_file.with_suffix(".1.json").exists()
| 129 | |
| 130 | |
def test_session_compaction_persists_summary_and_recent_messages(temp_dir: Path) -> None:
    """Compaction must lead with a `[COMPACTED CONTEXT]` message and keep the last four."""
    transcript = [
        (Role.USER, "Kick off runtime audit"),
        (Role.ASSISTANT, "Initial findings"),
        (Role.USER, "Focus on sessions"),
        (Role.ASSISTANT, "Compaction design drafted"),
        (Role.USER, "Preserve the latest four messages"),
        (Role.ASSISTANT, "Ready to compact"),
    ]
    keep_last = 4
    session = ConversationSession(
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        project_root=temp_dir,
        messages=[Message(role=role, content=text) for role, text in transcript],
        auto_compaction_input_tokens_threshold=1,
        compaction_keep_last_messages=keep_last,
    )

    result = session.maybe_compact()

    assert result is not None
    assert session.compaction is not None
    assert session.storage_path.exists()
    assert session.messages[0].content.startswith("[COMPACTED CONTEXT]")

    # Expected tail is taken from the transcript tuples, not the Message
    # objects handed to the session.
    surviving = [message.content for message in session.messages[-keep_last:]]
    assert surviving == [text for _, text in transcript[-keep_last:]]
| 160 | |
| 161 | |
def test_session_persists_permission_policy_metadata(temp_dir: Path) -> None:
    """Check that runtime metadata written to a session comes back from `load`.

    The session is created with one set of permission/prompt values and then
    overwritten through `update_runtime_state`. One workflow timeline entry and
    one completion trace entry are appended. A freshly loaded copy must expose
    the overwritten values together with both appended records.
    """
    rules_source = str(temp_dir / ".loader" / "permission-rules.json")
    latest_rule_counts = {"allow": 2, "deny": 1, "ask": 4}
    latest_sections = ["Runtime Config", "Workflow Context", "Project Context"]
    reason_summary = "task looks complex enough to benefit from a persisted plan"
    reentry_summary = (
        "continued after verification failed and the runtime re-entered execute mode"
    )
    transition_summary = "completion -> finalize [terminal] Finalizing completed turn"
    contradiction = "verification contradiction: pytest still failed"
    open_question = "Scope is still broad."

    session = ConversationSession(
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        project_root=temp_dir,
        permission_mode="prompt",
        permission_prompting_enabled=True,
        permission_rule_counts={"allow": 1, "deny": 2, "ask": 3},
        permission_rules_source=rules_source,
        prompt_format="react",
        prompt_sections=["Runtime Config", "Workflow Context"],
    )

    # Containers are passed as copies so the expected values used in the
    # assertions stay independent of whatever the session holds on to.
    session.update_runtime_state(
        current_task="Inspect permission history",
        runtime_owner_type="RuntimeHandle",
        permission_mode="allow",
        permission_prompting_enabled=True,
        permission_rule_counts=dict(latest_rule_counts),
        permission_rules_source=rules_source,
        prompt_format="native",
        prompt_sections=list(latest_sections),
        workflow_reason_code="task_is_complex",
        workflow_reason_summary=reason_summary,
        workflow_decision_kind="initial_route",
        workflow_ambiguity_score=0.2,
        workflow_complexity_score=0.6,
        workflow_scheduled_next_mode="execute",
        last_completion_decision_code="verification_failed_reentry",
        last_completion_decision_summary=reentry_summary,
        last_turn_transition_summary=transition_summary,
        last_turn_transition_kind="terminal",
        last_turn_transition_reason_code="turn_complete",
    )

    route_entry = WorkflowTimelineEntry(
        timestamp="2026-04-07T12:00:00Z",
        kind="route",
        mode="plan",
        reason_code="task_is_complex",
        summary="plan: workflow pressure favors a persisted plan before execution",
        decision_kind="initial_route",
        route_score=0.72,
        runner_up_mode="clarify",
        runner_up_score=0.61,
        scheduled_next_mode="execute",
        unresolved_questions=[open_question],
        prompt_format="native",
        prompt_sections=list(latest_sections),
    )
    session.append_workflow_timeline_entry(route_entry)

    trace_entry = CompletionTraceEntry(
        stage="definition_of_done",
        outcome="continue",
        decision_code="verification_failed_reentry",
        decision_summary=reentry_summary,
        evidence_summary=[contradiction],
    )
    session.append_completion_trace_entry(trace_entry)

    reloaded = ConversationSession.load(
        project_root=temp_dir,
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        session_id=session.session_id,
    )

    assert reloaded is not None

    # Permission policy metadata.
    assert reloaded.permission_mode == "allow"
    assert reloaded.permission_prompting_enabled is True
    assert reloaded.permission_rule_counts == latest_rule_counts
    assert reloaded.permission_rules_source == rules_source

    # Only the owner type was set above; the path is asserted alongside it.
    assert reloaded.runtime_owner_type == "RuntimeHandle"
    assert reloaded.runtime_owner_path == "runtime-handle"

    # Prompt metadata.
    assert reloaded.prompt_format == "native"
    assert reloaded.prompt_sections == latest_sections

    # Workflow routing metadata.
    assert reloaded.workflow_reason_code == "task_is_complex"
    assert reloaded.workflow_reason_summary == reason_summary
    assert reloaded.workflow_decision_kind == "initial_route"
    assert reloaded.workflow_ambiguity_score == pytest.approx(0.2)
    assert reloaded.workflow_complexity_score == pytest.approx(0.6)
    assert reloaded.workflow_scheduled_next_mode == "execute"

    # Completion decision and trace.
    assert reloaded.last_completion_decision_code == "verification_failed_reentry"
    assert reloaded.last_completion_decision_summary == reentry_summary
    trace_codes = [entry.decision_code for entry in reloaded.completion_trace]
    assert trace_codes == ["verification_failed_reentry"]
    assert reloaded.completion_trace[0].evidence_summary == [contradiction]

    # Last turn transition.
    assert reloaded.last_turn_transition_summary == transition_summary
    assert reloaded.last_turn_transition_kind == "terminal"
    assert reloaded.last_turn_transition_reason_code == "turn_complete"

    # Workflow timeline.
    assert len(reloaded.workflow_timeline) == 1
    restored_route = reloaded.workflow_timeline[0]
    assert restored_route.mode == "plan"
    assert restored_route.route_score == pytest.approx(0.72)
    assert restored_route.unresolved_questions == [open_question]
| 279 | |
| 280 | |
def test_resume_session_updates_runtime_owner_metadata(temp_dir: Path) -> None:
    """A session persisted by an `Agent` and resumed by a `RuntimeHandle` records the handle."""
    original_owner = Agent(
        project_root=temp_dir,
        backend=ScriptedBackend(),
        config=AgentConfig(auto_context=False, stream=False),
    )
    original_owner.session.persist()
    session_id = original_owner.session.session_id

    # A separate config instance is built for the handle, as two owners
    # would have in practice.
    handle = RuntimeHandle(
        project_root=temp_dir,
        backend=ScriptedBackend(),
        config=AgentConfig(auto_context=False, stream=False),
    )

    assert handle.resume_session(session_id) is True

    reloaded = ConversationSession.load(
        session_id=session_id,
        project_root=temp_dir,
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
    )

    assert reloaded is not None
    owner = (reloaded.runtime_owner_type, reloaded.runtime_owner_path)
    assert owner[0] == "RuntimeHandle"
    assert owner[1] == "runtime-handle"
| 308 | |
| 309 | |
def test_session_prefers_canonical_workflow_timeline_for_completion_trace(
    temp_dir: Path,
) -> None:
    """The reloaded completion trace must come from the workflow timeline.

    A trace entry is appended directly and two completion-related timeline
    entries are appended afterwards. The persisted JSON must carry no
    `completion_trace` key, and the reloaded trace must list only the two
    timeline-derived decisions.
    """
    follow_through = "showing the requested work was actually carried out"

    session = ConversationSession(
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        project_root=temp_dir,
    )
    session.update_runtime_state(
        current_task="Explain why the turn stopped",
        last_completion_decision_code="continuation_budget_exhausted",
        last_completion_decision_summary=(
            "stopped because the continuation budget was exhausted "
            "while follow-through evidence was still missing"
        ),
    )

    # This entry must NOT show up after reload.
    stale_entry = CompletionTraceEntry(
        stage="definition_of_done",
        outcome="complete",
        decision_code="stale_completion_trace",
        decision_summary="this legacy trace entry should be ignored",
    )
    session.append_completion_trace_entry(stale_entry)

    nudge_entry = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:00:00Z",
        kind="completion_check",
        mode="execute",
        reason_code="premature_completion_nudge",
        summary=(
            "completion: requested one continuation because "
            "the non-mutating response looked incomplete"
        ),
        decision_kind="forced",
        policy_stage="continuation_check",
        policy_outcome="continue",
        evidence_summary=[follow_through],
    )
    session.append_workflow_timeline_entry(nudge_entry)

    exhausted_entry = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:01:00Z",
        kind="completion_finalize",
        mode="execute",
        reason_code="continuation_budget_exhausted",
        summary=(
            "completion: stopped because the continuation budget was "
            "exhausted while follow-through evidence was still missing"
        ),
        decision_kind="forced",
        policy_stage="continuation_check",
        policy_outcome="finalize",
        evidence_summary=[follow_through],
    )
    session.append_workflow_timeline_entry(exhausted_entry)

    raw_payload = session.storage_path.read_text()
    persisted = json.loads(raw_payload)
    assert "completion_trace" not in persisted

    reloaded = ConversationSession.load(
        project_root=temp_dir,
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        session_id=session.session_id,
    )

    assert reloaded is not None
    decision_codes = [entry.decision_code for entry in reloaded.completion_trace]
    assert decision_codes == [
        "premature_completion_nudge",
        "continuation_budget_exhausted",
    ]
    assert reloaded.completion_trace[-1].stage == "continuation_check"
    assert reloaded.completion_trace[-1].outcome == "finalize"
    assert reloaded.completion_trace[-1].evidence_summary == [follow_through]
| 388 | |
| 389 | |
def test_session_projects_live_completion_trace_from_workflow_timeline(
    temp_dir: Path,
) -> None:
    """Without reloading, `completion_trace` must mirror the appended timeline entries."""
    missing_result = "a passing verification result from `pytest -q`"
    provenance_summary = "verification evidence was still missing for `pytest -q`"

    session = ConversationSession(
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        project_root=temp_dir,
    )

    accepted_entry = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:00:00Z",
        kind="completion_check",
        mode="execute",
        reason_code="completion_response_accepted",
        summary=(
            "completion: accepted the response because "
            "follow-through evidence was present"
        ),
        decision_kind="forced",
        policy_stage="continuation_check",
        policy_outcome="accept",
    )
    session.append_workflow_timeline_entry(accepted_entry)

    missing_verification = EvidenceProvenance(
        category="verification",
        source="dod.verification_commands",
        summary=provenance_summary,
        status="missing",
        subject="pytest -q",
    )
    exhausted_entry = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:01:00Z",
        kind="completion_finalize",
        mode="execute",
        reason_code="continuation_budget_exhausted",
        summary="completion: stopped because verification evidence was still missing",
        decision_kind="forced",
        policy_stage="continuation_check",
        policy_outcome="finalize",
        evidence_summary=[missing_result],
        evidence_provenance=[missing_verification],
    )
    session.append_workflow_timeline_entry(exhausted_entry)

    session.update_runtime_state(
        last_completion_decision_code="continuation_budget_exhausted",
        last_completion_decision_summary=(
            "stopped because verification evidence was still missing"
        ),
    )

    # Assertions run against the live session object; nothing is reloaded here.
    decision_codes = [entry.decision_code for entry in session.completion_trace]
    assert decision_codes == [
        "completion_response_accepted",
        "continuation_budget_exhausted",
    ]
    assert session.completion_trace[-1].stage == "continuation_check"
    assert session.completion_trace[-1].outcome == "finalize"
    assert session.completion_trace[-1].evidence_summary == [missing_result]
    provenance_summaries = [
        item.summary for item in session.completion_trace[-1].evidence_provenance
    ]
    assert provenance_summaries == [provenance_summary]
| 452 | |
| 453 | |
def test_session_persists_workflow_ledger_state(temp_dir: Path) -> None:
    """A ledger stored via `update_workflow_ledger` must be readable after `load`."""
    boundary_text = "Escalate before broad UX changes."

    scope_assumption = WorkflowLedgerItem(
        text="notes.txt stays out of scope unless clarified otherwise.",
        status="contradicted",
        introduced_phase="clarify",
        updated_phase="recovery",
        evidence=["Clarify scope assumed `notes.txt` stayed out of scope."],
    )
    file_anchor = WorkflowLedgerItem(
        text="notes.txt exists in the workspace root.",
        status="changed",
        introduced_phase="clarify",
        updated_phase="recovery",
    )
    ux_boundary = WorkflowLedgerItem(
        text=boundary_text,
        status="tracked",
        introduced_phase="clarify",
    )
    ledger = WorkflowLedger(
        assumptions=[scope_assumption],
        acceptance_anchors=[file_anchor],
        decision_boundaries=[ux_boundary],
    )

    session = ConversationSession(
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        project_root=temp_dir,
    )
    session.update_workflow_ledger(ledger)

    reloaded = ConversationSession.load(
        project_root=temp_dir,
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        session_id=session.session_id,
    )

    assert reloaded is not None
    restored = reloaded.workflow_ledger
    assert restored.assumptions[0].status == "contradicted"
    assert restored.assumptions[0].updated_phase == "recovery"
    assert restored.acceptance_anchors[0].status == "changed"
    assert restored.decision_boundaries[0].text == boundary_text
| 504 | |
| 505 | |
def test_session_persists_prompt_history_state(temp_dir: Path) -> None:
    """Two appended prompt snapshots must both be present, in order, after `load`."""
    task = "Tighten Loader workflow behavior"
    base_sections = ["Runtime Config", "Workflow Context", "Mode Guidance"]

    plan_snapshot = PromptSnapshot(
        timestamp="2026-04-07T14:00:00Z",
        workflow_mode="plan",
        permission_mode="prompt",
        current_task=task,
        prompt_format="native",
        prompt_sections=list(base_sections),
        content="# Introduction\nplan around planned.txt\n",
    )
    execute_snapshot = PromptSnapshot(
        timestamp="2026-04-07T14:02:00Z",
        workflow_mode="execute",
        permission_mode="prompt",
        current_task=task,
        prompt_format="native",
        prompt_sections=[*base_sections, "Project Context"],
        content="# Introduction\nexecute around notes.txt\n# Project Context\npython\n",
    )

    session = ConversationSession(
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        project_root=temp_dir,
    )
    # Append order matters: the assertions below rely on plan first, execute last.
    for snapshot in (plan_snapshot, execute_snapshot):
        session.append_prompt_snapshot(snapshot)

    reloaded = ConversationSession.load(
        project_root=temp_dir,
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        session_id=session.session_id,
    )

    assert reloaded is not None
    history = reloaded.prompt_history
    assert len(history) == 2
    assert history[0].workflow_mode == "plan"
    assert history[-1].workflow_mode == "execute"
    assert "notes.txt" in history[-1].content
| 553 | |
| 554 | |
@pytest.mark.asyncio
async def test_turn_summary_usage_rolls_up_into_session_totals(temp_dir: Path) -> None:
    """Scripted token usage must appear in the turn summary and the cumulative totals."""
    only_reply = CompletionResponse(
        content="Here's the answer.",
        usage={"prompt_tokens": 9, "completion_tokens": 3},
    )
    config = AgentConfig(
        auto_context=False,
        stream=False,
        reasoning=ReasoningConfig(completion_check=False),
    )
    agent = Agent(
        backend=ScriptedBackend(completions=[only_reply]),
        config=config,
        project_root=temp_dir,
    )

    await agent.run("Write a short release-note style summary of what Loader does well.")

    summary = agent.last_turn_summary
    assert summary is not None

    # The backend reports prompt/completion tokens; the summary is read
    # back under input/output keys.
    assert summary.usage["input_tokens"] == 9
    assert summary.usage["output_tokens"] == 3
    assert summary.cumulative_usage["input_tokens"] == 9
    assert summary.cumulative_usage["output_tokens"] == 3
    assert summary.cumulative_usage["turns"] == 1