loader Public

Watch 0 Fork 0 Star 0
Python · 25910 bytes Raw Blame History
  
        1
        """Tests for persisted session state and resume support."""
      
        2
        
        3
        from __future__ import annotations
      
        4
        
        5
        import json
      
        6
        from pathlib import Path
      
        7
        
        8
        import pytest
      
        9
        
        10
        from loader.agent.loop import Agent, AgentConfig, ReasoningConfig
      
        11
        from loader.llm.base import CompletionResponse, Message, Role, ToolCall
      
        12
        from loader.runtime.completion_trace import CompletionTraceEntry
      
        13
        from loader.runtime.dod import (
      
        14
            DefinitionOfDoneStore,
      
        15
            VerificationEvidence,
      
        16
            create_definition_of_done,
      
        17
        )
      
        18
        from loader.runtime.evidence_provenance import EvidenceProvenance
      
        19
        from loader.runtime.prompt_history import PromptSnapshot
      
        20
        from loader.runtime.runtime_handle import RuntimeHandle
      
        21
        from loader.runtime.session import ConversationSession
      
        22
        from loader.runtime.workflow_ledger import WorkflowLedger, WorkflowLedgerItem
      
        23
        from loader.runtime.workflow_policy import WorkflowTimelineEntry
      
        24
        from tests.helpers.runtime_harness import ScriptedBackend
      
        25
        
        26
        
        27
        def _dummy_system() -> Message:
            """Build the fixed system message handed to sessions as a factory stub."""
            system_message = Message(role=Role.SYSTEM, content="system")
            return system_message
      
        29
        
        30
        
        31
        def _dummy_few_shots() -> list[Message]:
      
        32
            return []
      
        33
        
        34
        
        35
        @pytest.mark.asyncio
        async def test_session_persists_and_resumes_across_agent_restart(temp_dir: Path) -> None:
            """Run one scripted turn, then resume its session from a second agent."""
            task = "Create hello.txt in the workspace root."
            write_call = ToolCall(
                id="write-1",
                name="write",
                arguments={
                    "file_path": str(temp_dir / "hello.txt"),
                    "content": "hello\n",
                },
            )
            scripted_turns = [
                # First completion requests the write; the second is the final answer.
                CompletionResponse(
                    content="I'll create the file.",
                    tool_calls=[write_call],
                    usage={"prompt_tokens": 12, "completion_tokens": 5},
                ),
                CompletionResponse(
                    content="The file is written.",
                    usage={"prompt_tokens": 10, "completion_tokens": 4},
                ),
            ]
            config = AgentConfig(auto_context=False, stream=False)
            first_agent = Agent(
                backend=ScriptedBackend(completions=scripted_turns),
                config=config,
                project_root=temp_dir,
            )

            response = await first_agent.run(task)

            assert response.startswith("The file is written.")
            session_id = first_agent.session.session_id
            assert first_agent.session.storage_path.exists()

            # The second agent gets no scripted completions; it only reloads state.
            resumed_agent = Agent(
                backend=ScriptedBackend(completions=[]),
                config=config,
                project_root=temp_dir,
            )

            assert resumed_agent.resume_session(session_id) is True
            assert resumed_agent.session.session_id == session_id
            assert resumed_agent._current_task == task
            assert resumed_agent.active_permission_mode == "workspace-write"
            assert resumed_agent.workflow_mode == first_agent.workflow_mode

            restored_summary = resumed_agent.last_turn_summary
            assert restored_summary is not None
            restored_dod = restored_summary.definition_of_done
            assert restored_dod is not None
            assert restored_dod.task_statement == task

            # The original user prompt must be present in the restored history.
            assert any(
                entry.role == Role.USER and entry.content == task
                for entry in resumed_agent.messages
            )
      
        89
        
        90
        
        91
        def test_agent_clear_history_rebuilds_a_fresh_runtime_session(temp_dir: Path) -> None:
            """Check that ``clear_history`` swaps the session and resets runtime state."""
            config = AgentConfig(auto_context=False, stream=False)
            agent = Agent(backend=ScriptedBackend(), config=config, project_root=temp_dir)
            previous_session_id = agent.session.session_id

            # Dirty every piece of state that the assertions below expect to be reset.
            agent.current_task = "Keep runtime state tidy."
            agent.prompt_format = "native"
            agent.prompt_sections = ["Runtime Config", "Workflow Context"]
            agent.set_workflow_mode("clarify")
            agent.queue_steering_message("Stay in runtime.")

            agent.clear_history()

            assert agent.session.session_id != previous_session_id
            assert agent.current_task is None
            assert agent.workflow_mode == "execute"
            assert agent.prompt_format is None
            assert agent.prompt_sections == []
            assert agent.messages == []
            assert agent.last_turn_summary is None
            # Checked last on purpose: the call's name suggests it consumes the queue.
            assert agent.drain_steering_messages() == []
      
        114
        
        115
        
        116
        def test_session_rotation_kicks_in_at_size_cap(temp_dir: Path) -> None:
            """Append six long messages and expect both the live and ``.1.json`` files."""
            session = ConversationSession(
                system_message_factory=_dummy_system,
                few_shot_factory=_dummy_few_shots,
                project_root=temp_dir,
                rotate_after_bytes=250,
            )
            padding = "x" * 120

            for index in range(6):
                oversized = Message(role=Role.USER, content=f"Message {index}: " + padding)
                session.append(oversized)

            assert session.storage_path.exists()
            rotated_path = session.storage_path.with_suffix(".1.json")
            assert rotated_path.exists()
      
        134
        
        135
        
        136
        def test_session_compaction_persists_summary_and_recent_messages(temp_dir: Path) -> None:
            """Compact a six-message session and check the marker plus the kept tail."""
            transcript = [
                (Role.USER, "Kick off runtime audit"),
                (Role.ASSISTANT, "Initial findings"),
                (Role.USER, "Focus on sessions"),
                (Role.ASSISTANT, "Compaction design drafted"),
                (Role.USER, "Preserve the latest four messages"),
                (Role.ASSISTANT, "Ready to compact"),
            ]
            session = ConversationSession(
                system_message_factory=_dummy_system,
                few_shot_factory=_dummy_few_shots,
                project_root=temp_dir,
                messages=[Message(role=role, content=text) for role, text in transcript],
                auto_compaction_input_tokens_threshold=1,
                compaction_keep_last_messages=4,
            )

            result = session.maybe_compact()

            assert result is not None
            assert session.compaction is not None
            assert session.storage_path.exists()

            leading_message = session.messages[0]
            assert leading_message.content.startswith("[COMPACTED CONTEXT]")

            kept_tail = [message.content for message in session.messages[-4:]]
            assert kept_tail == [
                "Focus on sessions",
                "Compaction design drafted",
                "Preserve the latest four messages",
                "Ready to compact",
            ]
      
        165
        
        166
        
        167
        def test_session_compaction_summarizes_active_dod_failure(temp_dir: Path) -> None:
            """Compact a session with a failing DoD and check the summary reports it."""
            thin_chapter = temp_dir / "guide" / "chapters" / "05-load-balancing.html"
            verifier_output = (
                "Exit code 1\n"
                "HTML guide content quality issues:\n"
                f"{thin_chapter}: "
                "thin content (1500 text chars, expected at least 1758)\n"
            )

            dod_store = DefinitionOfDoneStore(temp_dir)
            dod = create_definition_of_done("Create a generated guide.")
            dod.status = "fixing"
            dod.last_verification_result = "failed"
            dod.pending_items = ["Expand generated chapters to satisfy quality verification"]
            failed_check = VerificationEvidence(
                command="python3 verify_html_quality.py",
                passed=False,
                output=verifier_output,
            )
            dod.evidence.append(failed_check)
            dod_path = dod_store.save(dod)

            history = [
                Message(role=Role.USER, content="Create the guide."),
                Message(role=Role.ASSISTANT, content="Created draft files."),
                Message(
                    role=Role.TOOL,
                    content="Observation [notepad_read]: Result: guide complete",
                ),
                Message(role=Role.ASSISTANT, content="Trying to finish."),
                Message(role=Role.USER, content="Continue repairing."),
            ]
            session = ConversationSession(
                system_message_factory=_dummy_system,
                few_shot_factory=_dummy_few_shots,
                project_root=temp_dir,
                messages=history,
                active_dod_path=str(dod_path),
                auto_compaction_input_tokens_threshold=1,
                compaction_keep_last_messages=2,
            )

            result = session.maybe_compact()

            assert result is not None
            assert session.messages[0].content.startswith("[COMPACTED CONTEXT]")

            # Each fragment must appear verbatim in the compaction summary.
            for fragment in (
                "- Active DoD: status=fixing; last verification=failed",
                "05-load-balancing.html",
                "thin content",
                "authoritative over older summaries or durable memory notes",
            ):
                assert fragment in result.summary
      
        213
        
        214
        
        215
        def test_build_request_messages_omits_large_mutation_tool_calls_from_history(
            temp_dir: Path,
        ) -> None:
            """Request messages carry no tool calls; stored history keeps full arguments."""
            large_html = "<html>" + ("x" * 400) + "</html>"
            old_block = "old\n" * 120
            new_block = "new\n" * 120

            write_call = ToolCall(
                id="write-1",
                name="write",
                arguments={
                    "file_path": str(temp_dir / "guides" / "nginx" / "index.html"),
                    "content": large_html,
                },
            )
            edit_call = ToolCall(
                id="edit-1",
                name="edit",
                arguments={
                    "file_path": str(temp_dir / "README.md"),
                    "old_string": old_block,
                    "new_string": new_block,
                },
            )
            user_turn = Message(role=Role.USER, content="Create the guide.")
            assistant_turn = Message(
                role=Role.ASSISTANT,
                content="I'll write the first files now.",
                tool_calls=[write_call, edit_call],
            )
            session = ConversationSession(
                system_message_factory=_dummy_system,
                few_shot_factory=_dummy_few_shots,
                project_root=temp_dir,
                messages=[user_turn, assistant_turn],
            )

            request_messages = session.build_request_messages()

            # Index 2 is asserted to hold the assistant turn in the outgoing request.
            outgoing_assistant = request_messages[2]
            assert outgoing_assistant.tool_calls == []
            assert outgoing_assistant.content == "I'll write the first files now."

            # The session's own history must still hold the untouched arguments.
            stored_calls = session.messages[1].tool_calls
            assert stored_calls[0].arguments["content"] == large_html
            assert stored_calls[1].arguments["old_string"] == old_block
            assert stored_calls[1].arguments["new_string"] == new_block
      
        260
        
        261
        
        262
        def test_session_persists_permission_policy_metadata(temp_dir: Path) -> None:
            """Update runtime metadata, reload the session, and compare every field."""
            rules_source = str(temp_dir / ".loader" / "permission-rules.json")
            route_reason = "task looks complex enough to benefit from a persisted plan"
            reentry_summary = (
                "continued after verification failed and the runtime re-entered execute mode"
            )
            transition_summary = "completion -> finalize [terminal] Finalizing completed turn"
            contradiction = "verification contradiction: pytest still failed"

            session = ConversationSession(
                system_message_factory=_dummy_system,
                few_shot_factory=_dummy_few_shots,
                project_root=temp_dir,
                permission_mode="prompt",
                permission_prompting_enabled=True,
                permission_rule_counts={"allow": 1, "deny": 2, "ask": 3},
                permission_rules_source=rules_source,
                prompt_format="react",
                prompt_sections=["Runtime Config", "Workflow Context"],
            )

            # These values differ from the constructor's, so the reload has to show them.
            session.update_runtime_state(
                current_task="Inspect permission history",
                runtime_owner_type="RuntimeHandle",
                permission_mode="allow",
                permission_prompting_enabled=True,
                permission_rule_counts={"allow": 2, "deny": 1, "ask": 4},
                permission_rules_source=rules_source,
                prompt_format="native",
                prompt_sections=["Runtime Config", "Workflow Context", "Project Context"],
                workflow_reason_code="task_is_complex",
                workflow_reason_summary=route_reason,
                workflow_decision_kind="initial_route",
                workflow_ambiguity_score=0.2,
                workflow_complexity_score=0.6,
                workflow_scheduled_next_mode="execute",
                last_completion_decision_code="verification_failed_reentry",
                last_completion_decision_summary=reentry_summary,
                last_turn_transition_summary=transition_summary,
                last_turn_transition_kind="terminal",
                last_turn_transition_reason_code="turn_complete",
            )

            route_entry = WorkflowTimelineEntry(
                timestamp="2026-04-07T12:00:00Z",
                kind="route",
                mode="plan",
                reason_code="task_is_complex",
                summary="plan: workflow pressure favors a persisted plan before execution",
                decision_kind="initial_route",
                route_score=0.72,
                runner_up_mode="clarify",
                runner_up_score=0.61,
                scheduled_next_mode="execute",
                unresolved_questions=["Scope is still broad."],
                prompt_format="native",
                prompt_sections=["Runtime Config", "Workflow Context", "Project Context"],
            )
            session.append_workflow_timeline_entry(route_entry)

            trace_entry = CompletionTraceEntry(
                stage="definition_of_done",
                outcome="continue",
                decision_code="verification_failed_reentry",
                decision_summary=reentry_summary,
                evidence_summary=[contradiction],
            )
            session.append_completion_trace_entry(trace_entry)

            reloaded = ConversationSession.load(
                project_root=temp_dir,
                system_message_factory=_dummy_system,
                few_shot_factory=_dummy_few_shots,
                session_id=session.session_id,
            )

            assert reloaded is not None

            # Permission policy.
            assert reloaded.permission_mode == "allow"
            assert reloaded.permission_prompting_enabled is True
            assert reloaded.permission_rule_counts == {"allow": 2, "deny": 1, "ask": 4}
            assert reloaded.permission_rules_source == rules_source

            # Runtime ownership and prompt configuration.
            assert reloaded.runtime_owner_type == "RuntimeHandle"
            assert reloaded.runtime_owner_path == "runtime-handle"
            assert reloaded.prompt_format == "native"
            assert reloaded.prompt_sections == [
                "Runtime Config",
                "Workflow Context",
                "Project Context",
            ]

            # Workflow routing decision.
            assert reloaded.workflow_reason_code == "task_is_complex"
            assert reloaded.workflow_reason_summary == route_reason
            assert reloaded.workflow_decision_kind == "initial_route"
            assert reloaded.workflow_ambiguity_score == pytest.approx(0.2)
            assert reloaded.workflow_complexity_score == pytest.approx(0.6)
            assert reloaded.workflow_scheduled_next_mode == "execute"

            # Completion decision and trace.
            assert reloaded.last_completion_decision_code == "verification_failed_reentry"
            assert reloaded.last_completion_decision_summary == reentry_summary
            trace_codes = [entry.decision_code for entry in reloaded.completion_trace]
            assert trace_codes == ["verification_failed_reentry"]
            assert reloaded.completion_trace[0].evidence_summary == [contradiction]

            # Last turn transition.
            assert reloaded.last_turn_transition_summary == transition_summary
            assert reloaded.last_turn_transition_kind == "terminal"
            assert reloaded.last_turn_transition_reason_code == "turn_complete"

            # Workflow timeline.
            assert len(reloaded.workflow_timeline) == 1
            reloaded_route = reloaded.workflow_timeline[0]
            assert reloaded_route.mode == "plan"
            assert reloaded_route.route_score == pytest.approx(0.72)
            assert reloaded_route.unresolved_questions == ["Scope is still broad."]
      
        379
        
        380
        
        381
        def test_resume_session_updates_runtime_owner_metadata(temp_dir: Path) -> None:
            """Resume an agent-created session via ``RuntimeHandle`` and check its owner."""
            creator = Agent(
                backend=ScriptedBackend(),
                config=AgentConfig(auto_context=False, stream=False),
                project_root=temp_dir,
            )
            creator.session.persist()
            session_id = creator.session.session_id

            handle = RuntimeHandle(
                backend=ScriptedBackend(),
                config=AgentConfig(auto_context=False, stream=False),
                project_root=temp_dir,
            )
            resumed = handle.resume_session(session_id)
            assert resumed is True

            # Load independently of the handle so the check reads what was persisted.
            reloaded = ConversationSession.load(
                project_root=temp_dir,
                system_message_factory=_dummy_system,
                few_shot_factory=_dummy_few_shots,
                session_id=session_id,
            )

            assert reloaded is not None
            assert reloaded.runtime_owner_type == "RuntimeHandle"
            assert reloaded.runtime_owner_path == "runtime-handle"
      
        408
        
        409
        
        410
        def test_session_prefers_canonical_workflow_timeline_for_completion_trace(
            temp_dir: Path,
        ) -> None:
            """Expect the reloaded trace to mirror timeline entries, not the legacy entry."""
            evidence_note = "showing the requested work was actually carried out"
            session = ConversationSession(
                system_message_factory=_dummy_system,
                few_shot_factory=_dummy_few_shots,
                project_root=temp_dir,
            )

            session.update_runtime_state(
                current_task="Explain why the turn stopped",
                last_completion_decision_code="continuation_budget_exhausted",
                last_completion_decision_summary=(
                    "stopped because the continuation budget was exhausted while "
                    "follow-through evidence was still missing"
                ),
            )

            # This entry is asserted below to be absent from the reloaded trace.
            legacy_entry = CompletionTraceEntry(
                stage="definition_of_done",
                outcome="complete",
                decision_code="stale_completion_trace",
                decision_summary="this legacy trace entry should be ignored",
            )
            session.append_completion_trace_entry(legacy_entry)

            nudge_entry = WorkflowTimelineEntry(
                timestamp="2026-04-09T12:00:00Z",
                kind="completion_check",
                mode="execute",
                reason_code="premature_completion_nudge",
                summary=(
                    "completion: requested one continuation because the non-mutating "
                    "response looked incomplete"
                ),
                decision_kind="forced",
                policy_stage="continuation_check",
                policy_outcome="continue",
                evidence_summary=[evidence_note],
            )
            budget_entry = WorkflowTimelineEntry(
                timestamp="2026-04-09T12:01:00Z",
                kind="completion_finalize",
                mode="execute",
                reason_code="continuation_budget_exhausted",
                summary=(
                    "completion: stopped because the continuation budget was exhausted "
                    "while follow-through evidence was still missing"
                ),
                decision_kind="forced",
                policy_stage="continuation_check",
                policy_outcome="finalize",
                evidence_summary=[evidence_note],
            )
            for timeline_entry in (nudge_entry, budget_entry):
                session.append_workflow_timeline_entry(timeline_entry)

            # The on-disk payload must not contain a separate completion trace key.
            persisted = json.loads(session.storage_path.read_text())
            assert "completion_trace" not in persisted

            reloaded = ConversationSession.load(
                project_root=temp_dir,
                system_message_factory=_dummy_system,
                few_shot_factory=_dummy_few_shots,
                session_id=session.session_id,
            )

            assert reloaded is not None
            reloaded_codes = [entry.decision_code for entry in reloaded.completion_trace]
            assert reloaded_codes == [
                "premature_completion_nudge",
                "continuation_budget_exhausted",
            ]

            final_entry = reloaded.completion_trace[-1]
            assert final_entry.stage == "continuation_check"
            assert final_entry.outcome == "finalize"
            assert final_entry.evidence_summary == [evidence_note]
      
        488
        
        489
        
        490
        def test_session_projects_live_completion_trace_from_workflow_timeline(
            temp_dir: Path,
        ) -> None:
            """Check the completion trace exposed by a live (never reloaded) session.

            Appends two completion-related workflow timeline entries, records the last
            completion decision in the runtime state, and then asserts that
            ``session.completion_trace`` mirrors those entries in order.
            """
            pytest_evidence = "a passing verification result from `pytest -q`"
            missing_evidence_note = "verification evidence was still missing for `pytest -q`"

            session = ConversationSession(
                project_root=temp_dir,
                system_message_factory=_dummy_system,
                few_shot_factory=_dummy_few_shots,
            )

            # First entry: the completion check accepted the response.
            accepted_entry = WorkflowTimelineEntry(
                timestamp="2026-04-09T12:00:00Z",
                kind="completion_check",
                mode="execute",
                reason_code="completion_response_accepted",
                summary="completion: accepted the response because follow-through evidence was present",
                decision_kind="forced",
                policy_stage="continuation_check",
                policy_outcome="accept",
            )
            session.append_workflow_timeline_entry(accepted_entry)

            # Second entry: finalization while verification evidence is still missing.
            missing_verification = EvidenceProvenance(
                category="verification",
                source="dod.verification_commands",
                summary=missing_evidence_note,
                status="missing",
                subject="pytest -q",
            )
            finalize_entry = WorkflowTimelineEntry(
                timestamp="2026-04-09T12:01:00Z",
                kind="completion_finalize",
                mode="execute",
                reason_code="continuation_budget_exhausted",
                summary="completion: stopped because verification evidence was still missing",
                decision_kind="forced",
                policy_stage="continuation_check",
                policy_outcome="finalize",
                evidence_summary=[pytest_evidence],
                evidence_provenance=[missing_verification],
            )
            session.append_workflow_timeline_entry(finalize_entry)

            session.update_runtime_state(
                last_completion_decision_code="continuation_budget_exhausted",
                last_completion_decision_summary=(
                    "stopped because verification evidence was still missing"
                ),
            )

            decision_codes = [entry.decision_code for entry in session.completion_trace]
            assert decision_codes == [
                "completion_response_accepted",
                "continuation_budget_exhausted",
            ]

            final_entry = session.completion_trace[-1]
            assert final_entry.stage == "continuation_check"
            assert final_entry.outcome == "finalize"
            assert final_entry.evidence_summary == [pytest_evidence]

            provenance_summaries = [item.summary for item in final_entry.evidence_provenance]
            assert provenance_summaries == [missing_evidence_note]
      
        552
        
        553
        
        554
        def test_session_persists_workflow_ledger_state(temp_dir: Path) -> None:
            """Check that a workflow ledger set on a session is present after reloading.

            A ledger with one assumption, one acceptance anchor and one decision
            boundary is stored via ``update_workflow_ledger``; the session is then
            loaded again by id and selected fields of each item are compared.
            """
            session = ConversationSession(
                project_root=temp_dir,
                system_message_factory=_dummy_system,
                few_shot_factory=_dummy_few_shots,
            )

            scope_assumption = WorkflowLedgerItem(
                text="notes.txt stays out of scope unless clarified otherwise.",
                status="contradicted",
                introduced_phase="clarify",
                updated_phase="recovery",
                evidence=["Clarify scope assumed `notes.txt` stayed out of scope."],
            )
            notes_anchor = WorkflowLedgerItem(
                text="notes.txt exists in the workspace root.",
                status="changed",
                introduced_phase="clarify",
                updated_phase="recovery",
            )
            ux_boundary = WorkflowLedgerItem(
                text="Escalate before broad UX changes.",
                status="tracked",
                introduced_phase="clarify",
            )
            ledger = WorkflowLedger(
                assumptions=[scope_assumption],
                acceptance_anchors=[notes_anchor],
                decision_boundaries=[ux_boundary],
            )
            session.update_workflow_ledger(ledger)

            reloaded = ConversationSession.load(
                session_id=session.session_id,
                project_root=temp_dir,
                system_message_factory=_dummy_system,
                few_shot_factory=_dummy_few_shots,
            )
            assert reloaded is not None

            restored = reloaded.workflow_ledger
            restored_assumption = restored.assumptions[0]
            assert restored_assumption.status == "contradicted"
            assert restored_assumption.updated_phase == "recovery"
            assert restored.acceptance_anchors[0].status == "changed"
            assert restored.decision_boundaries[0].text == "Escalate before broad UX changes."
      
        604
        
        605
        
        606
        def test_session_persists_prompt_history_state(temp_dir: Path) -> None:
            """Check that appended prompt snapshots are present, in order, after reloading.

            Two snapshots (a "plan" one followed by an "execute" one) are appended to a
            fresh session; the session is then loaded again by id and the history
            length, ordering and last snapshot content are compared.
            """
            task = "Tighten Loader workflow behavior"
            base_sections = ["Runtime Config", "Workflow Context", "Mode Guidance"]

            session = ConversationSession(
                project_root=temp_dir,
                system_message_factory=_dummy_system,
                few_shot_factory=_dummy_few_shots,
            )

            plan_snapshot = PromptSnapshot(
                timestamp="2026-04-07T14:00:00Z",
                workflow_mode="plan",
                permission_mode="prompt",
                current_task=task,
                prompt_format="native",
                prompt_sections=list(base_sections),
                content="# Introduction\nplan around planned.txt\n",
            )
            session.append_prompt_snapshot(plan_snapshot)

            execute_snapshot = PromptSnapshot(
                timestamp="2026-04-07T14:02:00Z",
                workflow_mode="execute",
                permission_mode="prompt",
                current_task=task,
                prompt_format="native",
                prompt_sections=[*base_sections, "Project Context"],
                content="# Introduction\nexecute around notes.txt\n# Project Context\npython\n",
            )
            session.append_prompt_snapshot(execute_snapshot)

            reloaded = ConversationSession.load(
                session_id=session.session_id,
                project_root=temp_dir,
                system_message_factory=_dummy_system,
                few_shot_factory=_dummy_few_shots,
            )
            assert reloaded is not None

            history = reloaded.prompt_history
            assert len(history) == 2

            oldest, newest = history[0], history[-1]
            assert oldest.workflow_mode == "plan"
            assert newest.workflow_mode == "execute"
            assert "notes.txt" in newest.content
      
        653
        
        654
        
        655
        @pytest.mark.asyncio
        async def test_turn_summary_usage_rolls_up_into_session_totals(temp_dir: Path) -> None:
            """Check per-turn and cumulative token usage after a single agent run.

            The backend is scripted with one completion whose usage reports 9 prompt
            tokens and 3 completion tokens; after ``agent.run`` the last turn summary
            is expected to expose those counts under ``input_tokens`` /
            ``output_tokens``, both for the turn and for the cumulative totals.
            """
            scripted_reply = CompletionResponse(
                content="Here's the answer.",
                usage={"prompt_tokens": 9, "completion_tokens": 3},
            )
            backend = ScriptedBackend(completions=[scripted_reply])

            agent_config = AgentConfig(
                auto_context=False,
                stream=False,
                reasoning=ReasoningConfig(completion_check=False),
            )
            agent = Agent(backend=backend, config=agent_config, project_root=temp_dir)

            await agent.run("Write a short release-note style summary of what Loader does well.")

            turn_summary = agent.last_turn_summary
            assert turn_summary is not None

            # Usage for this turn alone.
            turn_usage = turn_summary.usage
            assert turn_usage["input_tokens"] == 9
            assert turn_usage["output_tokens"] == 3

            # Running totals after exactly one turn.
            running_totals = turn_summary.cumulative_usage
            assert running_totals["input_tokens"] == 9
            assert running_totals["output_tokens"] == 3
            assert running_totals["turns"] == 1