loader Public

Watch 0 Fork 0 Star 0

Python · 22023 bytes Raw Blame History

  
        1
        """Tests for persisted session state and resume support."""
      
        2
        
        3
        from __future__ import annotations
      
        4
        
        5
        import json
      
        6
        from pathlib import Path
      
        7
        
        8
        import pytest
      
        9
        
        10
        from loader.agent.loop import Agent, AgentConfig, ReasoningConfig
      
        11
        from loader.llm.base import CompletionResponse, Message, Role, ToolCall
      
        12
        from loader.runtime.completion_trace import CompletionTraceEntry
      
        13
        from loader.runtime.evidence_provenance import EvidenceProvenance
      
        14
        from loader.runtime.prompt_history import PromptSnapshot
      
        15
        from loader.runtime.runtime_handle import RuntimeHandle
      
        16
        from loader.runtime.session import ConversationSession
      
        17
        from loader.runtime.workflow_ledger import WorkflowLedger, WorkflowLedgerItem
      
        18
        from loader.runtime.workflow_policy import WorkflowTimelineEntry
      
        19
        from tests.helpers.runtime_harness import ScriptedBackend
      
        20
        
        21
        
        22
        def _dummy_system() -> Message:
            """Build the placeholder system message used as a session factory in these tests."""
            placeholder = Message(role=Role.SYSTEM, content="system")
            return placeholder
      
        24
        
        25
        
        26
        def _dummy_few_shots() -> list[Message]:
      
        27
            return []
      
        28
        
        29
        
        30
        @pytest.mark.asyncio
        async def test_session_persists_and_resumes_across_agent_restart(temp_dir: Path) -> None:
            """A session written by one agent can be resumed by a separately built agent."""
            task = "Create hello.txt in the workspace root."
            closing_reply = "The file is written."

            # First scripted completion requests a `write` tool call; the second
            # is the plain closing answer.
            write_call = ToolCall(
                id="write-1",
                name="write",
                arguments={
                    "file_path": str(temp_dir / "hello.txt"),
                    "content": "hello\n",
                },
            )
            scripted_turns = [
                CompletionResponse(
                    content="I'll create the file.",
                    tool_calls=[write_call],
                    usage={"prompt_tokens": 12, "completion_tokens": 5},
                ),
                CompletionResponse(
                    content=closing_reply,
                    usage={"prompt_tokens": 10, "completion_tokens": 4},
                ),
            ]
            backend = ScriptedBackend(completions=scripted_turns)
            config = AgentConfig(auto_context=False, stream=False)
            first_agent = Agent(backend=backend, config=config, project_root=temp_dir)

            response = await first_agent.run(task)

            assert response.startswith(closing_reply)
            session_id = first_agent.session.session_id
            assert first_agent.session.storage_path.exists()

            # The second agent shares the config and project root but gets a
            # backend with no scripted completions.
            idle_backend = ScriptedBackend(completions=[])
            resumed_agent = Agent(backend=idle_backend, config=config, project_root=temp_dir)

            assert resumed_agent.resume_session(session_id) is True
            assert resumed_agent.session.session_id == session_id
            assert resumed_agent._current_task == task
            assert resumed_agent.active_permission_mode == "workspace-write"
            assert resumed_agent.workflow_mode == first_agent.workflow_mode

            turn_summary = resumed_agent.last_turn_summary
            assert turn_summary is not None
            definition_of_done = turn_summary.definition_of_done
            assert definition_of_done is not None
            assert definition_of_done.task_statement == task

            # The original user prompt must be present in the restored history.
            user_contents = [
                message.content
                for message in resumed_agent.messages
                if message.role == Role.USER
            ]
            assert task in user_contents
      
        84
        
        85
        
        86
        def test_agent_clear_history_rebuilds_a_fresh_runtime_session(temp_dir: Path) -> None:
            """clear_history() yields a new session id and resets the runtime state set beforehand."""
            config = AgentConfig(auto_context=False, stream=False)
            agent = Agent(backend=ScriptedBackend(), config=config, project_root=temp_dir)
            session_id_before = agent.session.session_id

            # Set non-default state so the reset is observable afterwards.
            agent.current_task = "Keep runtime state tidy."
            agent.prompt_format = "native"
            agent.prompt_sections = ["Runtime Config", "Workflow Context"]
            agent.set_workflow_mode("clarify")
            agent.queue_steering_message("Stay in runtime.")

            agent.clear_history()

            session_id_after = agent.session.session_id
            assert session_id_after != session_id_before
            assert agent.current_task is None
            assert agent.workflow_mode == "execute"
            assert agent.prompt_format is None
            assert agent.prompt_sections == []
            assert agent.messages == []
            assert agent.last_turn_summary is None
            pending_steering = agent.drain_steering_messages()
            assert pending_steering == []
      
        109
        
        110
        
        111
        def test_session_rotation_kicks_in_at_size_cap(temp_dir: Path) -> None:
            """Appending past ``rotate_after_bytes`` leaves a ``.1.json`` file next to the live one."""
            session = ConversationSession(
                project_root=temp_dir,
                system_message_factory=_dummy_system,
                few_shot_factory=_dummy_few_shots,
                rotate_after_bytes=250,
            )

            # Six messages of 120+ characters each against a 250-byte cap.
            padding = "x" * 120
            contents = [f"Message {index}: {padding}" for index in range(6)]
            for text in contents:
                session.append(Message(role=Role.USER, content=text))

            live_file = session.storage_path
            assert live_file.exists()
            rotated_file = live_file.with_suffix(".1.json")
            assert rotated_file.exists()
      
        129
        
        130
        
        131
        def test_session_compaction_persists_summary_and_recent_messages(temp_dir: Path) -> None:
            """Compaction puts a ``[COMPACTED CONTEXT]`` message first and keeps the last four."""
            transcript = [
                (Role.USER, "Kick off runtime audit"),
                (Role.ASSISTANT, "Initial findings"),
                (Role.USER, "Focus on sessions"),
                (Role.ASSISTANT, "Compaction design drafted"),
                (Role.USER, "Preserve the latest four messages"),
                (Role.ASSISTANT, "Ready to compact"),
            ]
            keep_last = 4
            session = ConversationSession(
                system_message_factory=_dummy_system,
                few_shot_factory=_dummy_few_shots,
                project_root=temp_dir,
                messages=[Message(role=role, content=text) for role, text in transcript],
                auto_compaction_input_tokens_threshold=1,
                compaction_keep_last_messages=keep_last,
            )

            result = session.maybe_compact()

            assert result is not None
            assert session.compaction is not None
            assert session.storage_path.exists()

            leading_message = session.messages[0]
            assert leading_message.content.startswith("[COMPACTED CONTEXT]")

            surviving_tail = [message.content for message in session.messages[-4:]]
            expected_tail = [text for _, text in transcript[-keep_last:]]
            assert surviving_tail == expected_tail
      
        160
        
        161
        
        162
        def test_session_persists_permission_policy_metadata(temp_dir: Path) -> None:
            """Runtime state, workflow timeline and completion trace are readable after a reload."""
            rules_source = str(temp_dir / ".loader" / "permission-rules.json")
            updated_counts = {"allow": 2, "deny": 1, "ask": 4}
            updated_sections = ["Runtime Config", "Workflow Context", "Project Context"]
            reason_summary = "task looks complex enough to benefit from a persisted plan"
            decision_code = "verification_failed_reentry"
            decision_summary = (
                "continued after verification failed and the runtime re-entered execute mode"
            )
            transition_summary = "completion -> finalize [terminal] Finalizing completed turn"
            trace_evidence = ["verification contradiction: pytest still failed"]
            open_questions = ["Scope is still broad."]

            # The session starts with one set of values and is then updated, so
            # the reload assertions below check the updated values.
            session = ConversationSession(
                system_message_factory=_dummy_system,
                few_shot_factory=_dummy_few_shots,
                project_root=temp_dir,
                permission_mode="prompt",
                permission_prompting_enabled=True,
                permission_rule_counts={"allow": 1, "deny": 2, "ask": 3},
                permission_rules_source=rules_source,
                prompt_format="react",
                prompt_sections=["Runtime Config", "Workflow Context"],
            )

            session.update_runtime_state(
                current_task="Inspect permission history",
                runtime_owner_type="RuntimeHandle",
                permission_mode="allow",
                permission_prompting_enabled=True,
                permission_rule_counts=dict(updated_counts),
                permission_rules_source=rules_source,
                prompt_format="native",
                prompt_sections=list(updated_sections),
                workflow_reason_code="task_is_complex",
                workflow_reason_summary=reason_summary,
                workflow_decision_kind="initial_route",
                workflow_ambiguity_score=0.2,
                workflow_complexity_score=0.6,
                workflow_scheduled_next_mode="execute",
                last_completion_decision_code=decision_code,
                last_completion_decision_summary=decision_summary,
                last_turn_transition_summary=transition_summary,
                last_turn_transition_kind="terminal",
                last_turn_transition_reason_code="turn_complete",
            )

            route_entry = WorkflowTimelineEntry(
                timestamp="2026-04-07T12:00:00Z",
                kind="route",
                mode="plan",
                reason_code="task_is_complex",
                summary="plan: workflow pressure favors a persisted plan before execution",
                decision_kind="initial_route",
                route_score=0.72,
                runner_up_mode="clarify",
                runner_up_score=0.61,
                scheduled_next_mode="execute",
                unresolved_questions=list(open_questions),
                prompt_format="native",
                prompt_sections=list(updated_sections),
            )
            session.append_workflow_timeline_entry(route_entry)

            trace_entry = CompletionTraceEntry(
                stage="definition_of_done",
                outcome="continue",
                decision_code=decision_code,
                decision_summary=decision_summary,
                evidence_summary=list(trace_evidence),
            )
            session.append_completion_trace_entry(trace_entry)

            reloaded = ConversationSession.load(
                project_root=temp_dir,
                system_message_factory=_dummy_system,
                few_shot_factory=_dummy_few_shots,
                session_id=session.session_id,
            )

            assert reloaded is not None
            assert reloaded.permission_prompting_enabled is True

            # Attribute name -> value expected on the reloaded session.
            expected_state = {
                "permission_mode": "allow",
                "permission_rule_counts": updated_counts,
                "permission_rules_source": rules_source,
                "runtime_owner_type": "RuntimeHandle",
                "runtime_owner_path": "runtime-handle",
                "prompt_format": "native",
                "prompt_sections": updated_sections,
                "workflow_reason_code": "task_is_complex",
                "workflow_reason_summary": reason_summary,
                "workflow_decision_kind": "initial_route",
                "workflow_ambiguity_score": pytest.approx(0.2),
                "workflow_complexity_score": pytest.approx(0.6),
                "workflow_scheduled_next_mode": "execute",
                "last_completion_decision_code": decision_code,
                "last_completion_decision_summary": decision_summary,
                "last_turn_transition_summary": transition_summary,
                "last_turn_transition_kind": "terminal",
                "last_turn_transition_reason_code": "turn_complete",
            }
            for attribute, expected in expected_state.items():
                assert getattr(reloaded, attribute) == expected, attribute

            trace_codes = [entry.decision_code for entry in reloaded.completion_trace]
            assert trace_codes == [decision_code]
            assert reloaded.completion_trace[0].evidence_summary == trace_evidence

            assert len(reloaded.workflow_timeline) == 1
            restored_route = reloaded.workflow_timeline[0]
            assert restored_route.mode == "plan"
            assert restored_route.route_score == pytest.approx(0.72)
            assert restored_route.unresolved_questions == open_questions
      
        279
        
        280
        
        281
        def test_resume_session_updates_runtime_owner_metadata(temp_dir: Path) -> None:
            """Resuming via a RuntimeHandle leaves RuntimeHandle owner metadata on the stored session."""

            def runtime_kwargs() -> dict:
                # Separate backend and config instances for every runtime object.
                return {
                    "backend": ScriptedBackend(),
                    "config": AgentConfig(auto_context=False, stream=False),
                    "project_root": temp_dir,
                }

            agent = Agent(**runtime_kwargs())
            agent.session.persist()
            session_id = agent.session.session_id

            handle = RuntimeHandle(**runtime_kwargs())
            resumed = handle.resume_session(session_id)
            assert resumed is True

            reloaded = ConversationSession.load(
                session_id=session_id,
                project_root=temp_dir,
                system_message_factory=_dummy_system,
                few_shot_factory=_dummy_few_shots,
            )

            assert reloaded is not None
            owner = (reloaded.runtime_owner_type, reloaded.runtime_owner_path)
            assert owner[0] == "RuntimeHandle"
            assert owner[1] == "runtime-handle"
      
        308
        
        309
        
        310
        def test_session_prefers_canonical_workflow_timeline_for_completion_trace(
            temp_dir: Path,
        ) -> None:
            """After reload, the completion trace mirrors the timeline entries, not the appended trace entry."""
            budget_code = "continuation_budget_exhausted"
            carried_out = ["showing the requested work was actually carried out"]
            # Fields that both timeline entries have in common.
            shared_fields = {
                "mode": "execute",
                "decision_kind": "forced",
                "policy_stage": "continuation_check",
            }

            session = ConversationSession(
                system_message_factory=_dummy_system,
                few_shot_factory=_dummy_few_shots,
                project_root=temp_dir,
            )

            session.update_runtime_state(
                current_task="Explain why the turn stopped",
                last_completion_decision_code=budget_code,
                last_completion_decision_summary=(
                    "stopped because the continuation budget was exhausted while "
                    "follow-through evidence was still missing"
                ),
            )

            # Appended directly to the trace; the assertions below expect it to
            # be absent from both the stored JSON and the reloaded trace.
            stale_entry = CompletionTraceEntry(
                stage="definition_of_done",
                outcome="complete",
                decision_code="stale_completion_trace",
                decision_summary="this legacy trace entry should be ignored",
            )
            session.append_completion_trace_entry(stale_entry)

            check_entry = WorkflowTimelineEntry(
                timestamp="2026-04-09T12:00:00Z",
                kind="completion_check",
                reason_code="premature_completion_nudge",
                summary=(
                    "completion: requested one continuation because the non-mutating "
                    "response looked incomplete"
                ),
                policy_outcome="continue",
                evidence_summary=list(carried_out),
                **shared_fields,
            )
            finalize_entry = WorkflowTimelineEntry(
                timestamp="2026-04-09T12:01:00Z",
                kind="completion_finalize",
                reason_code=budget_code,
                summary=(
                    "completion: stopped because the continuation budget was exhausted "
                    "while follow-through evidence was still missing"
                ),
                policy_outcome="finalize",
                evidence_summary=list(carried_out),
                **shared_fields,
            )
            for timeline_entry in (check_entry, finalize_entry):
                session.append_workflow_timeline_entry(timeline_entry)

            raw_payload = session.storage_path.read_text()
            persisted = json.loads(raw_payload)
            assert "completion_trace" not in persisted

            reloaded = ConversationSession.load(
                project_root=temp_dir,
                system_message_factory=_dummy_system,
                few_shot_factory=_dummy_few_shots,
                session_id=session.session_id,
            )

            assert reloaded is not None
            reloaded_codes = [entry.decision_code for entry in reloaded.completion_trace]
            assert reloaded_codes == ["premature_completion_nudge", budget_code]

            final_trace = reloaded.completion_trace[-1]
            assert final_trace.stage == "continuation_check"
            assert final_trace.outcome == "finalize"
            assert final_trace.evidence_summary == carried_out
      
        388
        
        389
        
        390
        def test_session_projects_live_completion_trace_from_workflow_timeline(
            temp_dir: Path,
        ) -> None:
            """Without any reload, ``completion_trace`` reflects the appended timeline entries."""
            budget_code = "continuation_budget_exhausted"
            wanted_evidence = ["a passing verification result from `pytest -q`"]
            missing_summary = "verification evidence was still missing for `pytest -q`"

            session = ConversationSession(
                system_message_factory=_dummy_system,
                few_shot_factory=_dummy_few_shots,
                project_root=temp_dir,
            )

            accepted_entry = WorkflowTimelineEntry(
                timestamp="2026-04-09T12:00:00Z",
                kind="completion_check",
                mode="execute",
                reason_code="completion_response_accepted",
                summary="completion: accepted the response because follow-through evidence was present",
                decision_kind="forced",
                policy_stage="continuation_check",
                policy_outcome="accept",
            )
            session.append_workflow_timeline_entry(accepted_entry)

            missing_verification = EvidenceProvenance(
                category="verification",
                source="dod.verification_commands",
                summary=missing_summary,
                status="missing",
                subject="pytest -q",
            )
            finalize_entry = WorkflowTimelineEntry(
                timestamp="2026-04-09T12:01:00Z",
                kind="completion_finalize",
                mode="execute",
                reason_code=budget_code,
                summary="completion: stopped because verification evidence was still missing",
                decision_kind="forced",
                policy_stage="continuation_check",
                policy_outcome="finalize",
                evidence_summary=list(wanted_evidence),
                evidence_provenance=[missing_verification],
            )
            session.append_workflow_timeline_entry(finalize_entry)

            session.update_runtime_state(
                last_completion_decision_code=budget_code,
                last_completion_decision_summary=(
                    "stopped because verification evidence was still missing"
                ),
            )

            live_codes = [entry.decision_code for entry in session.completion_trace]
            assert live_codes == ["completion_response_accepted", budget_code]

            assert session.completion_trace[-1].stage == "continuation_check"
            assert session.completion_trace[-1].outcome == "finalize"
            assert session.completion_trace[-1].evidence_summary == wanted_evidence

            provenance_summaries = [
                item.summary for item in session.completion_trace[-1].evidence_provenance
            ]
            assert provenance_summaries == [missing_summary]
      
        452
        
        453
        
        454
        def test_session_persists_workflow_ledger_state(temp_dir: Path) -> None:
            """Ledger items passed to update_workflow_ledger() are readable after a reload."""
            boundary_text = "Escalate before broad UX changes."

            assumption = WorkflowLedgerItem(
                text="notes.txt stays out of scope unless clarified otherwise.",
                status="contradicted",
                introduced_phase="clarify",
                updated_phase="recovery",
                evidence=["Clarify scope assumed `notes.txt` stayed out of scope."],
            )
            anchor = WorkflowLedgerItem(
                text="notes.txt exists in the workspace root.",
                status="changed",
                introduced_phase="clarify",
                updated_phase="recovery",
            )
            boundary = WorkflowLedgerItem(
                text=boundary_text,
                status="tracked",
                introduced_phase="clarify",
            )
            ledger = WorkflowLedger(
                assumptions=[assumption],
                acceptance_anchors=[anchor],
                decision_boundaries=[boundary],
            )

            session = ConversationSession(
                system_message_factory=_dummy_system,
                few_shot_factory=_dummy_few_shots,
                project_root=temp_dir,
            )
            session.update_workflow_ledger(ledger)

            reloaded = ConversationSession.load(
                project_root=temp_dir,
                system_message_factory=_dummy_system,
                few_shot_factory=_dummy_few_shots,
                session_id=session.session_id,
            )

            assert reloaded is not None
            restored = reloaded.workflow_ledger
            assert restored.assumptions[0].status == "contradicted"
            assert restored.assumptions[0].updated_phase == "recovery"
            assert restored.acceptance_anchors[0].status == "changed"
            assert restored.decision_boundaries[0].text == boundary_text
      
        504
        
        505
        
        506
        def test_session_persists_prompt_history_state(temp_dir: Path) -> None:
            """Two appended prompt snapshots are both present, in order, after a reload."""
            shared_task = "Tighten Loader workflow behavior"
            session = ConversationSession(
                system_message_factory=_dummy_system,
                few_shot_factory=_dummy_few_shots,
                project_root=temp_dir,
            )

            # First snapshot: captured in "plan" mode with three prompt sections.
            plan_snapshot = PromptSnapshot(
                timestamp="2026-04-07T14:00:00Z",
                workflow_mode="plan",
                permission_mode="prompt",
                current_task=shared_task,
                prompt_format="native",
                prompt_sections=["Runtime Config", "Workflow Context", "Mode Guidance"],
                content="# Introduction\nplan around planned.txt\n",
            )
            session.append_prompt_snapshot(plan_snapshot)

            # Second snapshot: "execute" mode, one extra section, content mentions notes.txt.
            execute_snapshot = PromptSnapshot(
                timestamp="2026-04-07T14:02:00Z",
                workflow_mode="execute",
                permission_mode="prompt",
                current_task=shared_task,
                prompt_format="native",
                prompt_sections=[
                    "Runtime Config",
                    "Workflow Context",
                    "Mode Guidance",
                    "Project Context",
                ],
                content="# Introduction\nexecute around notes.txt\n# Project Context\npython\n",
            )
            session.append_prompt_snapshot(execute_snapshot)

            reloaded = ConversationSession.load(
                project_root=temp_dir,
                system_message_factory=_dummy_system,
                few_shot_factory=_dummy_few_shots,
                session_id=session.session_id,
            )

            assert reloaded is not None
            history = reloaded.prompt_history
            assert len(history) == 2
            oldest, newest = history[0], history[-1]
            assert oldest.workflow_mode == "plan"
            assert newest.workflow_mode == "execute"
            assert "notes.txt" in newest.content
      
        553
        
        554
        
        555
        @pytest.mark.asyncio
        async def test_turn_summary_usage_rolls_up_into_session_totals(temp_dir: Path) -> None:
            """One scripted turn reports its token usage both per-turn and cumulatively."""
            only_reply = CompletionResponse(
                content="Here's the answer.",
                usage={"prompt_tokens": 9, "completion_tokens": 3},
            )
            backend = ScriptedBackend(completions=[only_reply])
            # completion_check is switched off in the reasoning config for this run.
            config = AgentConfig(
                auto_context=False,
                stream=False,
                reasoning=ReasoningConfig(completion_check=False),
            )
            agent = Agent(backend=backend, config=config, project_root=temp_dir)

            await agent.run("Write a short release-note style summary of what Loader does well.")

            summary = agent.last_turn_summary
            assert summary is not None
            # The backend's prompt/completion counts are read back as input/output tokens.
            assert summary.usage["input_tokens"] == 9
            assert summary.usage["output_tokens"] == 3
            assert summary.cumulative_usage["input_tokens"] == 9
            assert summary.cumulative_usage["output_tokens"] == 3
            assert summary.cumulative_usage["turns"] == 1

1	"""Tests for persisted session state and resume support."""
2
3	from __future__ import annotations
4
5	import json
6	from pathlib import Path
7
8	import pytest
9
10	from loader.agent.loop import Agent, AgentConfig, ReasoningConfig
11	from loader.llm.base import CompletionResponse, Message, Role, ToolCall
12	from loader.runtime.completion_trace import CompletionTraceEntry
13	from loader.runtime.evidence_provenance import EvidenceProvenance
14	from loader.runtime.prompt_history import PromptSnapshot
15	from loader.runtime.runtime_handle import RuntimeHandle
16	from loader.runtime.session import ConversationSession
17	from loader.runtime.workflow_ledger import WorkflowLedger, WorkflowLedgerItem
18	from loader.runtime.workflow_policy import WorkflowTimelineEntry
19	from tests.helpers.runtime_harness import ScriptedBackend
20
21
def _dummy_system() -> Message:
    """Build the placeholder system message handed to sessions in these tests."""
    placeholder = Message(role=Role.SYSTEM, content="system")
    return placeholder
24
25
26	def _dummy_few_shots() -> list[Message]:
27	return []
28
29
@pytest.mark.asyncio
async def test_session_persists_and_resumes_across_agent_restart(temp_dir: Path) -> None:
    """A session written by one agent can be resumed by a freshly built agent.

    The first agent is scripted to issue one ``write`` tool call followed by a
    closing reply. A second agent, built with an empty script, resumes the same
    session id and is checked for the original task, modes, turn summary, and
    user message.
    """
    task = "Create hello.txt in the workspace root."
    write_call = ToolCall(
        id="write-1",
        name="write",
        arguments={
            "file_path": str(temp_dir / "hello.txt"),
            "content": "hello\n",
        },
    )
    scripted_turns = [
        CompletionResponse(
            content="I'll create the file.",
            tool_calls=[write_call],
            usage={"prompt_tokens": 12, "completion_tokens": 5},
        ),
        CompletionResponse(
            content="The file is written.",
            usage={"prompt_tokens": 10, "completion_tokens": 4},
        ),
    ]
    backend = ScriptedBackend(completions=scripted_turns)
    shared_config = AgentConfig(auto_context=False, stream=False)
    first_agent = Agent(backend=backend, config=shared_config, project_root=temp_dir)

    final_reply = await first_agent.run(task)

    assert final_reply.startswith("The file is written.")
    session_id = first_agent.session.session_id
    assert first_agent.session.storage_path.exists()

    # Nothing is scripted for the second agent; the checks below only read resumed state.
    resumed_agent = Agent(
        backend=ScriptedBackend(completions=[]),
        config=shared_config,
        project_root=temp_dir,
    )

    assert resumed_agent.resume_session(session_id) is True
    assert resumed_agent.session.session_id == session_id
    assert resumed_agent._current_task == task
    assert resumed_agent.active_permission_mode == "workspace-write"
    assert resumed_agent.workflow_mode == first_agent.workflow_mode
    assert resumed_agent.last_turn_summary is not None
    assert resumed_agent.last_turn_summary.definition_of_done is not None
    assert resumed_agent.last_turn_summary.definition_of_done.task_statement == task
    assert any(
        entry.role == Role.USER and entry.content == task
        for entry in resumed_agent.messages
    )
84
85
def test_agent_clear_history_rebuilds_a_fresh_runtime_session(temp_dir: Path) -> None:
    """``clear_history`` swaps in a new session id and resets the agent's per-session state."""
    scripted = ScriptedBackend()
    config = AgentConfig(auto_context=False, stream=False)
    agent = Agent(backend=scripted, config=config, project_root=temp_dir)
    stale_session_id = agent.session.session_id

    # Dirty every piece of state that is asserted to be reset below.
    agent.current_task = "Keep runtime state tidy."
    agent.prompt_format = "native"
    agent.prompt_sections = ["Runtime Config", "Workflow Context"]
    agent.set_workflow_mode("clarify")
    agent.queue_steering_message("Stay in runtime.")

    agent.clear_history()

    assert agent.session.session_id != stale_session_id
    assert agent.current_task is None
    assert agent.workflow_mode == "execute"
    assert agent.prompt_format is None
    assert agent.prompt_sections == []
    assert agent.messages == []
    assert agent.last_turn_summary is None
    assert agent.drain_steering_messages() == []
109
110
def test_session_rotation_kicks_in_at_size_cap(temp_dir: Path) -> None:
    """Appending past ``rotate_after_bytes`` leaves the live file plus a ``.1.json`` sibling."""
    session = ConversationSession(
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        project_root=temp_dir,
        rotate_after_bytes=250,
    )

    # Six messages of 120+ characters each, against a 250-byte rotation cap.
    padding = "x" * 120
    for index in range(6):
        oversized = Message(role=Role.USER, content=f"Message {index}: " + padding)
        session.append(oversized)

    assert session.storage_path.exists()
    rotated_path = session.storage_path.with_suffix(".1.json")
    assert rotated_path.exists()
129
130
def test_session_compaction_persists_summary_and_recent_messages(temp_dir: Path) -> None:
    """Compaction puts a ``[COMPACTED CONTEXT]`` message first and keeps the last four messages."""
    transcript = [
        (Role.USER, "Kick off runtime audit"),
        (Role.ASSISTANT, "Initial findings"),
        (Role.USER, "Focus on sessions"),
        (Role.ASSISTANT, "Compaction design drafted"),
        (Role.USER, "Preserve the latest four messages"),
        (Role.ASSISTANT, "Ready to compact"),
    ]
    session = ConversationSession(
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        project_root=temp_dir,
        messages=[Message(role=role, content=text) for role, text in transcript],
        # A threshold of 1 is used so that maybe_compact() returns a result here.
        auto_compaction_input_tokens_threshold=1,
        compaction_keep_last_messages=4,
    )

    outcome = session.maybe_compact()

    assert outcome is not None
    assert session.compaction is not None
    assert session.storage_path.exists()
    assert session.messages[0].content.startswith("[COMPACTED CONTEXT]")
    expected_tail = [text for _, text in transcript[-4:]]
    assert [kept.content for kept in session.messages[-4:]] == expected_tail
160
161
def test_session_persists_permission_policy_metadata(temp_dir: Path) -> None:
    """Permission, prompt, workflow, and completion metadata all survive a reload.

    The session is created with one set of values, overwritten through
    ``update_runtime_state``, given one timeline entry and one completion-trace
    entry, and then loaded again by id. The assertions check the overwritten
    values rather than the constructor ones.
    """
    rules_source = str(temp_dir / ".loader" / "permission-rules.json")
    complexity_summary = "task looks complex enough to benefit from a persisted plan"
    reentry_code = "verification_failed_reentry"
    reentry_summary = (
        "continued after verification failed and the runtime re-entered execute mode"
    )
    transition_summary = "completion -> finalize [terminal] Finalizing completed turn"
    failing_evidence = "verification contradiction: pytest still failed"

    session = ConversationSession(
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        project_root=temp_dir,
        permission_mode="prompt",
        permission_prompting_enabled=True,
        permission_rule_counts={"allow": 1, "deny": 2, "ask": 3},
        permission_rules_source=rules_source,
        prompt_format="react",
        prompt_sections=["Runtime Config", "Workflow Context"],
    )

    # Replace the constructor values; the reload is compared against these.
    session.update_runtime_state(
        current_task="Inspect permission history",
        runtime_owner_type="RuntimeHandle",
        permission_mode="allow",
        permission_prompting_enabled=True,
        permission_rule_counts={"allow": 2, "deny": 1, "ask": 4},
        permission_rules_source=rules_source,
        prompt_format="native",
        prompt_sections=["Runtime Config", "Workflow Context", "Project Context"],
        workflow_reason_code="task_is_complex",
        workflow_reason_summary=complexity_summary,
        workflow_decision_kind="initial_route",
        workflow_ambiguity_score=0.2,
        workflow_complexity_score=0.6,
        workflow_scheduled_next_mode="execute",
        last_completion_decision_code=reentry_code,
        last_completion_decision_summary=reentry_summary,
        last_turn_transition_summary=transition_summary,
        last_turn_transition_kind="terminal",
        last_turn_transition_reason_code="turn_complete",
    )

    route_entry = WorkflowTimelineEntry(
        timestamp="2026-04-07T12:00:00Z",
        kind="route",
        mode="plan",
        reason_code="task_is_complex",
        summary="plan: workflow pressure favors a persisted plan before execution",
        decision_kind="initial_route",
        route_score=0.72,
        runner_up_mode="clarify",
        runner_up_score=0.61,
        scheduled_next_mode="execute",
        unresolved_questions=["Scope is still broad."],
        prompt_format="native",
        prompt_sections=["Runtime Config", "Workflow Context", "Project Context"],
    )
    session.append_workflow_timeline_entry(route_entry)

    trace_entry = CompletionTraceEntry(
        stage="definition_of_done",
        outcome="continue",
        decision_code=reentry_code,
        decision_summary=reentry_summary,
        evidence_summary=[failing_evidence],
    )
    session.append_completion_trace_entry(trace_entry)

    reloaded = ConversationSession.load(
        project_root=temp_dir,
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        session_id=session.session_id,
    )

    assert reloaded is not None

    # Permission policy fields.
    assert reloaded.permission_mode == "allow"
    assert reloaded.permission_prompting_enabled is True
    assert reloaded.permission_rule_counts == {"allow": 2, "deny": 1, "ask": 4}
    assert reloaded.permission_rules_source == rules_source

    # Runtime owner and prompt fields.
    assert reloaded.runtime_owner_type == "RuntimeHandle"
    assert reloaded.runtime_owner_path == "runtime-handle"
    assert reloaded.prompt_format == "native"
    assert reloaded.prompt_sections == [
        "Runtime Config",
        "Workflow Context",
        "Project Context",
    ]

    # Workflow routing fields.
    assert reloaded.workflow_reason_code == "task_is_complex"
    assert reloaded.workflow_reason_summary == complexity_summary
    assert reloaded.workflow_decision_kind == "initial_route"
    assert reloaded.workflow_ambiguity_score == pytest.approx(0.2)
    assert reloaded.workflow_complexity_score == pytest.approx(0.6)
    assert reloaded.workflow_scheduled_next_mode == "execute"

    # Completion decision and trace.
    assert reloaded.last_completion_decision_code == reentry_code
    assert reloaded.last_completion_decision_summary == reentry_summary
    assert [entry.decision_code for entry in reloaded.completion_trace] == [reentry_code]
    assert reloaded.completion_trace[0].evidence_summary == [failing_evidence]

    # Last turn transition.
    assert reloaded.last_turn_transition_summary == transition_summary
    assert reloaded.last_turn_transition_kind == "terminal"
    assert reloaded.last_turn_transition_reason_code == "turn_complete"

    # Workflow timeline.
    assert len(reloaded.workflow_timeline) == 1
    assert reloaded.workflow_timeline[0].mode == "plan"
    assert reloaded.workflow_timeline[0].route_score == pytest.approx(0.72)
    assert reloaded.workflow_timeline[0].unresolved_questions == ["Scope is still broad."]
279
280
def test_resume_session_updates_runtime_owner_metadata(temp_dir: Path) -> None:
    """Resuming a session through a ``RuntimeHandle`` shows up in the stored owner fields."""
    # Persist a session from a plain Agent so there is something on disk to resume.
    seed_backend = ScriptedBackend()
    seed_config = AgentConfig(auto_context=False, stream=False)
    agent = Agent(backend=seed_backend, config=seed_config, project_root=temp_dir)
    agent.session.persist()
    session_id = agent.session.session_id

    handle_backend = ScriptedBackend()
    handle_config = AgentConfig(auto_context=False, stream=False)
    handle = RuntimeHandle(
        backend=handle_backend,
        config=handle_config,
        project_root=temp_dir,
    )

    assert handle.resume_session(session_id) is True

    # Load the session again by id and inspect the owner fields.
    reloaded = ConversationSession.load(
        project_root=temp_dir,
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        session_id=session_id,
    )

    assert reloaded is not None
    assert reloaded.runtime_owner_type == "RuntimeHandle"
    assert reloaded.runtime_owner_path == "runtime-handle"
308
309
def test_session_prefers_canonical_workflow_timeline_for_completion_trace(
    temp_dir: Path,
) -> None:
    """The reloaded completion trace comes from timeline entries, not the appended trace entry.

    A completion-trace entry with code ``stale_completion_trace`` is appended
    next to two completion timeline entries. The persisted JSON must have no
    ``completion_trace`` key, and the reloaded trace must list only the two
    timeline reason codes.
    """
    exhausted_code = "continuation_budget_exhausted"
    follow_through = "showing the requested work was actually carried out"

    session = ConversationSession(
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        project_root=temp_dir,
    )

    session.update_runtime_state(
        current_task="Explain why the turn stopped",
        last_completion_decision_code=exhausted_code,
        last_completion_decision_summary=(
            "stopped because the continuation budget was exhausted while "
            "follow-through evidence was still missing"
        ),
    )

    # This entry's decision code must not appear in the reloaded trace.
    stale_entry = CompletionTraceEntry(
        stage="definition_of_done",
        outcome="complete",
        decision_code="stale_completion_trace",
        decision_summary="this legacy trace entry should be ignored",
    )
    session.append_completion_trace_entry(stale_entry)

    nudge_entry = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:00:00Z",
        kind="completion_check",
        mode="execute",
        reason_code="premature_completion_nudge",
        summary=(
            "completion: requested one continuation because the non-mutating "
            "response looked incomplete"
        ),
        decision_kind="forced",
        policy_stage="continuation_check",
        policy_outcome="continue",
        evidence_summary=[follow_through],
    )
    session.append_workflow_timeline_entry(nudge_entry)

    finalize_entry = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:01:00Z",
        kind="completion_finalize",
        mode="execute",
        reason_code=exhausted_code,
        summary=(
            "completion: stopped because the continuation budget was exhausted "
            "while follow-through evidence was still missing"
        ),
        decision_kind="forced",
        policy_stage="continuation_check",
        policy_outcome="finalize",
        evidence_summary=[follow_through],
    )
    session.append_workflow_timeline_entry(finalize_entry)

    on_disk = json.loads(session.storage_path.read_text())
    assert "completion_trace" not in on_disk

    reloaded = ConversationSession.load(
        project_root=temp_dir,
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        session_id=session.session_id,
    )

    assert reloaded is not None
    assert [entry.decision_code for entry in reloaded.completion_trace] == [
        "premature_completion_nudge",
        exhausted_code,
    ]
    last_entry = reloaded.completion_trace[-1]
    assert last_entry.stage == "continuation_check"
    assert last_entry.outcome == "finalize"
    assert last_entry.evidence_summary == [follow_through]
388
389
def test_session_projects_live_completion_trace_from_workflow_timeline(
    temp_dir: Path,
) -> None:
    """Without any reload, ``completion_trace`` mirrors the appended completion timeline entries."""
    exhausted_code = "continuation_budget_exhausted"
    missing_result = "a passing verification result from `pytest -q`"
    missing_provenance = "verification evidence was still missing for `pytest -q`"

    session = ConversationSession(
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        project_root=temp_dir,
    )

    accepted_entry = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:00:00Z",
        kind="completion_check",
        mode="execute",
        reason_code="completion_response_accepted",
        summary="completion: accepted the response because follow-through evidence was present",
        decision_kind="forced",
        policy_stage="continuation_check",
        policy_outcome="accept",
    )
    session.append_workflow_timeline_entry(accepted_entry)

    # The finalize entry carries both an evidence summary and one provenance record.
    provenance = EvidenceProvenance(
        category="verification",
        source="dod.verification_commands",
        summary=missing_provenance,
        status="missing",
        subject="pytest -q",
    )
    finalize_entry = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:01:00Z",
        kind="completion_finalize",
        mode="execute",
        reason_code=exhausted_code,
        summary="completion: stopped because verification evidence was still missing",
        decision_kind="forced",
        policy_stage="continuation_check",
        policy_outcome="finalize",
        evidence_summary=[missing_result],
        evidence_provenance=[provenance],
    )
    session.append_workflow_timeline_entry(finalize_entry)

    session.update_runtime_state(
        last_completion_decision_code=exhausted_code,
        last_completion_decision_summary=(
            "stopped because verification evidence was still missing"
        ),
    )

    assert [entry.decision_code for entry in session.completion_trace] == [
        "completion_response_accepted",
        exhausted_code,
    ]
    final_entry = session.completion_trace[-1]
    assert final_entry.stage == "continuation_check"
    assert final_entry.outcome == "finalize"
    assert final_entry.evidence_summary == [missing_result]
    assert [record.summary for record in final_entry.evidence_provenance] == [
        missing_provenance
    ]
452
453
def test_session_persists_workflow_ledger_state(temp_dir: Path) -> None:
    """A ledger with one assumption, one anchor, and one boundary is read back after a reload."""
    boundary_text = "Escalate before broad UX changes."
    session = ConversationSession(
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        project_root=temp_dir,
    )

    contradicted_assumption = WorkflowLedgerItem(
        text="notes.txt stays out of scope unless clarified otherwise.",
        status="contradicted",
        introduced_phase="clarify",
        updated_phase="recovery",
        evidence=["Clarify scope assumed `notes.txt` stayed out of scope."],
    )
    changed_anchor = WorkflowLedgerItem(
        text="notes.txt exists in the workspace root.",
        status="changed",
        introduced_phase="clarify",
        updated_phase="recovery",
    )
    tracked_boundary = WorkflowLedgerItem(
        text=boundary_text,
        status="tracked",
        introduced_phase="clarify",
    )
    ledger = WorkflowLedger(
        assumptions=[contradicted_assumption],
        acceptance_anchors=[changed_anchor],
        decision_boundaries=[tracked_boundary],
    )
    session.update_workflow_ledger(ledger)

    reloaded = ConversationSession.load(
        project_root=temp_dir,
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        session_id=session.session_id,
    )

    assert reloaded is not None
    restored = reloaded.workflow_ledger
    assert restored.assumptions[0].status == "contradicted"
    assert restored.assumptions[0].updated_phase == "recovery"
    assert restored.acceptance_anchors[0].status == "changed"
    assert restored.decision_boundaries[0].text == boundary_text
504
505
def test_session_persists_prompt_history_state(temp_dir: Path) -> None:
    """Two appended prompt snapshots are both present, in order, after a reload."""
    shared_task = "Tighten Loader workflow behavior"
    session = ConversationSession(
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        project_root=temp_dir,
    )

    # First snapshot: captured in "plan" mode with three prompt sections.
    plan_snapshot = PromptSnapshot(
        timestamp="2026-04-07T14:00:00Z",
        workflow_mode="plan",
        permission_mode="prompt",
        current_task=shared_task,
        prompt_format="native",
        prompt_sections=["Runtime Config", "Workflow Context", "Mode Guidance"],
        content="# Introduction\nplan around planned.txt\n",
    )
    session.append_prompt_snapshot(plan_snapshot)

    # Second snapshot: "execute" mode, one extra section, content mentions notes.txt.
    execute_snapshot = PromptSnapshot(
        timestamp="2026-04-07T14:02:00Z",
        workflow_mode="execute",
        permission_mode="prompt",
        current_task=shared_task,
        prompt_format="native",
        prompt_sections=[
            "Runtime Config",
            "Workflow Context",
            "Mode Guidance",
            "Project Context",
        ],
        content="# Introduction\nexecute around notes.txt\n# Project Context\npython\n",
    )
    session.append_prompt_snapshot(execute_snapshot)

    reloaded = ConversationSession.load(
        project_root=temp_dir,
        system_message_factory=_dummy_system,
        few_shot_factory=_dummy_few_shots,
        session_id=session.session_id,
    )

    assert reloaded is not None
    history = reloaded.prompt_history
    assert len(history) == 2
    oldest, newest = history[0], history[-1]
    assert oldest.workflow_mode == "plan"
    assert newest.workflow_mode == "execute"
    assert "notes.txt" in newest.content
553
554
@pytest.mark.asyncio
async def test_turn_summary_usage_rolls_up_into_session_totals(temp_dir: Path) -> None:
    """One scripted turn reports its token usage both per-turn and cumulatively."""
    only_reply = CompletionResponse(
        content="Here's the answer.",
        usage={"prompt_tokens": 9, "completion_tokens": 3},
    )
    backend = ScriptedBackend(completions=[only_reply])
    # completion_check is switched off in the reasoning config for this run.
    config = AgentConfig(
        auto_context=False,
        stream=False,
        reasoning=ReasoningConfig(completion_check=False),
    )
    agent = Agent(backend=backend, config=config, project_root=temp_dir)

    await agent.run("Write a short release-note style summary of what Loader does well.")

    summary = agent.last_turn_summary
    assert summary is not None
    # The backend's prompt/completion counts are read back as input/output tokens.
    assert summary.usage["input_tokens"] == 9
    assert summary.usage["output_tokens"] == 3
    assert summary.cumulative_usage["input_tokens"] == 9
    assert summary.cumulative_usage["output_tokens"] == 3
    assert summary.cumulative_usage["turns"] == 1