loader Public

Watch 0 Fork 0 Star 0
Python · 64001 bytes Raw Blame History
  
        1
        """Runtime integration coverage for Sprint 04 workflow routing."""
      
        2
        
        3
        from __future__ import annotations
      
        4
        
        5
        from pathlib import Path
      
        6
        
        7
        import pytest
      
        8
        
        9
        from loader.agent.loop import AgentConfig
      
        10
        from loader.llm.base import CompletionResponse, ToolCall
      
        11
        from tests.helpers.runtime_harness import ScriptedBackend, run_scenario
      
        12
        
        13
        
        14
        def non_streaming_config() -> AgentConfig:
            """Build the baseline non-streaming agent config used by these runtime tests.

            Auto-context and streaming are both switched off and the run is capped
            at eight iterations.
            """
            settings = {
                "auto_context": False,
                "stream": False,
                "max_iterations": 8,
            }
            return AgentConfig(**settings)
      
        18
        
        19
        
        20
        def non_streaming_clarify_config() -> AgentConfig:
            """Build a non-streaming config whose workflow mode is overridden to clarify.

            Same baseline as the plain non-streaming config (no auto-context, no
            streaming, eight iterations) plus ``workflow_mode_override="clarify"``.
            """
            settings = {
                "auto_context": False,
                "stream": False,
                "max_iterations": 8,
                "workflow_mode_override": "clarify",
            }
            return AgentConfig(**settings)
      
        29
        
        30
        
        31
        def non_streaming_single_round_clarify_config() -> AgentConfig:
            """Build a clarify-override config limited to a single clarify round.

            Used by the one-round clarify artifact tests: no auto-context, no
            streaming, eight iterations, clarify override, ``clarify_max_rounds=1``.
            """
            settings = {
                "auto_context": False,
                "stream": False,
                "max_iterations": 8,
                "workflow_mode_override": "clarify",
                "clarify_max_rounds": 1,
            }
            return AgentConfig(**settings)
      
        41
        
        42
        
        43
        def non_streaming_pressure_clarify_config() -> AgentConfig:
            """Build a clarify-override config that permits up to three clarify rounds.

            Differs from the other clarify configs by raising the iteration cap to
            ten and setting ``clarify_max_rounds=3`` so pressure passes can run a
            third round.
            """
            settings = {
                "auto_context": False,
                "stream": False,
                "max_iterations": 10,
                "workflow_mode_override": "clarify",
                "clarify_max_rounds": 3,
            }
            return AgentConfig(**settings)
      
        53
        
        54
        
        55
        def workflow_modes(run) -> list[str]:
            """Collect the workflow modes emitted during a run, in emission order.

            Only events whose ``type`` is ``"workflow_mode"`` and whose
            ``workflow_mode`` is truthy contribute to the result.
            """
            modes: list[str] = []
            for evt in run.events:
                if evt.type != "workflow_mode":
                    continue
                # Skip events that carry an empty or missing mode value.
                if evt.workflow_mode:
                    modes.append(evt.workflow_mode)
            return modes
      
        63
        
        64
        
        65
        def artifact_kinds(run) -> list[str]:
            """Collect the artifact kinds emitted during a run, in emission order.

            Only events whose ``type`` is ``"artifact"`` and whose ``artifact_kind``
            is truthy contribute to the result.
            """
            artifact_events = (evt for evt in run.events if evt.type == "artifact")
            return [evt.artifact_kind for evt in artifact_events if evt.artifact_kind]
      
        73
        
        74
        
        75
        def verification_commands(run) -> list[str]:
            """Collect the ``command`` argument of every verification-phase tool call.

            An event qualifies when its ``type`` is ``"tool_call"`` and its ``phase``
            is ``"verification"``. Missing or empty ``tool_args`` and a missing
            ``"command"`` key both yield an empty string; values are coerced with
            ``str``.
            """
            commands: list[str] = []
            for evt in run.events:
                if evt.type == "tool_call" and evt.phase == "verification":
                    # Treat absent/empty tool arguments as an empty mapping.
                    call_args = evt.tool_args or {}
                    commands.append(str(call_args.get("command", "")))
            return commands
      
        83
        
        84
        
        85
        def workflow_timeline_kinds(run) -> list[str]:
            """Return the ``kind`` of each entry in the last turn's workflow timeline.

            Asserts that the agent recorded a last-turn summary before reading its
            ``workflow_timeline``.
            """
            assert run.agent.last_turn_summary is not None
            timeline = run.agent.last_turn_summary.workflow_timeline
            kinds: list[str] = []
            for step in timeline:
                kinds.append(step.kind)
            return kinds
      
        88
        
        89
        
        90
        def seed_runtime_workspace(root: Path) -> None:
            """Create a small brownfield runtime workspace for clarify tests.

            Populates ``root`` with a minimal ``pyproject.toml``, two modules under
            ``src/loader/runtime`` (``workflow_lanes.py`` and ``clarify_strategy.py``),
            and a placeholder ``tests/test_workflow_runtime.py``.

            Files are written as UTF-8 explicitly so the result does not depend on
            the locale's default encoding, and directories are created with
            ``exist_ok=True`` so seeding the same root twice does not raise.

            Args:
                root: Existing directory to populate.
            """
            runtime_dir = root / "src" / "loader" / "runtime"
            tests_dir = root / "tests"

            (root / "pyproject.toml").write_text(
                "[project]\nname='loader'\n", encoding="utf-8"
            )

            runtime_dir.mkdir(parents=True, exist_ok=True)
            (runtime_dir / "workflow_lanes.py").write_text(
                '"""Runtime lane orchestration for Loader."""\n\n'
                "class WorkflowLaneRunner:\n"
                "    pass\n",
                encoding="utf-8",
            )
            (runtime_dir / "clarify_strategy.py").write_text(
                '"""Intent-aware clarify strategy for runtime follow-up."""\n',
                encoding="utf-8",
            )

            tests_dir.mkdir(exist_ok=True)
            (tests_dir / "test_workflow_runtime.py").write_text("pass\n", encoding="utf-8")
      
        105
        
        106
        
        107
        @pytest.mark.asyncio
        async def test_ambiguous_prompt_routes_to_clarify_and_persists_brief(
            temp_dir: Path,
        ) -> None:
            """Check that an ambiguous prompt is clarified, briefed, and handed to execute.

            The scripted backend supplies one ``AskUserQuestion`` tool call, a
            sectioned markdown clarify brief, and a closing message. The assertions
            cover the emitted workflow modes and artifact kinds, the persisted brief
            file, the session's artifact bookkeeping, and the last turn summary.
            """
            backend = ScriptedBackend(
                completions=[
                    # First scripted reply: a single scoping question via tool call.
                    CompletionResponse(
                        content="I need one clarification before I proceed.",
                        tool_calls=[
                            ToolCall(
                                id="ask-1",
                                name="AskUserQuestion",
                                arguments={
                                    "question": (
                                        "What should stay out of scope for this Loader "
                                        "improvement?"
                                    ),
                                },
                            )
                        ],
                    ),
                    # Second scripted reply: the clarify brief as sectioned markdown.
                    CompletionResponse(
                        content="\n".join(
                            [
                                "## Task Statement",
                                "Improve Loader so it feels more like claw-code.",
                                "",
                                "## Desired Outcome",
                                "- Make Loader more reliable without broad redesign.",
                                "",
                                "## In Scope",
                                "- Tighten the runtime workflow around the user-facing goal.",
                                "",
                                "## Non Goals",
                                "- Rebuild unrelated subsystems.",
                                "",
                                "## Decision Boundaries",
                                "- Escalate before changing unrelated UX patterns.",
                                "",
                                "## Constraints",
                                "- Stay within the current repository.",
                                "",
                                "## Likely Touchpoints",
                                "- Runtime entry points and prompt behavior.",
                                "",
                                "## Assumptions",
                                "- The user wants a narrow runtime-quality improvement.",
                                "",
                                "## Acceptance Criteria",
                                "- The improvement stays focused on runtime behavior.",
                            ]
                        )
                    ),
                    # Third scripted reply: plain closing message with no tool calls.
                    CompletionResponse(content="I have the brief and can move forward."),
                ]
            )

            async def answer(question: str, options: list[str] | None) -> str:
                # The callback doubles as a check on what the runtime asked the user.
                assert "out of scope" in question.lower()
                assert options is None
                return "Do not redesign the whole interface."

            run = await run_scenario(
                "Improve Loader so it feels more like claw-code.",
                backend,
                config=non_streaming_config(),
                project_root=temp_dir,
                on_user_question=answer,
            )

            # Confirm the summary exists before dereferencing it, so a missing
            # summary fails as a clear assertion rather than an AttributeError.
            summary = run.agent.last_turn_summary
            assert summary is not None
            dod = summary.definition_of_done
            assert dod is not None

            # Routing and artifact emission.
            assert workflow_modes(run)[:2] == ["clarify", "execute"]
            assert artifact_kinds(run) == ["clarify_brief"]

            # The brief is persisted on disk and contains the expected phrases.
            assert dod.clarify_brief is not None
            assert Path(dod.clarify_brief).exists()
            brief_markdown = Path(dod.clarify_brief).read_text()
            assert "single-question clarify brief" in brief_markdown
            assert "return control to `execute` mode" in brief_markdown

            # Session bookkeeping and the acceptance criteria taken from the brief.
            assert run.agent.session.workflow_artifact_status == "active"
            assert run.agent.session.workflow_artifact_sources == ["clarify_brief"]
            assert "runtime behavior" in dod.acceptance_criteria[0].lower()

            # The first message of the first backend call carries clarify instructions.
            assert "## Clarify Mode" in backend.invocations[0].messages[0].content

            # The turn summary records the clarify -> execute handoff.
            assert summary.workflow_mode == "execute"
            assert summary.workflow_reason_code == "post_clarify_task_is_concrete"
            assert summary.workflow_decision_kind == "handoff"
            assert summary.workflow_timeline[0].mode == "clarify"
            assert summary.workflow_timeline[-1].mode == "execute"
      
        196
        
        197
        
        198
        @pytest.mark.asyncio
        async def test_clarify_prompt_and_brief_include_workspace_evidence(
            temp_dir: Path,
        ) -> None:
            """Clarify and brief prompts should carry evidence from the seeded workspace.

            After seeding the workspace, the last message of the first backend call
            must contain the workspace-evidence markers (including the seeded class
            header), and the last message of the second backend call must contain
            the grounded brief hints.
            """
            seed_runtime_workspace(temp_dir)

            scope_question = ToolCall(
                id="ask-1",
                name="AskUserQuestion",
                arguments={
                    "question": (
                        "Should I keep the work inside "
                        "src/loader/runtime/workflow_lanes.py?"
                    ),
                },
            )
            brief_sections = [
                "## Task Statement",
                "Tighten clarify behavior around src/loader/runtime/workflow_lanes.py.",
                "",
                "## Desired Outcome",
                "- Keep clarify behavior tighter around one runtime seam.",
                "",
                "## In Scope",
                "- Narrow the change to workflow lane handling.",
                "",
                "## Non Goals",
                "- Do not broaden into unrelated CLI changes.",
                "",
                "## Decision Boundaries",
                "- Escalate before changing other runtime modules.",
                "",
                "## Constraints",
                "- Stay within the existing workspace.",
                "",
                "## Likely Touchpoints",
                "- src/loader/runtime/workflow_lanes.py",
                "",
                "## Assumptions",
                "- The user wants a narrow brownfield change.",
                "",
                "## Acceptance Criteria",
                "- Clarify stays scoped to workflow_lanes.py.",
            ]
            scripted_replies = [
                CompletionResponse(
                    content="I need one clarification before I proceed.",
                    tool_calls=[scope_question],
                ),
                CompletionResponse(content="\n".join(brief_sections)),
                CompletionResponse(content="I can move forward now."),
                CompletionResponse(content="Done."),
                CompletionResponse(content="Done."),
            ]
            backend = ScriptedBackend(completions=scripted_replies)

            async def reply(_: str, __: list[str] | None) -> str:
                return "Yes, keep it there and avoid CLI churn."

            run = await run_scenario(
                "Tighten clarify behavior around src/loader/runtime/workflow_lanes.py.",
                backend,
                config=non_streaming_clarify_config(),
                project_root=temp_dir,
                on_user_question=reply,
            )

            # First backend call: the prompt is grounded in the seeded workspace.
            clarify_prompt = backend.invocations[0].messages[-1].content
            for marker in (
                "Relevant workspace evidence:",
                "Relevant repo facts:",
                "class WorkflowLaneRunner:",
            ):
                assert marker in clarify_prompt

            # Second backend call: the brief prompt carries the grounded hints.
            brief_prompt = backend.invocations[1].messages[-1].content
            for marker in (
                "Grounded brief hints:",
                "Seed likely touchpoints:",
                "Scope acceptance criteria:",
            ):
                assert marker in brief_prompt

            first_criterion = (
                run.agent.last_turn_summary.definition_of_done.acceptance_criteria[0]
            )
            assert "workflow_lanes.py" in first_criterion
      
        279
        
        280
        
        281
        @pytest.mark.asyncio
        async def test_fallback_clarify_brief_inherits_grounded_workspace_hints(
            temp_dir: Path,
        ) -> None:
            """An empty scripted brief still yields a brief grounded in the workspace.

            The second scripted reply is empty, so the persisted brief is presumably
            produced by a runtime fallback (per the test name). The test checks that
            the brief mentions the seeded files and class, and that the acceptance
            criteria include the scoped-work sentence.
            """
            seed_runtime_workspace(temp_dir)

            scope_question = ToolCall(
                id="ask-1",
                name="AskUserQuestion",
                arguments={
                    "question": (
                        "Should I keep the work inside "
                        "src/loader/runtime/workflow_lanes.py?"
                    ),
                },
            )
            scripted_replies = [
                CompletionResponse(
                    content="I need one clarification before I proceed.",
                    tool_calls=[scope_question],
                ),
                # Deliberately empty reply where the brief would otherwise be.
                CompletionResponse(content=""),
                CompletionResponse(content="I can move forward now."),
                CompletionResponse(content="Done."),
                CompletionResponse(content="Done."),
            ]
            backend = ScriptedBackend(completions=scripted_replies)

            async def reply(_: str, __: list[str] | None) -> str:
                return (
                    "Keep it scoped to src/loader/runtime/workflow_lanes.py and leave "
                    "clarify_strategy.py unchanged."
                )

            run = await run_scenario(
                "Tighten clarify behavior around src/loader/runtime/workflow_lanes.py.",
                backend,
                config=non_streaming_single_round_clarify_config(),
                project_root=temp_dir,
                on_user_question=reply,
            )

            brief_prompt = backend.invocations[1].messages[-1].content
            for marker in (
                "Grounded brief hints:",
                "Seed likely touchpoints:",
                "Scope acceptance criteria:",
            ):
                assert marker in brief_prompt

            dod = run.agent.last_turn_summary.definition_of_done
            assert dod is not None
            assert dod.clarify_brief is not None

            brief_text = Path(dod.clarify_brief).read_text()
            for fragment in (
                "src/loader/runtime/workflow_lanes.py",
                "src/loader/runtime/clarify_strategy.py",
                "WorkflowLaneRunner",
            ):
                assert fragment in brief_text

            expected_criterion = (
                "Primary work stays scoped to `src/loader/runtime/workflow_lanes.py`."
            )
            assert any(
                expected_criterion == criterion for criterion in dod.acceptance_criteria
            )
      
        341
        
        342
        
        343
        @pytest.mark.asyncio
        async def test_clarify_can_continue_for_a_second_round_when_scope_stays_ambiguous(
            temp_dir: Path,
        ) -> None:
            """A vague first answer should trigger exactly one clarify continuation.

            The user first answers "Make it nicer." and then gives a concrete file
            scope. The test expects one ``clarify_continue`` timeline entry at the
            ``readiness`` stage, a ``clarify_exit`` entry, and a handoff to execute.
            """
            first_question = ToolCall(
                id="ask-1",
                name="AskUserQuestion",
                arguments={"question": "What part should change most?"},
            )
            follow_up_question = ToolCall(
                id="ask-2",
                name="AskUserQuestion",
                arguments={
                    "question": "Which file should change, and what should stay unchanged?",
                },
            )
            brief_sections = [
                "## Task Statement",
                "Improve Loader so it feels more like claw-code.",
                "",
                "## Desired Outcome",
                "- Make the runtime feel more disciplined.",
                "",
                "## In Scope",
                "- Update src/loader/runtime/conversation.py only.",
                "",
                "## Non Goals",
                "- Do not change the CLI surface.",
                "",
                "## Decision Boundaries",
                "- Escalate before touching unrelated modules.",
                "",
                "## Constraints",
                "- Stay within the repository.",
                "",
                "## Likely Touchpoints",
                "- src/loader/runtime/conversation.py",
                "",
                "## Assumptions",
                "- The user wants a narrow runtime change.",
                "",
                "## Acceptance Criteria",
                "- Only conversation.py changes.",
            ]
            scripted_replies = [
                CompletionResponse(
                    content="I need one clarification before I proceed.",
                    tool_calls=[first_question],
                ),
                CompletionResponse(content=""),
                CompletionResponse(
                    content="I need one more focused detail before moving on.",
                    tool_calls=[follow_up_question],
                ),
                CompletionResponse(content="\n".join(brief_sections)),
                CompletionResponse(content="I have enough detail now and can move forward."),
            ]
            backend = ScriptedBackend(completions=scripted_replies)

            # One canned user answer per clarify round, handed out in order.
            user_answers = iter(
                [
                    "Make it nicer.",
                    "Only update src/loader/runtime/conversation.py and keep the CLI unchanged.",
                ]
            )

            async def reply(_: str, __: list[str] | None) -> str:
                return next(user_answers)

            run = await run_scenario(
                "Improve Loader so it feels more like claw-code.",
                backend,
                config=non_streaming_config(),
                project_root=temp_dir,
                on_user_question=reply,
            )

            dod = run.agent.last_turn_summary.definition_of_done
            assert dod is not None
            assert dod.clarify_brief is not None
            assert Path(dod.clarify_brief).exists()
            assert workflow_modes(run)[:2] == ["clarify", "execute"]

            timeline_kinds = workflow_timeline_kinds(run)
            assert timeline_kinds.count("clarify_continue") == 1
            assert "clarify_exit" in timeline_kinds

            third_prompt = backend.invocations[2].messages[-1].content
            assert "Focus slot: likely touchpoints" in third_prompt

            timeline = run.agent.last_turn_summary.workflow_timeline
            assert any(
                step.reason_code == "clarify_follow_up_needed" for step in timeline
            )
            continuation = next(
                step for step in timeline if step.kind == "clarify_continue"
            )
            assert continuation.clarify_stage == "readiness"
      
        444
        
        445
        
        446
        @pytest.mark.asyncio
        async def test_second_round_fallback_question_uses_workspace_grounding(
            temp_dir: Path,
        ) -> None:
            """A second question is still asked when the script offers no follow-up.

            Only the first scripted reply contains an ``AskUserQuestion`` call; the
            next two are empty. The test expects the user callback to be invoked
            twice anyway, with the second question mentioning a concrete outcome
            (presumably a runtime-generated fallback, per the test name).
            """
            seed_runtime_workspace(temp_dir)

            first_question = ToolCall(
                id="ask-1",
                name="AskUserQuestion",
                arguments={"question": "What part should change most?"},
            )
            brief_sections = [
                "## Task Statement",
                "Tighten Loader runtime clarify behavior.",
                "",
                "## Desired Outcome",
                "- Keep the clarify workflow more grounded.",
                "",
                "## In Scope",
                "- Stay inside workflow lane handling.",
                "",
                "## Non Goals",
                "- Do not broaden into the CLI surface.",
                "",
                "## Decision Boundaries",
                "- Escalate before changing unrelated modules.",
                "",
                "## Constraints",
                "- Stay within the repository.",
                "",
                "## Likely Touchpoints",
                "- src/loader/runtime/workflow_lanes.py",
                "",
                "## Assumptions",
                "- The user wants a narrow runtime behavior fix.",
                "",
                "## Acceptance Criteria",
                "- workflow_lanes.py stays the main touchpoint.",
            ]
            scripted_replies = [
                CompletionResponse(
                    content="I need one clarification before I proceed.",
                    tool_calls=[first_question],
                ),
                CompletionResponse(content=""),
                CompletionResponse(content=""),
                CompletionResponse(content="\n".join(brief_sections)),
                CompletionResponse(content="I can move forward now."),
                CompletionResponse(content="Done."),
                CompletionResponse(content="Done."),
            ]
            backend = ScriptedBackend(completions=scripted_replies)

            # Record every question put to the user so the second one can be inspected.
            asked_questions: list[str] = []
            user_answers = iter(
                [
                    "Make it nicer.",
                    "Keep it scoped to src/loader/runtime/workflow_lanes.py and leave the CLI alone.",
                ]
            )

            async def answer(question: str, _: list[str] | None) -> str:
                asked_questions.append(question)
                return next(user_answers)

            run = await run_scenario(
                "Tighten Loader runtime clarify behavior.",
                backend,
                config=non_streaming_clarify_config(),
                project_root=temp_dir,
                on_user_question=answer,
            )

            round_two_prompt = backend.invocations[2].messages[-1].content
            assert len(asked_questions) == 2
            assert "concrete outcome" in asked_questions[1].lower()
            assert "Focus slot: desired outcome" in round_two_prompt

            called_tools = [
                evt.tool_name
                for evt in run.events
                if evt.type == "tool_call" and evt.tool_name
            ]
            assert called_tools[:2] == ["AskUserQuestion", "AskUserQuestion"]
      
        532
        
        533
        
        534
        @pytest.mark.asyncio
        async def test_second_round_non_goal_prompt_uses_slot_aware_repo_facts(
            temp_dir: Path,
        ) -> None:
            """Check the second-round clarify prompt and the follow-up question.

            The backend is scripted with an opening ``AskUserQuestion`` tool call,
            a draft that has no ``## Non Goals`` section, an empty completion, and
            a draft that adds ``## Non Goals`` and ``## Decision Boundaries``.  The
            prompt under test is the last message of the entry at index 2 of
            ``backend.invocations``.
            """
            seed_runtime_workspace(temp_dir)

            # Sections that both scripted drafts repeat verbatim.
            draft_head = (
                "## Task Statement",
                "Tighten Loader runtime clarify behavior.",
                "",
                "## Desired Outcome",
                "- Keep clarify behavior grounded in brownfield repo facts.",
                "",
                "## In Scope",
                "- Focus on runtime lane handling first.",
                "",
            )
            draft_tail = (
                "## Constraints",
                "- Stay within the current repository.",
                "",
                "## Likely Touchpoints",
                "- src/loader/runtime/workflow_lanes.py",
                "",
                "## Acceptance Criteria",
            )

            # First draft: no "Non Goals" / "Decision Boundaries" sections.
            draft_without_non_goals = "\n".join(
                (
                    *draft_head,
                    *draft_tail,
                    "- The next round clarifies what stays unchanged.",
                )
            )
            # Second draft: inserts both sections between "In Scope" and "Constraints".
            draft_with_non_goals = "\n".join(
                (
                    *draft_head,
                    "## Non Goals",
                    "- Leave clarify strategy behavior unchanged for now.",
                    "",
                    "## Decision Boundaries",
                    "- Stop and confirm before broadening beyond runtime lanes.",
                    "",
                    *draft_tail,
                    "- workflow_lanes.py remains the primary touchpoint.",
                )
            )

            opening_question = ToolCall(
                id="ask-1",
                name="AskUserQuestion",
                arguments={
                    "question": "Which runtime file should I focus on first?",
                },
            )
            # NOTE(review): presumably ScriptedBackend hands these out in order, one
            # per invocation -- it is defined in tests.helpers.runtime_harness.
            scripted_turns = [
                CompletionResponse(
                    content="I need one clarification before I proceed.",
                    tool_calls=[opening_question],
                ),
                CompletionResponse(content=draft_without_non_goals),
                CompletionResponse(content=""),
                CompletionResponse(content=draft_with_non_goals),
                CompletionResponse(content="I can move forward now."),
                CompletionResponse(content="Done."),
                CompletionResponse(content="Done."),
            ]
            backend = ScriptedBackend(completions=scripted_turns)

            questions_seen: list[str] = []
            canned_replies = iter(
                (
                    "Start with src/loader/runtime/workflow_lanes.py.",
                    "Keep clarify_strategy.py unchanged while we tighten the workflow lanes.",
                )
            )

            async def record_and_reply(question: str, _: list[str] | None) -> str:
                # Remember every question asked, then hand back the next canned reply.
                questions_seen.append(question)
                return next(canned_replies)

            await run_scenario(
                "Tighten Loader runtime clarify behavior.",
                backend,
                config=non_streaming_clarify_config(),
                project_root=temp_dir,
                on_user_question=record_and_reply,
            )

            round_two_prompt = backend.invocations[2].messages[-1].content
            # Checked in this order so the first missing fragment is the one reported.
            for expected_fragment in (
                "Focus slot: non-goals",
                "Relevant workspace evidence:",
                "workflow_lanes.py",
                "clarify_strategy.py",
                "Relevant repo facts:",
            ):
                assert expected_fragment in round_two_prompt

            assert len(questions_seen) == 2
            follow_up_question = questions_seen[1]
            assert "clarify_strategy.py" in follow_up_question
            assert "unchanged" in follow_up_question.lower()
      
        641
        
        642
        
        643
        @pytest.mark.asyncio
        async def test_third_round_example_pressure_question_grounds_non_goals_with_repo_facts(
            temp_dir: Path,
        ) -> None:
            """Check the third-round "example" pressure prompt and its question.

            The backend is scripted with an opening ``AskUserQuestion`` tool call
            and three drafts separated by empty completions; only the last draft
            contains a ``## Non Goals`` section.  The prompt under test is the last
            message of the entry at index 4 of ``backend.invocations``.
            """
            seed_runtime_workspace(temp_dir)

            # Sections that all three scripted drafts repeat verbatim.
            draft_head = (
                "## Task Statement",
                "Tighten Loader runtime clarify behavior.",
                "",
                "## Desired Outcome",
                "- Keep clarify follow-up grounded in brownfield repo evidence.",
                "",
                "## In Scope",
                "- Focus on runtime lane handling first.",
                "",
            )
            draft_tail = (
                "## Decision Boundaries",
                "- Stop and confirm before broadening beyond runtime lanes.",
                "",
                "## Constraints",
                "- Stay within the current repository.",
                "",
                "## Likely Touchpoints",
                "- src/loader/runtime/workflow_lanes.py",
                "",
                "## Acceptance Criteria",
            )

            # Drafts one and two differ only in their acceptance-criteria bullet.
            first_draft = "\n".join(
                (
                    *draft_head,
                    *draft_tail,
                    "- The next clarify round locks down what stays out of scope.",
                )
            )
            second_draft = "\n".join(
                (
                    *draft_head,
                    *draft_tail,
                    "- The next clarify round still needs a concrete out-of-scope boundary.",
                )
            )
            # The final draft is the only one that carries a "Non Goals" section.
            final_draft = "\n".join(
                (
                    *draft_head,
                    "## Non Goals",
                    "- Leave clarify_strategy.py unchanged while tightening workflow_lanes.py.",
                    "",
                    *draft_tail,
                    (
                        "- workflow_lanes.py stays in scope and "
                        "clarify_strategy.py stays out of scope."
                    ),
                )
            )

            opening_question = ToolCall(
                id="ask-1",
                name="AskUserQuestion",
                arguments={
                    "question": "Which runtime file should I focus on first?",
                },
            )
            # NOTE(review): presumably ScriptedBackend hands these out in order, one
            # per invocation -- it is defined in tests.helpers.runtime_harness.
            scripted_turns = [
                CompletionResponse(
                    content="I need one clarification before I proceed.",
                    tool_calls=[opening_question],
                ),
                CompletionResponse(content=first_draft),
                CompletionResponse(content=""),
                CompletionResponse(content=second_draft),
                CompletionResponse(content=""),
                CompletionResponse(content=final_draft),
                CompletionResponse(content="I can move forward now."),
                CompletionResponse(content="Done."),
                CompletionResponse(content="Done."),
            ]
            backend = ScriptedBackend(completions=scripted_turns)

            questions_seen: list[str] = []
            canned_replies = iter(
                (
                    "Start with src/loader/runtime/workflow_lanes.py.",
                    "Maybe something around the workflow lane area.",
                    (
                        "Changing workflow_lanes.py is in scope, but clarify_strategy.py "
                        "should stay out of scope."
                    ),
                )
            )

            async def record_and_reply(question: str, _: list[str] | None) -> str:
                # Remember every question asked, then hand back the next canned reply.
                questions_seen.append(question)
                return next(canned_replies)

            await run_scenario(
                "Tighten Loader runtime clarify behavior.",
                backend,
                config=non_streaming_pressure_clarify_config(),
                project_root=temp_dir,
                on_user_question=record_and_reply,
            )

            round_three_prompt = backend.invocations[4].messages[-1].content
            # Checked in this order so the first missing fragment is the one reported.
            for expected_fragment in (
                "Focus slot: non-goals",
                "Pressure pass: example",
                "Relevant repo facts:",
                "workflow_lanes.py",
                "clarify_strategy.py",
            ):
                assert expected_fragment in round_three_prompt

            assert len(questions_seen) == 3
            pressure_question = questions_seen[2]
            assert "out of scope" in pressure_question.lower()
            assert "workflow_lanes.py" in pressure_question
            assert "clarify_strategy.py" in pressure_question
      
        788
        
        789
        
        790
        @pytest.mark.asyncio
        async def test_third_round_tradeoff_pressure_question_uses_nearby_repo_fact(
            temp_dir: Path,
        ) -> None:
            """Check the third-round "tradeoff" pressure prompt and its question.

            The backend is scripted with an opening ``AskUserQuestion`` tool call
            and three drafts separated by empty completions; only the last draft
            contains ``## Non Goals`` and ``## Decision Boundaries`` sections.  The
            prompt under test is the last message of the entry at index 4 of
            ``backend.invocations``.
            """
            seed_runtime_workspace(temp_dir)

            # Sections that all three scripted drafts repeat verbatim.
            draft_head = (
                "## Task Statement",
                "Tighten Loader runtime clarify behavior.",
                "",
                "## Desired Outcome",
                "- Keep clarify behavior grounded in brownfield repo facts.",
                "",
                "## In Scope",
                "- Focus on runtime lane handling first.",
                "",
            )
            draft_tail = (
                "## Constraints",
                "- Stay within the current repository.",
                "",
                "## Likely Touchpoints",
                "- src/loader/runtime/workflow_lanes.py",
                "",
                "## Acceptance Criteria",
            )

            # Drafts one and two differ only in their acceptance-criteria bullet.
            first_draft = "\n".join(
                (
                    *draft_head,
                    *draft_tail,
                    "- The next round clarifies what stays unchanged.",
                )
            )
            second_draft = "\n".join(
                (
                    *draft_head,
                    *draft_tail,
                    "- The next round still needs a clearer stop boundary.",
                )
            )
            # The final draft inserts "Non Goals" and "Decision Boundaries".
            final_draft = "\n".join(
                (
                    *draft_head,
                    "## Non Goals",
                    "- Leave clarify strategy behavior unchanged for now.",
                    "",
                    "## Decision Boundaries",
                    "- Stop and confirm before broadening beyond runtime lanes.",
                    "",
                    *draft_tail,
                    "- workflow_lanes.py remains the primary touchpoint.",
                )
            )

            opening_question = ToolCall(
                id="ask-1",
                name="AskUserQuestion",
                arguments={
                    "question": "Which runtime file should I focus on first?",
                },
            )
            # NOTE(review): presumably ScriptedBackend hands these out in order, one
            # per invocation -- it is defined in tests.helpers.runtime_harness.
            scripted_turns = [
                CompletionResponse(
                    content="I need one clarification before I proceed.",
                    tool_calls=[opening_question],
                ),
                CompletionResponse(content=first_draft),
                CompletionResponse(content=""),
                CompletionResponse(content=second_draft),
                CompletionResponse(content=""),
                CompletionResponse(content=final_draft),
                CompletionResponse(content="I can move forward now."),
                CompletionResponse(content="Done."),
                CompletionResponse(content="Done."),
            ]
            backend = ScriptedBackend(completions=scripted_turns)

            questions_seen: list[str] = []
            canned_replies = iter(
                (
                    "Start with src/loader/runtime/workflow_lanes.py.",
                    "Scope it to the runtime lane code.",
                    "Keep clarify_strategy.py unchanged while we tighten the workflow lanes.",
                )
            )

            async def record_and_reply(question: str, _: list[str] | None) -> str:
                # Remember every question asked, then hand back the next canned reply.
                questions_seen.append(question)
                return next(canned_replies)

            await run_scenario(
                "Tighten Loader runtime clarify behavior.",
                backend,
                config=non_streaming_pressure_clarify_config(),
                project_root=temp_dir,
                on_user_question=record_and_reply,
            )

            round_three_prompt = backend.invocations[4].messages[-1].content
            # Checked in this order so the first missing fragment is the one reported.
            for expected_fragment in (
                "Focus slot: non-goals",
                "Pressure pass: tradeoff",
                "Relevant repo facts:",
                "clarify_strategy.py",
            ):
                assert expected_fragment in round_three_prompt

            assert len(questions_seen) == 3
            pressure_question = questions_seen[2]
            assert "broader edits would be easier" in pressure_question
            assert "clarify_strategy.py" in pressure_question
      
        921
        
        922
        
        923
        @pytest.mark.asyncio
      
        924
        async def test_third_round_assumption_question_challenges_desired_outcome_assumptions(
      
        925
            temp_dir: Path,
      
        926
        ) -> None:
      
        927
            seed_runtime_workspace(temp_dir)
      
        928
            backend = ScriptedBackend(
      
        929
                completions=[
      
        930
                    CompletionResponse(
      
        931
                        content="I need one clarification before I proceed.",
      
        932
                        tool_calls=[
      
        933
                            ToolCall(
      
        934
                                id="ask-1",
      
        935
                                name="AskUserQuestion",
      
        936
                                arguments={
      
        937
                                    "question": "Which runtime file should I focus on first?",
      
        938
                                },
      
        939
                            )
      
        940
                        ],
      
        941
                    ),
      
        942
                    CompletionResponse(
      
        943
                        content="\n".join(
      
        944
                            [
      
        945
                                "## Task Statement",
      
        946
                                "Tighten Loader runtime clarify behavior.",
      
        947
                                "",
      
        948
                                "## In Scope",
      
        949
                                "- Focus on runtime lane handling first.",
      
        950
                                "",
      
        951
                                "## Non Goals",
      
        952
                                "- Do not broaden into unrelated CLI changes.",
      
        953
                                "",
      
        954
                                "## Decision Boundaries",
      
        955
                                "- Stop and confirm before broadening beyond runtime lanes.",
      
        956
                                "",
      
        957
                                "## Constraints",
      
        958
                                "- Stay within the current repository.",
      
        959
                                "",
      
        960
                                "## Likely Touchpoints",
      
        961
                                "- src/loader/runtime/workflow_lanes.py",
      
        962
                                "",
      
        963
                                "## Acceptance Criteria",
      
        964
                                "- The next clarify round makes the intended outcome explicit.",
      
        965
                            ]
      
        966
                        )
      
        967
                    ),
      
        968
                    CompletionResponse(content=""),
      
        969
                    CompletionResponse(
      
        970
                        content="\n".join(
      
        971
                            [
      
        972
                                "## Task Statement",
      
        973
                                "Tighten Loader runtime clarify behavior.",
      
        974
                                "",
      
        975
                                "## In Scope",
      
        976
                                "- Focus on runtime lane handling first.",
      
        977
                                "",
      
        978
                                "## Non Goals",
      
        979
                                "- Do not broaden into unrelated CLI changes.",
      
        980
                                "",
      
        981
                                "## Decision Boundaries",
      
        982
                                "- Stop and confirm before broadening beyond runtime lanes.",
      
        983
                                "",
      
        984
                                "## Constraints",
      
        985
                                "- Stay within the current repository.",
      
        986
                                "",
      
        987
                                "## Likely Touchpoints",
      
        988
                                "- src/loader/runtime/workflow_lanes.py",
      
        989
                                "",
      
        990
                                "## Acceptance Criteria",
      
        991
                                "- The next clarify round still needs a more explicit finished outcome.",
      
        992
                            ]
      
        993
                        )
      
        994
                    ),
      
        995
                    CompletionResponse(content=""),
      
        996
                    CompletionResponse(
      
        997
                        content="\n".join(
      
        998
                            [
      
        999
                                "## Task Statement",
      
        1000
                                "Tighten Loader runtime clarify behavior.",
      
        1001
                                "",
      
        1002
                                "## Desired Outcome",
      
        1003
                                "- Make clarify follow-up cite repo evidence before planning.",
      
        1004
                                "",
      
        1005
                                "## In Scope",
      
        1006
                                "- Focus on runtime lane handling first.",
      
        1007
                                "",
      
        1008
                                "## Non Goals",
      
        1009
                                "- Do not broaden into unrelated CLI changes.",
      
        1010
                                "",
      
        1011
                                "## Decision Boundaries",
      
        1012
                                "- Stop and confirm before broadening beyond runtime lanes.",
      
        1013
                                "",
      
        1014
                                "## Constraints",
      
        1015
                                "- Stay within the current repository.",
      
        1016
                                "",
      
        1017
                                "## Likely Touchpoints",
      
        1018
                                "- src/loader/runtime/workflow_lanes.py",
      
        1019
                                "",
      
        1020
                                "## Acceptance Criteria",
      
        1021
                                "- The clarify outcome is explicit before execution begins.",
      
        1022
                            ]
      
        1023
                        )
      
        1024
                    ),
      
        1025
                    CompletionResponse(content="I can move forward now."),
      
        1026
                    CompletionResponse(content="Done."),
      
        1027
                    CompletionResponse(content="Done."),
      
        1028
                ]
      
        1029
            )
      
        1030
        
        1031
            asked_questions: list[str] = []
      
        1032
            answers = iter(
      
        1033
                [
      
        1034
                    "Start with src/loader/runtime/workflow_lanes.py.",
      
        1035
                    "Make clarify follow-up cite repo evidence before planning.",
      
        1036
                    (
      
        1037
                        "The risky assumption would be broader runtime cleanup instead of "
      
        1038
                        "just grounded clarify follow-up."
      
        1039
                    ),
      
        1040
                ]
      
        1041
            )
      
        1042
        
        1043
            async def answer(question: str, _: list[str] | None) -> str:
      
        1044
                asked_questions.append(question)
      
        1045
                return next(answers)
      
        1046
        
        1047
            await run_scenario(
      
        1048
                "Tighten Loader runtime clarify behavior.",
      
        1049
                backend,
      
        1050
                config=non_streaming_pressure_clarify_config(),
      
        1051
                project_root=temp_dir,
      
        1052
                on_user_question=answer,
      
        1053
            )
      
        1054
        
        1055
            round_three_prompt = backend.invocations[4].messages[-1].content
      
        1056
            assert "Focus slot: desired outcome" in round_three_prompt
      
        1057
            assert "Pressure pass: assumption" in round_three_prompt
      
        1058
            assert "Relevant repo facts:" in round_three_prompt
      
        1059
            assert "workflow_lanes.py" in round_three_prompt
      
        1060
            assert len(asked_questions) == 3
      
        1061
            assert "assumption" in asked_questions[2].lower()
      
        1062
            assert "get wrong" in asked_questions[2].lower()
      
        1063
        
        1064
        
        1065
        @pytest.mark.asyncio
        async def test_complex_prompt_routes_to_plan_and_uses_verification_artifact(
            temp_dir: Path,
        ) -> None:
            """A complex prompt should run plan -> execute -> verify off the plan artifacts.

            The scripted backend answers with a combined implementation and
            verification plan, then a ``write`` tool call, then a closing message.
            The test checks the recorded workflow modes, the persisted plan
            artifacts, and that the verification phase runs the planned command.
            """
            target = temp_dir / "planned.txt"
            expected_command = f"test -f {target}"
            backend = ScriptedBackend(
                completions=[
                    CompletionResponse(
                        content="\n".join(
                            [
                                "# Implementation Plan",
                                "",
                                "## File Changes",
                                f"- Create {target.name} in the workspace root.",
                                "",
                                "## Execution Order",
                                f"1. Write {target.name}.",
                                "2. Confirm the file exists.",
                                "",
                                "## Risks",
                                "- Writing the wrong file path.",
                                "",
                                "<<<VERIFICATION>>>",
                                "",
                                "# Verification Plan",
                                "",
                                "## Acceptance Criteria",
                                f"- {target.name} exists in the workspace root.",
                                "",
                                "## Verification Commands",
                                f"- `test -f {target}`",
                                "",
                                "## Notes",
                                "- Use a deterministic file existence check.",
                            ]
                        )
                    ),
                    CompletionResponse(
                        content="I'll create the file now.",
                        tool_calls=[
                            ToolCall(
                                id="write-1",
                                name="write",
                                arguments={
                                    "file_path": str(target),
                                    "content": "planned output\n",
                                },
                            )
                        ],
                    ),
                    CompletionResponse(content="The file is in place."),
                ]
            )

            run = await run_scenario(
                "Implement a persistent workflow mode router with clarify artifacts, "
                "planning artifacts, and verification-plan wiring in the runtime.",
                backend,
                config=non_streaming_config(),
                project_root=temp_dir,
            )

            # Guard the summary before any attribute access so a missing summary
            # fails as a readable assertion instead of an AttributeError.
            summary = run.agent.last_turn_summary
            assert summary is not None
            dod = summary.definition_of_done
            assert dod is not None

            # Routing and artifact bookkeeping.
            assert workflow_modes(run)[:3] == ["plan", "execute", "verify"]
            assert artifact_kinds(run) == ["implementation_plan", "verification_plan"]
            assert dod.implementation_plan is not None
            assert dod.verification_plan is not None
            assert Path(dod.implementation_plan).exists()
            assert Path(dod.verification_plan).exists()

            # Persisted artifact contents.
            implementation_markdown = Path(dod.implementation_plan).read_text()
            verification_markdown = Path(dod.verification_plan).read_text()
            assert "single-pass planning artifact generation" in implementation_markdown
            assert "planner/critic consensus loop" in implementation_markdown
            assert "single-pass planning artifact generation" in verification_markdown
            assert run.agent.session.workflow_artifact_status == "active"
            assert run.agent.session.workflow_artifact_sources == [
                "implementation_plan",
                "verification_plan",
            ]

            # No decomposition or subtask events are expected on this path.
            assert not any(event.type == "decomposition" for event in run.events)
            assert not any(event.type == "subtask" for event in run.events)
            assert dod.verification_commands == [expected_command]
            assert "## Plan Mode" in backend.invocations[0].messages[0].content

            # Final workflow decision recorded on the turn summary.
            assert summary.workflow_mode == "verify"
            assert summary.workflow_reason_code == (
                "definition_of_done_requires_verification"
            )
            assert summary.workflow_decision_kind == "handoff"
            timeline = summary.workflow_timeline
            assert any(
                entry.mode == "execute"
                and entry.reason_code == "verification_planned"
                and entry.policy_outcome == "planned"
                for entry in timeline
            )
            assert any(
                entry.mode == "verify"
                and entry.reason_code == "verification_pending"
                and entry.policy_outcome == "pending"
                for entry in timeline
            )

            # The verification phase must execute exactly the planned command.
            verify_calls = [
                event
                for event in run.events
                if event.type == "tool_call" and event.phase == "verification"
            ]
            assert [event.tool_args["command"] for event in verify_calls] == [expected_command]
      
        1174
        
        1175
        
        1176
        @pytest.mark.asyncio
        async def test_verify_failure_returns_to_execute_without_retriggering_plan(
            temp_dir: Path,
        ) -> None:
            """A failed verification should loop back to execute, not to plan.

            The first scripted write omits the word the verification command greps
            for; the second write supplies it. The test expects a single plan
            pass, no clarify pass, and at least two execute and verify passes.
            """
            target = temp_dir / "retry.txt"

            plan_markdown = "\n".join(
                [
                    "# Implementation Plan",
                    "",
                    "## File Changes",
                    f"- Create {target.name}.",
                    "",
                    "## Execution Order",
                    f"1. Write {target.name}.",
                    "2. Fix it if verification fails.",
                    "",
                    "## Risks",
                    "- Initial content may be wrong.",
                    "",
                    "<<<VERIFICATION>>>",
                    "",
                    "# Verification Plan",
                    "",
                    "## Acceptance Criteria",
                    "- The file contains the word fixed.",
                    "",
                    "## Verification Commands",
                    f"- `grep -q fixed {target}`",
                    "",
                    "## Notes",
                    "- Retry if the first write misses the target string.",
                ]
            )
            scripted_responses = [
                CompletionResponse(content=plan_markdown),
                # First attempt: content that does not contain "fixed".
                CompletionResponse(
                    content="I'll write the first draft.",
                    tool_calls=[
                        ToolCall(
                            id="write-1",
                            name="write",
                            arguments={
                                "file_path": str(target),
                                "content": "draft output\n",
                            },
                        )
                    ],
                ),
                CompletionResponse(content="First draft is written."),
                # Second attempt: content that satisfies the grep.
                CompletionResponse(
                    content="I'll correct the file.",
                    tool_calls=[
                        ToolCall(
                            id="write-2",
                            name="write",
                            arguments={
                                "file_path": str(target),
                                "content": "fixed output\n",
                            },
                        )
                    ],
                ),
                CompletionResponse(content="The file now contains the fixed output."),
            ]
            backend = ScriptedBackend(completions=scripted_responses)

            prompt = (
                "Implement a persistent workflow mode router with clarify artifacts, "
                "planning artifacts, and verification-plan wiring in the runtime."
            )
            run = await run_scenario(
                prompt,
                backend,
                config=non_streaming_config(),
                project_root=temp_dir,
            )

            visited_modes = workflow_modes(run)
            assert visited_modes.count("plan") == 1
            assert visited_modes.count("clarify") == 0
            assert visited_modes.count("execute") >= 2
            assert visited_modes.count("verify") >= 2
            assert "fixed output" in target.read_text()
      
        1258
        
        1259
        
        1260
        @pytest.mark.asyncio
        async def test_plan_mode_recovers_verification_commands_from_legacy_separator(
            temp_dir: Path,
        ) -> None:
            """Verification commands are still recovered with a trailing legacy marker.

            The scripted plan has no ``<<<VERIFICATION>>>`` divider before the
            verification section and instead ends with ``<<VERIFICATION>>``. The
            test expects the single planned command to be recorded and the
            verification artifact to contain exactly one commands heading.
            """
            target = temp_dir / "planned.txt"
            expected_command = f"test -f {target}"
            backend = ScriptedBackend(
                completions=[
                    CompletionResponse(
                        content="\n".join(
                            [
                                "# Implementation Plan",
                                "",
                                "## File Changes",
                                f"- Create {target.name} in the workspace root.",
                                "",
                                "## Execution Order",
                                f"1. Write {target.name}.",
                                "2. Verify the file exists.",
                                "",
                                "## Risks",
                                "- Losing the verification commands during parsing.",
                                "",
                                "# Verification Plan",
                                "",
                                "## Acceptance Criteria",
                                f"- {target.name} exists in the workspace root.",
                                "",
                                "## Verification Commands",
                                f"- `test -f {target}`",
                                "",
                                "## Notes",
                                "- This simulates a legacy separator emitted after the plan body.",
                                "",
                                "<<VERIFICATION>>",
                            ]
                        )
                    ),
                    CompletionResponse(
                        content="I'll create the planned artifact.",
                        tool_calls=[
                            ToolCall(
                                id="write-1",
                                name="write",
                                arguments={
                                    "file_path": str(target),
                                    "content": "planned output\n",
                                },
                            )
                        ],
                    ),
                    CompletionResponse(content="The planned artifact is in place."),
                ]
            )

            run = await run_scenario(
                "Implement a persistent workflow mode router with clarify artifacts, "
                "planning artifacts, and verification-plan wiring in the runtime.",
                backend,
                config=non_streaming_config(),
                project_root=temp_dir,
            )

            # Guard each optional value before use so a regression shows up as a
            # plain assertion failure rather than an AttributeError/TypeError.
            summary = run.agent.last_turn_summary
            assert summary is not None
            dod = summary.definition_of_done
            assert dod is not None
            assert dod.verification_commands == [expected_command]
            assert verification_commands(run) == [expected_command]
            assert dod.verification_plan is not None
            verification_markdown = Path(dod.verification_plan).read_text()
            assert verification_markdown.count("## Verification Commands") == 1
      
        1327
        
        1328
        
        1329
        @pytest.mark.asyncio
        async def test_stale_plan_artifacts_trigger_targeted_plan_refresh(
            temp_dir: Path,
        ) -> None:
            """Executing against a different file than planned should refresh the plan.

            The first scripted plan targets ``planned.txt`` but the scripted
            execution writes ``notes.txt``. The test expects a second plan pass,
            timeline entries for the stale plan and the completed refresh, and a
            refresh prompt that carries the execution progress forward.
            """
            target = temp_dir / "notes.txt"
            backend = ScriptedBackend(
                completions=[
                    CompletionResponse(
                        content="\n".join(
                            [
                                "# Implementation Plan",
                                "",
                                "## File Changes",
                                "- Create planned.txt in the workspace root.",
                                "",
                                "## Execution Order",
                                "1. Write planned.txt.",
                                "",
                                "## Risks",
                                "- Choosing the wrong file path.",
                                "",
                                "<<<VERIFICATION>>>",
                                "",
                                "# Verification Plan",
                                "",
                                "## Acceptance Criteria",
                                "- planned.txt exists.",
                                "",
                                "## Verification Commands",
                                f"- `test -f {temp_dir / 'planned.txt'}`",
                                "",
                                "## Notes",
                                "- Verify the originally planned file.",
                            ]
                        )
                    ),
                    CompletionResponse(
                        content="I'll create the audit notes file first.",
                        tool_calls=[
                            ToolCall(
                                id="write-1",
                                name="write",
                                arguments={
                                    "file_path": str(target),
                                    "content": "runtime notes\n",
                                },
                            )
                        ],
                    ),
                    CompletionResponse(
                        content="\n".join(
                            [
                                "# Implementation Plan",
                                "",
                                "## File Changes",
                                f"- Keep {target.name} as the runtime audit artifact.",
                                "",
                                "## Execution Order",
                                f"1. Confirm {target.name} is the intended output.",
                                "",
                                "## Risks",
                                "- Accidentally verifying the stale plan output.",
                                "",
                                "<<<VERIFICATION>>>",
                                "",
                                "# Verification Plan",
                                "",
                                "## Acceptance Criteria",
                                f"- {target.name} exists in the workspace root.",
                                "",
                                "## Verification Commands",
                                f"- `test -f {target}`",
                                "",
                                "## Notes",
                                "- Refresh the plan around the actual artifact.",
                            ]
                        )
                    ),
                    CompletionResponse(content="The refreshed plan matches the notes artifact."),
                ]
            )

            run = await run_scenario(
                "Implement a persistent workflow artifact with planning artifacts, "
                "verification commands, and plan refresh discipline so Loader can refresh stale plans.",
                backend,
                config=non_streaming_config(),
                project_root=temp_dir,
            )

            modes = workflow_modes(run)
            assert modes.count("plan") == 2
            assert modes.count("execute") >= 2
            assert modes[-1] == "verify"
            kinds = artifact_kinds(run)
            assert kinds.count("implementation_plan") == 2
            assert kinds.count("verification_plan") == 2
            assert target.read_text() == "runtime notes\n"

            summary = run.agent.last_turn_summary
            assert summary is not None
            timeline = summary.workflow_timeline

            # Look the stale entry up once; the default keeps a miss on the
            # assertion below instead of exhausting a generator.
            stale_entry = next(
                (entry for entry in timeline if entry.reason_code == "stale_plan_artifacts"),
                None,
            )
            assert stale_entry is not None
            assert any("confirmed touchpoint" in item for item in stale_entry.evidence_summary)
            assert any("acceptance anchor" in item for item in stale_entry.evidence_summary)
            assert any(entry.reason_code == "plan_refresh_completed" for entry in timeline)

            # A bare next() that finds nothing raises StopIteration, which a
            # coroutine reports as an opaque RuntimeError; use a default and
            # assert so a missing refresh prompt fails readably.
            refresh_marker = (
                "Refresh the existing planning artifacts instead of creating a fresh plan from scratch."
            )
            refresh_prompt = next(
                (
                    invocation.messages[-1].content
                    for invocation in backend.invocations
                    if refresh_marker in invocation.messages[-1].content
                ),
                None,
            )
            assert refresh_prompt is not None
            assert "Current execution progress:" in refresh_prompt
            assert "Already touched during execution:" in refresh_prompt
            assert f"- {target}" in refresh_prompt
            assert any(
                "Plan refresh preserved the progress already made." in message.content
                and "Do not restart from initial discovery" in message.content
                for invocation in backend.invocations
                for message in invocation.messages
            )
      
        1456
        
        1457
        
        1458
        @pytest.mark.asyncio
        async def test_full_replan_can_reenter_clarify_before_rebuilding_plan(
            temp_dir: Path,
        ) -> None:
            """Check that a full replan can revisit clarify before planning again.

            The scripted backend first asks a question and produces a brief and
            plan centred on ``planned.txt``. It then writes ``notes.txt``, asks a
            second question, and emits a refreshed brief and plan centred on
            ``notes.txt``. The assertions cover the observed workflow modes, the
            written file, the timeline reason codes, and the ledger statuses.
            """

            prompt = (
                "Don't assume the scope: improve Loader so it feels more like claw-code "
                "while tightening workflow artifacts."
            )
            notes_path = temp_dir / "notes.txt"
            planned_path = temp_dir / "planned.txt"
            artifact_name = notes_path.name
            as_markdown = "\n".join

            # Round one: clarify, then brief + plan around the planned artifact.
            first_question = CompletionResponse(
                content="I need one clarification before planning.",
                tool_calls=[
                    ToolCall(
                        id="ask-1",
                        name="AskUserQuestion",
                        arguments={
                            "question": "What outcome matters most for this Loader improvement?",
                        },
                    )
                ],
            )
            initial_brief = as_markdown(
                [
                    "## Task Statement",
                    prompt,
                    "",
                    "## Desired Outcome",
                    "- Improve the runtime workflow around the planned artifact.",
                    "",
                    "## Non Goals",
                    "- Do not redesign the CLI surface.",
                    "",
                    "## Decision Boundaries",
                    "- Escalate before broad UX changes.",
                    "",
                    "## Constraints",
                    "- Stay within the current repository conventions.",
                    "",
                    "## Likely Touchpoints",
                    "- planned.txt",
                    "",
                    "## Acceptance Criteria",
                    "- planned.txt exists in the workspace root.",
                ]
            )
            initial_plan = as_markdown(
                [
                    "# Implementation Plan",
                    "",
                    "## File Changes",
                    "- Create planned.txt in the workspace root.",
                    "",
                    "## Execution Order",
                    "1. Write planned.txt.",
                    "",
                    "## Risks",
                    "- Choosing the wrong output artifact.",
                    "",
                    "<<<VERIFICATION>>>",
                    "",
                    "# Verification Plan",
                    "",
                    "## Acceptance Criteria",
                    "- planned.txt exists.",
                    "",
                    "## Verification Commands",
                    f"- `test -f {planned_path}`",
                    "",
                    "## Notes",
                    "- Verify the originally planned artifact.",
                ]
            )

            # Execution writes a different file than the plan named.
            write_notes = CompletionResponse(
                content="I'll create the notes artifact first.",
                tool_calls=[
                    ToolCall(
                        id="write-1",
                        name="write",
                        arguments={
                            "file_path": str(notes_path),
                            "content": "runtime notes\n",
                        },
                    )
                ],
            )

            # Round two: second question, then brief + plan around notes.txt.
            second_question = CompletionResponse(
                content="I need one more clarification before rebuilding the plan.",
                tool_calls=[
                    ToolCall(
                        id="ask-2",
                        name="AskUserQuestion",
                        arguments={
                            "question": (
                                "Which file should I actually focus on, "
                                "and what should stay unchanged?"
                            ),
                        },
                    )
                ],
            )
            refreshed_brief = as_markdown(
                [
                    "## Task Statement",
                    prompt,
                    "",
                    "## Desired Outcome",
                    "- Keep the runtime artifact aligned with the actual work.",
                    "",
                    "## Non Goals",
                    "- Do not change the CLI surface.",
                    "",
                    "## Decision Boundaries",
                    "- Escalate before touching unrelated modules.",
                    "",
                    "## Constraints",
                    "- Stay within the repository.",
                    "",
                    "## Likely Touchpoints",
                    f"- {artifact_name}",
                    "",
                    "## Acceptance Criteria",
                    f"- {artifact_name} exists in the workspace root.",
                ]
            )
            refreshed_plan = as_markdown(
                [
                    "# Implementation Plan",
                    "",
                    "## File Changes",
                    f"- Keep {artifact_name} as the runtime artifact.",
                    "",
                    "## Execution Order",
                    f"1. Confirm {artifact_name} remains the intended output.",
                    "",
                    "## Risks",
                    "- Accidentally verifying the stale artifact name.",
                    "",
                    "<<<VERIFICATION>>>",
                    "",
                    "# Verification Plan",
                    "",
                    "## Acceptance Criteria",
                    f"- {artifact_name} exists in the workspace root.",
                    "",
                    "## Verification Commands",
                    f"- `test -f {notes_path}`",
                    "",
                    "## Notes",
                    "- Rebuild the plan around the actual runtime artifact.",
                ]
            )

            closing_text = "The refreshed brief and plan now match the notes artifact."
            script = [
                first_question,
                CompletionResponse(content=initial_brief),
                CompletionResponse(content=initial_plan),
                write_notes,
                second_question,
                CompletionResponse(content=refreshed_brief),
                CompletionResponse(content=refreshed_plan),
                # Two separate response objects carrying the same closing text.
                CompletionResponse(content=closing_text),
                CompletionResponse(content=closing_text),
            ]
            backend = ScriptedBackend(completions=script)

            # One canned reply per scripted AskUserQuestion call, in order.
            scripted_answers = iter(
                [
                    (
                        "Focus on the planned runtime artifact, keep the CLI unchanged, "
                        "and stop before broad UX changes."
                    ),
                    "Focus on notes.txt and keep the CLI unchanged.",
                ]
            )

            async def answer(_: str, __: list[str] | None) -> str:
                return next(scripted_answers)

            run = await run_scenario(
                prompt,
                backend,
                config=non_streaming_config(),
                project_root=temp_dir,
                on_user_question=answer,
            )

            observed_modes = workflow_modes(run)
            assert observed_modes.count("clarify") >= 2
            assert observed_modes.count("plan") == 2
            assert observed_modes.count("execute") >= 2
            assert observed_modes[-1] == "verify"
            assert notes_path.read_text() == "runtime notes\n"

            timeline = run.agent.last_turn_summary.workflow_timeline
            assert any(
                step.reason_code == "full_replan_requires_clarify" for step in timeline
            )
            assert any(step.reason_code == "full_replan_required" for step in timeline)

            ledger = run.agent.session.workflow_ledger
            assert any(
                assumption.status == "contradicted" for assumption in ledger.assumptions
            )
            assert any(anchor.status == "changed" for anchor in ledger.acceptance_anchors)