| 1 | """Deterministic runtime parity coverage for the current Loader loop.""" |
| 2 | |
| 3 | from __future__ import annotations |
| 4 | |
| 5 | import json |
| 6 | from pathlib import Path |
| 7 | |
| 8 | import pytest |
| 9 | |
| 10 | from loader.agent.loop import Agent, AgentConfig |
| 11 | from loader.llm.base import CompletionResponse, Role, StreamChunk, ToolCall |
| 12 | from loader.runtime.capabilities import resolve_capability_profile |
| 13 | from loader.runtime.permissions import PermissionMode |
| 14 | from tests.helpers.runtime_harness import ( |
| 15 | ScriptedBackend, |
| 16 | run_explore_scenario, |
| 17 | run_scenario, |
| 18 | ) |
| 19 | |
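# This list must match fixtures/runtime_parity_manifest.json entry for entry,
# in the same order; the manifest parity test below compares the two with
# strict equality.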
SCENARIO_NAMES = [
    "streaming_text",
    "read_file_roundtrip",
    "multi_tool_turn_roundtrip",
    "turn_summary_smoke_for_multi_tool_turn",
    "write_file_allowed",
    "write_file_denied",
    "bash_stdout_roundtrip",
    "bash_confirmation_prompt_approved",
    "bash_confirmation_prompt_denied",
    "read_only_mode_denies_write",
    "read_only_mode_denies_mutating_bash",
    "read_only_mode_allows_safe_bash",
    "workspace_write_denies_write_outside_root",
    "danger_full_access_allows_dangerous_bash",
    "prompt_mode_prompts_destructive_write",
    "allow_mode_skips_prompt_for_destructive_write",
    "deny_rule_blocks_allowed_mode",
    "ask_rule_prompts_even_when_mode_would_allow",
    "raw_json_tool_call_fallback",
    "raw_json_todowrite_tool_call_fallback",
    "raw_json_patch_tool_call_fallback",
    "raw_json_ask_user_question_tool_call_fallback",
    "raw_bracket_ask_user_question_tool_call_fallback",
    "native_and_raw_tool_paths_share_executor_trace",
    "backend_capability_probe_refreshes_native_tool_mode",
    "run_streaming_delegates_to_primary_runtime",
    "definition_of_done_verify_phase",
    "verify_failure_routes_to_fix_loop",
    "verify_retry_budget_exhaustion",
    "ambiguous_prompt_routes_to_clarify",
    "complex_prompt_routes_to_plan",
    "verify_failure_fix_loop_does_not_reroute_workflow",
    "conversational_task_skips_verify_phase",
    "explore_mode_skips_dod_and_router",
    "explore_mode_denies_write",
    "explore_mode_ignores_global_allow_policy",
    "non_mutating_completion_no_longer_forces_continuation",
    "tool_result_contract_regression",
]


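# A minimal sketch of the expected fixture shape (only the "name" key is read
# here; any other fields are assumed to be descriptive extras):
#     [{"name": "streaming_text", ...}, {"name": "read_file_roundtrip", ...}]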
def load_manifest() -> list[dict[str, str]]:
    """Load the auditable parity scenario manifest."""

    manifest_path = Path(__file__).parent / "fixtures" / "runtime_parity_manifest.json"
    return json.loads(manifest_path.read_text())


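# Tests that need extra knobs (permission_mode, auto_recover,
# verification_retry_budget) mutate the returned config in place instead of
# widening this helper's signature.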
def non_streaming_config(*, completion_check: bool = False) -> AgentConfig:
    """Shared config for deterministic complete() tests."""

    config = AgentConfig(auto_context=False, stream=False, max_iterations=8)
    config.reasoning.completion_check = completion_check
    return config


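# A typical scripted turn pairs one tool-call completion with a closing text
# completion, e.g.:
#
#     ScriptedBackend(
#         completions=[
#             native_tool_response(
#                 ToolCall(id="read-1", name="read", arguments={"file_path": "..."}),
#             ),
#             final_response("Summary of the file."),
#         ]
#     )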
def native_tool_response(
    *tool_calls: ToolCall,
    content: str = "Using tools.",
) -> CompletionResponse:
    """Build a completion that includes native tool calls."""

    return CompletionResponse(content=content, tool_calls=list(tool_calls))


def final_response(content: str) -> CompletionResponse:
    """Build a completion with no further tool calls."""

    return CompletionResponse(content=content)


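# The event helpers below exclude phase == "verification" so main-loop tool
# activity is asserted separately from the DoD verify pass, which is surfaced
# through verification_commands() instead.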
def tool_event_names(run) -> list[str]:
    """Return emitted tool event names in order."""

    return [
        event.tool_name
        for event in run.events
        if event.type == "tool_call" and event.tool_name and event.phase != "verification"
    ]


def tool_result_messages(run) -> list[str]:
    """Return emitted tool result messages in order."""

    return [
        event.content
        for event in run.events
        if event.type == "tool_result" and event.phase != "verification"
    ]


def verification_commands(run) -> list[str]:
    """Return verification-phase bash commands."""

    return [
        str((event.tool_args or {}).get("command", ""))
        for event in run.events
        if event.type == "tool_call" and event.phase == "verification"
    ]


def trace_event_names(run) -> list[str]:
    """Return recorded runtime trace event names."""

    summary = run.agent.last_turn_summary
    assert summary is not None
    return [event.name for event in summary.trace]


def dod_statuses(run) -> list[str]:
    """Return DoD statuses emitted during a run."""

    return [
        event.dod_status
        for event in run.events
        if event.type == "dod_status" and event.dod_status
    ]


def workflow_modes(run) -> list[str]:
    """Return emitted workflow modes in order."""

    return [
        event.workflow_mode
        for event in run.events
        if event.type == "workflow_mode" and event.workflow_mode
    ]


def artifact_kinds(run) -> list[str]:
    """Return emitted artifact kinds in order."""

    return [
        event.artifact_kind
        for event in run.events
        if event.type == "artifact" and event.artifact_kind
    ]


@pytest.mark.asyncio
async def test_runtime_parity_manifest_matches_implemented_cases() -> None:
    manifest_names = [entry["name"] for entry in load_manifest()]
    assert manifest_names == SCENARIO_NAMES


@pytest.mark.asyncio
async def test_streaming_text_scenario() -> None:
    backend = ScriptedBackend(
        streams=[
            [
                StreamChunk(content="Mock streaming ", is_done=False),
                StreamChunk(
                    content="says hello from Loader.",
                    full_content="Mock streaming says hello from Loader.",
                    is_done=True,
                ),
            ]
        ]
    )

    run = await run_scenario("hello there", backend, config=AgentConfig(auto_context=False))

    assert run.response == "Mock streaming says hello from Loader."
    assert [call.mode for call in run.invocations] == ["stream"]
    assert not tool_event_names(run)


@pytest.mark.asyncio
async def test_read_file_roundtrip(temp_dir: Path) -> None:
    fixture = temp_dir / "fixture.txt"
    fixture.write_text("alpha parity line\nbeta line\n")

    backend = ScriptedBackend(
        completions=[
            native_tool_response(
                ToolCall(id="read-1", name="read", arguments={"file_path": str(fixture)}),
                content="I'll inspect that file.",
            ),
            final_response("The file contains alpha parity line."),
        ]
    )

    run = await run_scenario(
        "Read the fixture file and summarize it.",
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
    )

    assert "alpha parity line" in run.response
    assert tool_event_names(run) == ["read"]
    assert any("alpha parity line" in message for message in tool_result_messages(run))
    assert len(run.invocations) == 2
    assert any(message.role == Role.TOOL for message in run.invocations[1].messages)


@pytest.mark.asyncio
@pytest.mark.parametrize("alias_key", ["file", "filepath"])
async def test_read_file_alias_roundtrip(temp_dir: Path, alias_key: str) -> None:
    fixture = temp_dir / "fixture.txt"
    fixture.write_text("alpha parity line\nbeta line\n")

    backend = ScriptedBackend(
        completions=[
            native_tool_response(
                ToolCall(id="read-1", name="read", arguments={alias_key: str(fixture)}),
                content="I'll inspect that file.",
            ),
            final_response("The file contains alpha parity line."),
        ]
    )

    run = await run_scenario(
        "Read the fixture file and summarize it.",
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
    )

    assert "alpha parity line" in run.response
    assert tool_event_names(run) == ["read"]
    assert any("alpha parity line" in message for message in tool_result_messages(run))


@pytest.mark.asyncio
async def test_multi_tool_turn_roundtrip(temp_dir: Path) -> None:
    fixture = temp_dir / "fixture.txt"
    fixture.write_text("alpha parity line\nbeta line\ngamma parity line\n")

    backend = ScriptedBackend(
        completions=[
            native_tool_response(
                ToolCall(id="read-1", name="read", arguments={"file_path": str(fixture)}),
                ToolCall(
                    id="grep-1",
                    name="grep",
                    arguments={"pattern": "parity", "path": str(fixture)},
                ),
                content="I'll inspect the file and count parity matches.",
            ),
            final_response("The file has two parity lines, including alpha parity line."),
        ]
    )

    run = await run_scenario(
        "Inspect the fixture and find parity lines.",
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
    )

    assert tool_event_names(run) == ["read", "grep"]
    assert len(tool_result_messages(run)) == 2
    assert "two parity lines" in run.response


@pytest.mark.asyncio
async def test_turn_summary_smoke_for_multi_tool_turn(temp_dir: Path) -> None:
    fixture = temp_dir / "fixture.txt"
    fixture.write_text("alpha parity line\nbeta line\ngamma parity line\n")

    backend = ScriptedBackend(
        completions=[
            native_tool_response(
                ToolCall(id="read-1", name="read", arguments={"file_path": str(fixture)}),
                ToolCall(
                    id="grep-1",
                    name="grep",
                    arguments={"pattern": "parity", "path": str(fixture)},
                ),
                content="I'll inspect the file and count parity matches.",
            ),
            final_response("The file has two parity lines, including alpha parity line."),
        ]
    )

    run = await run_scenario(
        "Inspect the fixture and find parity lines.",
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
    )

    summary = run.agent.last_turn_summary
    assert summary is not None
    assert summary.final_response == run.response
    assert summary.iterations == 2
    assert len(summary.assistant_messages) == 2
    assert len(summary.tool_result_messages) == 2
    assert "assistant.tool_batch" in trace_event_names(run)


@pytest.mark.asyncio
async def test_write_file_allowed(temp_dir: Path) -> None:
    target = temp_dir / "allowed.txt"
    backend = ScriptedBackend(
        completions=[
            native_tool_response(
                ToolCall(
                    id="write-1",
                    name="write",
                    arguments={"file_path": str(target), "content": "hello from loader\n"},
                ),
                content="I'll create the file now.",
            ),
            final_response("Successfully created the file."),
        ]
    )

    run = await run_scenario(
        "Create allowed.txt with a greeting.",
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
    )

    assert target.read_text() == "hello from loader\n"
    assert "Successfully created the file." in run.response
    assert tool_event_names(run) == ["write"]


@pytest.mark.asyncio
async def test_write_file_denied(temp_dir: Path) -> None:
    target = temp_dir / "denied.txt"
    config = non_streaming_config()
    config.permission_mode = PermissionMode.PROMPT
    backend = ScriptedBackend(
        completions=[
            native_tool_response(
                ToolCall(
                    id="write-1",
                    name="write",
                    arguments={"file_path": str(target), "content": "should not exist\n"},
                ),
                content="I'll create the file if you approve it.",
            ),
            final_response("I skipped the write as requested."),
        ]
    )

    async def deny_confirmation(tool_name: str, message: str, details: str) -> bool:
        assert tool_name == "write"
        assert "approval" in message.lower()
        assert "active_mode=prompt" in details
        return False

    run = await run_scenario(
        "Create denied.txt with a greeting.",
        backend,
        config=config,
        project_root=temp_dir,
        on_confirmation=deny_confirmation,
    )

    assert not target.exists()
    assert "skipped the write" in run.response.lower()
    assert any(event.type == "confirmation" for event in run.events)


@pytest.mark.asyncio
async def test_bash_stdout_roundtrip(temp_dir: Path, monkeypatch: pytest.MonkeyPatch) -> None:
    monkeypatch.chdir(temp_dir)
    backend = ScriptedBackend(
        completions=[
            native_tool_response(
                ToolCall(id="bash-1", name="bash", arguments={"command": "pwd"}),
                content="I'll check the current directory.",
            ),
            final_response("Confirmed the working directory."),
        ]
    )

    run = await run_scenario(
        "Tell me the current directory.",
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
    )

    assert str(temp_dir) in tool_result_messages(run)[0]
    assert "Confirmed the working directory." in run.response


@pytest.mark.asyncio
async def test_bash_confirmation_prompt_approved(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    monkeypatch.chdir(temp_dir)
    target = temp_dir / "approved.txt"
    config = non_streaming_config()
    config.permission_mode = PermissionMode.PROMPT
    backend = ScriptedBackend(
        completions=[
            native_tool_response(
                ToolCall(id="bash-1", name="bash", arguments={"command": "touch approved.txt"}),
                content="I'll create the file after approval.",
            ),
            final_response("The shell command completed."),
        ]
    )

    async def approve_confirmation(tool_name: str, message: str, details: str) -> bool:
        assert tool_name == "bash"
        assert "approval" in message.lower()
        assert "touch approved.txt" in details
        return True

    run = await run_scenario(
        "Create approved.txt using bash.",
        backend,
        config=config,
        project_root=temp_dir,
        on_confirmation=approve_confirmation,
    )

    assert target.exists()
    assert "shell command completed" in run.response.lower()
    assert any(event.type == "confirmation" for event in run.events)


@pytest.mark.asyncio
async def test_bash_confirmation_prompt_denied(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    monkeypatch.chdir(temp_dir)
    target = temp_dir / "denied-bash.txt"
    config = non_streaming_config()
    config.permission_mode = PermissionMode.PROMPT
    backend = ScriptedBackend(
        completions=[
            native_tool_response(
                ToolCall(id="bash-1", name="bash", arguments={"command": "touch denied-bash.txt"}),
                content="I'll create the file if you allow it.",
            ),
            final_response("I left the shell command undone."),
        ]
    )

    async def deny_confirmation(tool_name: str, message: str, details: str) -> bool:
        assert tool_name == "bash"
        assert "touch denied-bash.txt" in details
        return False

    run = await run_scenario(
        "Create denied-bash.txt using bash.",
        backend,
        config=config,
        project_root=temp_dir,
        on_confirmation=deny_confirmation,
    )

    assert not target.exists()
    assert "left the shell command undone" in run.response.lower()
    assert any(event.type == "confirmation" for event in run.events)


@pytest.mark.asyncio
async def test_read_only_mode_denies_write(temp_dir: Path) -> None:
    config = non_streaming_config()
    config.permission_mode = PermissionMode.READ_ONLY
    config.auto_recover = False
    target = temp_dir / "blocked-by-policy.txt"
    backend = ScriptedBackend(
        completions=[
            native_tool_response(
                ToolCall(
                    id="write-1",
                    name="write",
                    arguments={"file_path": str(target), "content": "denied\n"},
                ),
                content="I'll create the file.",
            ),
            final_response("The write was blocked."),
        ]
    )

    run = await run_scenario(
        "Create blocked-by-policy.txt.",
        backend,
        config=config,
        project_root=temp_dir,
    )

    assert not target.exists()
    assert any("requires workspace-write" in message for message in tool_result_messages(run))


@pytest.mark.asyncio
async def test_read_only_mode_denies_mutating_bash(temp_dir: Path) -> None:
    config = non_streaming_config()
    config.permission_mode = PermissionMode.READ_ONLY
    config.auto_recover = False
    target = temp_dir / "bash-blocked.txt"
    backend = ScriptedBackend(
        completions=[
            native_tool_response(
                ToolCall(
                    id="bash-1",
                    name="bash",
                    arguments={"command": f"touch {target}"},
                ),
                content="I'll create the file with bash.",
            ),
            final_response("The bash command was blocked."),
        ]
    )

    run = await run_scenario(
        "Create bash-blocked.txt using bash.",
        backend,
        config=config,
        project_root=temp_dir,
    )

    assert not target.exists()
    assert any("requires workspace-write" in message for message in tool_result_messages(run))


@pytest.mark.asyncio
async def test_read_only_mode_allows_safe_bash(temp_dir: Path) -> None:
    config = non_streaming_config()
    config.permission_mode = PermissionMode.READ_ONLY
    backend = ScriptedBackend(
        completions=[
            native_tool_response(
                ToolCall(id="bash-1", name="bash", arguments={"command": "pwd"}),
                content="I'll inspect the current directory.",
            ),
            final_response("Inspected the current directory."),
        ]
    )

    run = await run_scenario(
        "Show the current directory.",
        backend,
        config=config,
        project_root=temp_dir,
    )

    assert tool_event_names(run) == ["bash"]
    assert not any("requires" in message for message in tool_result_messages(run))


@pytest.mark.asyncio
async def test_workspace_write_denies_write_outside_root(temp_dir: Path) -> None:
    config = non_streaming_config()
    config.auto_recover = False
    outside = temp_dir.parent / "outside-root.txt"
    if outside.exists():
        outside.unlink()

    backend = ScriptedBackend(
        completions=[
            native_tool_response(
                ToolCall(
                    id="write-1",
                    name="write",
                    arguments={"file_path": str(outside), "content": "outside\n"},
                ),
                content="I'll write outside the workspace.",
            ),
            final_response("The write was blocked."),
        ]
    )

    async def decline_confirmation(_name: str, _msg: str, _details: str) -> bool:
        return False

    run = await run_scenario(
        "Write a file outside the workspace.",
        backend,
        config=config,
        project_root=temp_dir,
        on_confirmation=decline_confirmation,
    )

    assert not outside.exists()
    assert any(
        "declined" in message.lower() or "outside workspace" in message.lower()
        for message in tool_result_messages(run)
    )


@pytest.mark.asyncio
async def test_danger_full_access_allows_dangerous_bash(temp_dir: Path) -> None:
    target = temp_dir / "mode.txt"
    target.write_text("hello\n")
    config = non_streaming_config()
    config.permission_mode = PermissionMode.DANGER_FULL_ACCESS
    backend = ScriptedBackend(
        completions=[
            native_tool_response(
                ToolCall(
                    id="bash-1",
                    name="bash",
                    arguments={"command": f"chmod 600 {target}"},
                ),
                content="I'll change the file permissions.",
            ),
            final_response("Updated the file permissions."),
        ]
    )

    run = await run_scenario(
        "Lock down mode.txt permissions.",
        backend,
        config=config,
        project_root=temp_dir,
    )

    assert tool_event_names(run) == ["bash"]
    assert not any("requires" in message for message in tool_result_messages(run))
    assert not any(event.type == "confirmation" for event in run.events)


@pytest.mark.asyncio
async def test_prompt_mode_prompts_destructive_write(temp_dir: Path) -> None:
    target = temp_dir / "prompted.txt"
    config = non_streaming_config()
    config.permission_mode = PermissionMode.PROMPT
    backend = ScriptedBackend(
        completions=[
            native_tool_response(
                ToolCall(
                    id="write-1",
                    name="write",
                    arguments={"file_path": str(target), "content": "prompted\n"},
                ),
                content="I'll create the file after approval.",
            ),
            final_response("The file was created."),
        ]
    )
    prompts: list[str] = []

    async def approve_confirmation(tool_name: str, message: str, details: str) -> bool:
        assert tool_name == "write"
        prompts.append(details)
        return True

    run = await run_scenario(
        "Create prompted.txt after approval.",
        backend,
        config=config,
        project_root=temp_dir,
        on_confirmation=approve_confirmation,
    )

    assert target.read_text() == "prompted\n"
    assert prompts and "active_mode=prompt" in prompts[0]
    assert any(event.type == "confirmation" for event in run.events)


@pytest.mark.asyncio
async def test_allow_mode_skips_prompt_for_destructive_write(temp_dir: Path) -> None:
    target = temp_dir / "allow-mode.txt"
    config = non_streaming_config()
    config.permission_mode = PermissionMode.ALLOW
    backend = ScriptedBackend(
        completions=[
            native_tool_response(
                ToolCall(
                    id="write-1",
                    name="write",
                    arguments={"file_path": str(target), "content": "allow mode\n"},
                ),
                content="I'll create the file directly.",
            ),
            final_response("The file was created."),
        ]
    )
    prompts: list[str] = []

    async def unexpected_confirmation(tool_name: str, message: str, details: str) -> bool:
        prompts.append(tool_name)
        return False

    run = await run_scenario(
        "Create allow-mode.txt directly.",
        backend,
        config=config,
        project_root=temp_dir,
        on_confirmation=unexpected_confirmation,
    )

    assert target.read_text() == "allow mode\n"
    assert prompts == []
    assert not any(event.type == "confirmation" for event in run.events)
    assert "The file was created." in run.response


@pytest.mark.asyncio
async def test_deny_rule_blocks_allowed_mode(temp_dir: Path) -> None:
    loader_root = temp_dir / ".loader"
    loader_root.mkdir()
    (loader_root / "permission-rules.json").write_text(
        '{"deny": [{"tool": "write", "path_contains": "secrets"}]}\n'
    )
    target = temp_dir / "secrets.txt"
    config = non_streaming_config()
    config.permission_mode = PermissionMode.ALLOW
    config.auto_recover = False
    backend = ScriptedBackend(
        completions=[
            native_tool_response(
                ToolCall(
                    id="write-1",
                    name="write",
                    arguments={"file_path": str(target), "content": "denied\n"},
                ),
                content="I'll write the secret file.",
            ),
            final_response("The write was blocked by policy."),
        ]
    )

    run = await run_scenario(
        "Create secrets.txt.",
        backend,
        config=config,
        project_root=temp_dir,
    )

    assert not target.exists()
    assert any("denied by rule" in message for message in tool_result_messages(run))
    assert "tool.permission_denied" in trace_event_names(run)


@pytest.mark.asyncio
async def test_ask_rule_prompts_even_when_mode_would_allow(temp_dir: Path) -> None:
    loader_root = temp_dir / ".loader"
    loader_root.mkdir()
    (loader_root / "permission-rules.json").write_text(
        '{"ask": [{"tool": "write", "path_contains": "README"}]}\n'
    )
    target = temp_dir / "README.md"
    config = non_streaming_config()
    config.permission_mode = PermissionMode.ALLOW
    backend = ScriptedBackend(
        completions=[
            native_tool_response(
                ToolCall(
                    id="write-1",
                    name="write",
                    arguments={"file_path": str(target), "content": "ask rule\n"},
                ),
                content="I'll update the README if you approve it.",
            ),
            final_response("The write was declined."),
        ]
    )
    prompts: list[str] = []

    async def deny_confirmation(tool_name: str, message: str, details: str) -> bool:
        prompts.append(details)
        return False

    run = await run_scenario(
        "Update README.md.",
        backend,
        config=config,
        project_root=temp_dir,
        on_confirmation=deny_confirmation,
    )

    assert not target.exists()
    assert prompts and "matched_ask_rule=tool=write, path_contains=README" in prompts[0]
    assert any(event.type == "confirmation" for event in run.events)
    assert "declined" in run.response.lower()


@pytest.mark.asyncio
async def test_raw_json_tool_call_fallback(temp_dir: Path) -> None:
    fixture = temp_dir / "fixture.txt"
    fixture.write_text("alpha parity line\n")
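    # str(fixture) is interpolated into the JSON verbatim; this assumes a
    # POSIX-style path, since backslashes in a Windows path would need
    # escaping to stay valid JSON.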
    raw_json = f'{{"name": "read", "arguments": {{"file_path": "{fixture}"}}}}'

    backend = ScriptedBackend(
        streams=[
            [
                StreamChunk(content=raw_json[:25], is_done=False),
                StreamChunk(content=raw_json[25:], full_content=raw_json, is_done=True),
            ],
            [
                StreamChunk(
                    content="Recovered the raw JSON tool call and read the file.",
                    full_content="Recovered the raw JSON tool call and read the file.",
                    is_done=True,
                )
            ],
        ]
    )

    run = await run_scenario(
        "Read the fixture file.",
        backend,
        config=AgentConfig(auto_context=False, max_iterations=8),
        project_root=temp_dir,
    )

    assert tool_event_names(run) == ["read"]
    assert any("alpha parity line" in message for message in tool_result_messages(run))
    assert "Recovered the raw JSON tool call" in run.response


@pytest.mark.asyncio
async def test_raw_json_todowrite_tool_call_fallback(temp_dir: Path) -> None:
    raw_json = json.dumps(
        {
            "name": "TodoWrite",
            "arguments": {
                "todos": [
                    {
                        "content": "Run tests",
                        "active_form": "Running tests",
                        "status": "completed",
                    }
                ]
            },
        }
    )
    backend = ScriptedBackend(
        completions=[
            CompletionResponse(content=raw_json),
            final_response("Tracked the current todo list."),
        ]
    )

    run = await run_scenario(
        "Track the current work items.",
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
    )

    todo_store = temp_dir / ".loader" / "todos" / "active.json"
    assert tool_event_names(run) == ["TodoWrite"]
    assert json.loads(todo_store.read_text()) == []
    assert "Tracked the current todo list." in run.response


@pytest.mark.asyncio
async def test_raw_json_patch_tool_call_fallback(temp_dir: Path) -> None:
    target = temp_dir / "sample.txt"
    target.write_text("alpha\nbeta\ngamma\n")
    raw_json = json.dumps(
        {
            "name": "patch",
            "arguments": {
                "file_path": str(target),
                "hunks": [
                    {
                        "old_start": 2,
                        "old_lines": 1,
                        "new_start": 2,
                        "new_lines": 1,
                        "lines": ["-beta", "+beta updated"],
                    }
                ],
            },
        }
    )
    backend = ScriptedBackend(
        completions=[
            CompletionResponse(content=raw_json),
            final_response("Patched sample.txt."),
        ]
    )

    run = await run_scenario(
        "Update sample.txt.",
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
    )

    assert tool_event_names(run) == ["patch"]
    assert target.read_text() == "alpha\nbeta updated\ngamma\n"
    assert "Patched sample.txt." in run.response


@pytest.mark.asyncio
async def test_raw_json_ask_user_question_tool_call_fallback(temp_dir: Path) -> None:
    raw_json = json.dumps(
        {
            "name": "AskUserQuestion",
            "arguments": {
                "title": "Path Choice",
                "context": "Choose the safer Loader cleanup path.",
                "question": "Which path should we take?",
                "options": [
                    {
                        "label": "Plan first",
                        "description": "Keep the next move documented.",
                    },
                    {
                        "label": "Execute now",
                        "description": "Start changing code immediately.",
                    },
                ],
            },
        }
    )
    backend = ScriptedBackend(
        completions=[
            CompletionResponse(content=raw_json),
            final_response("We'll execute now."),
        ]
    )

    async def answer(question: str, options: list[str] | None) -> str:
        assert "Which path should we take?" in question
        assert options == [
            "Plan first - Keep the next move documented.",
            "Execute now - Start changing code immediately.",
        ]
        return "2"

    run = await run_scenario(
        "Decide the next path before changing code.",
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
        on_user_question=answer,
    )

    assert tool_event_names(run) == ["AskUserQuestion"]
    assert any("Execute now" in message for message in tool_result_messages(run))
    assert "We'll execute now." in run.response


@pytest.mark.asyncio
async def test_raw_bracket_ask_user_question_tool_call_fallback(temp_dir: Path) -> None:
    backend = ScriptedBackend(
        streams=[
            [
                StreamChunk(
                    content='[calls askuserquestion tool with: question="Which path should we take?"]',
                    full_content='[calls askuserquestion tool with: question="Which path should we take?"]',
                    is_done=True,
                )
            ],
            [
                StreamChunk(
                    content="We'll plan first.",
                    full_content="We'll plan first.",
                    is_done=True,
                )
            ],
        ]
    )

    async def answer(question: str, options: list[str] | None) -> str:
        assert "Which path should we take?" in question
        assert options is None
        return "Plan first"

    run = await run_scenario(
        "Read the fixture file.",
        backend,
        config=AgentConfig(auto_context=False, max_iterations=8),
        project_root=temp_dir,
        on_user_question=answer,
    )

    assert tool_event_names(run) == ["AskUserQuestion"]
    assert any('"answer": "Plan first"' in message for message in tool_result_messages(run))
    assert "We'll plan first." in run.response


@pytest.mark.asyncio
async def test_non_streaming_bracket_ask_user_question_tool_call_fallback(
    temp_dir: Path,
) -> None:
    backend = ScriptedBackend(
        completions=[
            CompletionResponse(
                content='[calls askuserquestion tool with: question="Which path should we take?"]'
            ),
            final_response("We'll plan first."),
        ]
    )

    async def answer(question: str, options: list[str] | None) -> str:
        assert "Which path should we take?" in question
        assert options is None
        return "Plan first"

    run = await run_scenario(
        "Read the fixture file.",
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
        on_user_question=answer,
    )

    assert tool_event_names(run) == ["AskUserQuestion"]
    assert any('"answer": "Plan first"' in message for message in tool_result_messages(run))
    assert "We'll plan first." in run.response


@pytest.mark.asyncio
async def test_native_and_raw_tool_paths_share_executor_trace(temp_dir: Path) -> None:
    native_fixture = temp_dir / "native.txt"
    native_fixture.write_text("native parity line\n")
    native_backend = ScriptedBackend(
        completions=[
            native_tool_response(
                ToolCall(id="read-1", name="read", arguments={"file_path": str(native_fixture)}),
                content="I'll inspect the native tool result.",
            ),
            final_response("Native read complete."),
        ]
    )
    native_run = await run_scenario(
        "Read native.txt.",
        native_backend,
        config=non_streaming_config(),
        project_root=temp_dir,
    )

    raw_fixture = temp_dir / "raw.txt"
    raw_fixture.write_text("raw parity line\n")
    raw_json = f'{{"name": "read", "arguments": {{"file_path": "{raw_fixture}"}}}}'
    raw_backend = ScriptedBackend(
        streams=[
            [
                StreamChunk(content=raw_json[:20], is_done=False),
                StreamChunk(content=raw_json[20:], full_content=raw_json, is_done=True),
            ],
            [
                StreamChunk(
                    content="Raw read complete.",
                    full_content="Raw read complete.",
                    is_done=True,
                )
            ],
        ]
    )
    raw_run = await run_scenario(
        "Read raw.txt.",
        raw_backend,
        config=AgentConfig(auto_context=False, max_iterations=8),
        project_root=temp_dir,
    )

    for run in (native_run, raw_run):
        names = trace_event_names(run)
        assert "assistant.tool_batch" in names
        assert "tool.received" in names
        assert "tool.executed" in names

    native_summary = native_run.agent.last_turn_summary
    raw_summary = raw_run.agent.last_turn_summary
    assert native_summary is not None
    assert raw_summary is not None
    assert any(
        event.name == "tool.received" and event.data["source"] == "native"
        for event in native_summary.trace
    )
    assert any(
        event.name == "tool.received" and event.data["source"] == "raw_text"
        for event in raw_summary.trace
    )


@pytest.mark.asyncio
async def test_backend_capability_probe_refreshes_native_tool_mode(
    temp_dir: Path,
) -> None:
    fixture = temp_dir / "fixture.txt"
    fixture.write_text("capability probe line\n")

    class LazyCapabilityBackend(ScriptedBackend):
        def __init__(self, completions: list[CompletionResponse]) -> None:
            super().__init__(completions=completions, supports_native_tools=False)
            self.model = "custom-qwen-build"
            self._described = False

        async def describe_model(self) -> dict[str, dict[str, list[str]]]:
            self._described = True
            return {"details": {"families": ["qwen2.5"]}}

        def capability_profile(self):
            model_details = (
                {"details": {"families": ["qwen2.5"]}} if self._described else None
            )
            return resolve_capability_profile(
                self.model,
                model_details=model_details,
            )

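    # Probe flow under test: the backend starts without native tool support,
    # describe_model() flips the probe flag, and the refreshed capability
    # profile should re-enable native tool calls (asserted below via
    # run.invocations[0].tools and run.agent.use_react).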
    backend = LazyCapabilityBackend(
        completions=[
            native_tool_response(
                ToolCall(id="read-1", name="read", arguments={"file_path": str(fixture)}),
                content="I'll inspect that file after probing capabilities.",
            ),
            final_response("Capability probing enabled the native read."),
        ]
    )

    run = await run_scenario(
        "Read the fixture file after checking model capabilities.",
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
    )

    assert backend._described
    assert not run.agent.use_react
    assert run.invocations[0].tools is not None
    assert tool_event_names(run) == ["read"]
    assert "Capability probing enabled the native read." in run.response


@pytest.mark.asyncio
async def test_run_streaming_delegates_to_primary_runtime(temp_dir: Path) -> None:
    fixture = temp_dir / "streaming.txt"
    fixture.write_text("streamed runtime line\n")
    backend = ScriptedBackend(
        streams=[
            [
                StreamChunk(
                    content="I'll inspect the file now.",
                    full_content="I'll inspect the file now.",
                    tool_calls=[
                        ToolCall(id="read-1", name="read", arguments={"file_path": str(fixture)})
                    ],
                    is_done=True,
                )
            ],
            [
                StreamChunk(
                    content="Finished reading the streamed fixture.",
                    full_content="Finished reading the streamed fixture.",
                    is_done=True,
                )
            ],
        ]
    )
    agent = Agent(
        backend=backend,
        config=AgentConfig(auto_context=False, max_iterations=8),
        project_root=temp_dir,
    )

    events = [event async for event in agent.run_streaming("Read the streamed fixture file.")]

    assert any(event.type == "tool_call" and event.tool_name == "read" for event in events)
    assert any(
        event.type == "tool_result" and "streamed runtime line" in event.content
        for event in events
    )
    assert agent.last_turn_summary is not None
    assert agent.last_turn_summary.final_response.startswith(
        "Finished reading the streamed fixture."
    )


@pytest.mark.asyncio
async def test_definition_of_done_verify_phase(temp_dir: Path) -> None:
    target = temp_dir / "verified.txt"
    backend = ScriptedBackend(
        completions=[
            native_tool_response(
                ToolCall(
                    id="write-1",
                    name="write",
                    arguments={"file_path": str(target), "content": "verified\n"},
                ),
                content="I'll create the file now.",
            ),
            final_response("Created verified.txt."),
        ]
    )

    run = await run_scenario(
        "Create verified.txt with a line of text.",
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
    )

    assert verification_commands(run) == [f"test -f {target}"]
    assert dod_statuses(run) == ["draft", "verifying", "done"]
    assert "Verification:" in run.response
    assert run.agent.last_turn_summary is not None
    assert run.agent.last_turn_summary.verification_status == "passed"
    assert run.agent.last_turn_summary.definition_of_done is not None


@pytest.mark.asyncio
async def test_verify_failure_routes_to_fix_loop(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    monkeypatch.chdir(temp_dir)
    target = temp_dir / "broken.py"
    backend = ScriptedBackend(
        completions=[
            native_tool_response(
                ToolCall(
                    id="write-1",
                    name="write",
                    arguments={"file_path": str(target), "content": "print(\n"},
                ),
                content="I'll create the script.",
            ),
            final_response("Created broken.py."),
            native_tool_response(
                ToolCall(
                    id="write-2",
                    name="write",
                    arguments={
                        "file_path": str(target),
                        "content": "print('fixed from verify loop')\n",
                    },
                ),
                content="I'll fix the verification failure.",
            ),
            final_response("Fixed broken.py."),
        ]
    )

    run = await run_scenario(
        "Create broken.py and make sure it runs.",
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
    )

    assert target.read_text() == "print('fixed from verify loop')\n"
    assert verification_commands(run) == ["python broken.py", "python broken.py"]
    assert "fixing" in dod_statuses(run)
    assert "Verification:" in run.response
    assert run.agent.last_turn_summary is not None
    assert run.agent.last_turn_summary.verification_status == "passed"


@pytest.mark.asyncio
async def test_verify_retry_budget_exhaustion(
    temp_dir: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    monkeypatch.chdir(temp_dir)
    target = temp_dir / "still-broken.py"
    config = non_streaming_config()
    config.verification_retry_budget = 1
    backend = ScriptedBackend(
        completions=[
            native_tool_response(
                ToolCall(
                    id="write-1",
                    name="write",
                    arguments={"file_path": str(target), "content": "print(\n"},
                ),
                content="I'll create the script.",
            ),
            final_response("Created still-broken.py."),
            native_tool_response(
                ToolCall(
                    id="write-2",
                    name="write",
                    arguments={"file_path": str(target), "content": "print(\n"},
                ),
                content="I'll try one more fix.",
            ),
            final_response("Tried to fix still-broken.py."),
        ]
    )

    run = await run_scenario(
        "Create still-broken.py and make sure it runs.",
        backend,
        config=config,
        project_root=temp_dir,
    )

    assert "couldn't verify" in run.response.lower()
    assert dod_statuses(run)[-1] == "failed"
    assert run.agent.last_turn_summary is not None
    assert run.agent.last_turn_summary.verification_status == "failed"


@pytest.mark.asyncio
async def test_ambiguous_prompt_routes_to_clarify(temp_dir: Path) -> None:
    backend = ScriptedBackend(
        completions=[
            native_tool_response(
                ToolCall(
                    id="ask-1",
                    name="AskUserQuestion",
                    arguments={
                        "question": (
                            "What outcome matters most, and what should stay out of scope?"
                        )
                    },
                ),
                content="I need one clarification first.",
            ),
            final_response(
                "\n".join(
                    [
                        "## Task Statement",
                        "Improve Loader so it feels more like claw-code.",
                        "",
                        "## Desired Outcome",
                        "- Make Loader more reliable without broad redesign.",
                        "",
                        "## In Scope",
                        "- Tighten the runtime workflow around the user-facing goal.",
                        "",
                        "## Non Goals",
                        "- Rebuild unrelated subsystems.",
                        "",
                        "## Decision Boundaries",
                        "- Escalate before changing unrelated UX patterns.",
                        "",
                        "## Constraints",
                        "- Stay inside the current repository.",
                        "",
                        "## Likely Touchpoints",
                        "- Runtime entry points and prompt behavior.",
                        "",
                        "## Assumptions",
                        "- The user wants a narrow runtime-quality improvement.",
                        "",
                        "## Acceptance Criteria",
                        "- The improvement stays focused on runtime behavior.",
                    ]
                )
            ),
            final_response("I have the brief and can move forward."),
        ]
    )

    async def answer(question: str, options: list[str] | None) -> str:
        assert "outcome matters most" in question.lower()
        assert options is None
        return "Do not redesign the whole interface."

    run = await run_scenario(
        "Improve Loader so it feels more like claw-code.",
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
        on_user_question=answer,
    )

    assert run.agent.last_turn_summary is not None
    dod = run.agent.last_turn_summary.definition_of_done
    assert dod is not None
    assert workflow_modes(run)[:2] == ["clarify", "execute"]
    assert artifact_kinds(run) == ["clarify_brief"]
    assert dod.clarify_brief is not None
    assert Path(dod.clarify_brief).exists()


@pytest.mark.asyncio
async def test_complex_prompt_routes_to_plan(temp_dir: Path) -> None:
    target = temp_dir / "planned.txt"
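    # The scripted plan reply below separates the implementation plan from the
    # verification plan with a "<<<VERIFICATION>>>" marker; the artifact_kinds
    # assertion at the end expects the runtime to split it into two artifacts.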
    backend = ScriptedBackend(
        completions=[
            final_response(
                "\n".join(
                    [
                        "# Implementation Plan",
                        "",
                        "## File Changes",
                        f"- Create {target.name} in the workspace root.",
                        "",
                        "## Execution Order",
                        f"1. Write {target.name}.",
                        "2. Confirm the file exists.",
                        "",
                        "## Risks",
                        "- Writing the wrong file path.",
                        "",
                        "<<<VERIFICATION>>>",
                        "",
                        "# Verification Plan",
                        "",
                        "## Acceptance Criteria",
                        f"- {target.name} exists in the workspace root.",
                        "",
                        "## Verification Commands",
                        f"- `test -f {target}`",
                        "",
                        "## Notes",
                        "- Use a deterministic file existence check.",
                    ]
                )
            ),
            native_tool_response(
                ToolCall(
                    id="write-1",
                    name="write",
                    arguments={"file_path": str(target), "content": "planned output\n"},
                ),
                content="I'll create the file now.",
            ),
            final_response("The file is in place."),
        ]
    )

    run = await run_scenario(
        "Implement a persistent workflow mode router with clarify artifacts, "
        "planning artifacts, and verification-plan wiring in the runtime.",
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
    )

    assert run.agent.last_turn_summary is not None
    dod = run.agent.last_turn_summary.definition_of_done
    assert dod is not None
    assert workflow_modes(run)[:3] == ["plan", "execute", "verify"]
    assert artifact_kinds(run) == ["implementation_plan", "verification_plan"]
    assert not any(event.type == "decomposition" for event in run.events)
    assert not any(event.type == "subtask" for event in run.events)
    assert dod.verification_commands == [f"test -f {target}"]
    assert verification_commands(run) == [f"test -f {target}"]


@pytest.mark.asyncio
async def test_verify_failure_fix_loop_does_not_reroute_workflow(temp_dir: Path) -> None:
    target = temp_dir / "retry.txt"
    backend = ScriptedBackend(
        completions=[
            final_response(
                "\n".join(
                    [
                        "# Implementation Plan",
                        "",
                        "## File Changes",
                        f"- Create {target.name}.",
                        "",
                        "## Execution Order",
                        f"1. Write {target.name}.",
                        "2. Fix it if verification fails.",
                        "",
                        "## Risks",
                        "- Initial content may be wrong.",
                        "",
                        "<<<VERIFICATION>>>",
                        "",
                        "# Verification Plan",
                        "",
                        "## Acceptance Criteria",
                        "- The file contains the word fixed.",
                        "",
                        "## Verification Commands",
                        f"- `grep -q fixed {target}`",
                        "",
                        "## Notes",
                        "- Retry if the first write misses the target string.",
                    ]
                )
            ),
            native_tool_response(
                ToolCall(
                    id="write-1",
                    name="write",
                    arguments={"file_path": str(target), "content": "draft output\n"},
                ),
                content="I'll write the first draft.",
            ),
            final_response("First draft is written."),
            native_tool_response(
                ToolCall(
                    id="write-2",
                    name="write",
                    arguments={"file_path": str(target), "content": "fixed output\n"},
                ),
                content="I'll correct the file.",
            ),
            final_response("The file now contains the fixed output."),
        ]
    )

    run = await run_scenario(
        "Implement a persistent workflow mode router with clarify artifacts, "
        "planning artifacts, and verification-plan wiring in the runtime.",
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
    )

    modes = workflow_modes(run)
    assert modes.count("plan") == 1
    assert modes.count("clarify") == 0
    assert modes.count("execute") >= 2
    assert modes.count("verify") >= 2


@pytest.mark.asyncio
async def test_conversational_task_skips_verify_phase() -> None:
    backend = ScriptedBackend(
        streams=[
            [
                StreamChunk(content="Hello there.", full_content="Hello there.", is_done=True),
            ]
        ]
    )

    run = await run_scenario("hello there", backend, config=AgentConfig(auto_context=False))

    assert run.response == "Hello there."
    assert not dod_statuses(run)
    assert run.agent.last_turn_summary is None


@pytest.mark.asyncio
async def test_explore_mode_skips_dod_and_router(temp_dir: Path) -> None:
    target = temp_dir / "feature.py"
    target.write_text("def important_helper():\n return 1\n")
    backend = ScriptedBackend(
        completions=[
            native_tool_response(
                ToolCall(
                    id="grep-1",
                    name="grep",
                    arguments={
                        "pattern": "important_helper",
                        "path": str(temp_dir),
                        "include": "*.py",
                    },
                ),
                content="I'll search for that helper.",
            ),
            final_response("important_helper is defined in feature.py."),
        ]
    )

    run = await run_explore_scenario(
        "Where is important_helper defined?",
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
    )

    assert "feature.py" in run.response
    assert tool_event_names(run) == ["grep"]
    assert not dod_statuses(run)
    assert not workflow_modes(run)
    assert run.agent.last_turn_summary is not None
    assert run.agent.last_turn_summary.definition_of_done is None
    assert run.agent.last_turn_summary.workflow_mode == "explore"
    assert "explore.completed" in trace_event_names(run)
    assert not (temp_dir / ".loader" / "dod").exists()
    assert run.invocations[0].tools is not None
    assert "write" not in {tool["name"] for tool in run.invocations[0].tools or []}


@pytest.mark.asyncio
async def test_explore_mode_denies_write(temp_dir: Path) -> None:
    target = temp_dir / "new.txt"
    config = non_streaming_config()
    config.permission_mode = PermissionMode.WORKSPACE_WRITE
    backend = ScriptedBackend(
        completions=[
            native_tool_response(
                ToolCall(
                    id="write-1",
                    name="write",
                    arguments={
                        "file_path": str(target),
                        "content": "not allowed\n",
                    },
                ),
                content="I'll write a file.",
            ),
            final_response("Explore mode is read-only, so I cannot make that change here."),
        ]
    )

    run = await run_explore_scenario(
        "Create a new file anyway.",
        backend,
        config=config,
        project_root=temp_dir,
    )

    assert not target.exists()
    assert tool_event_names(run) == ["write"]
    assert any("read-only" in message.lower() for message in tool_result_messages(run))
    assert "cannot make that change" in run.response.lower()
    assert "tool.permission_denied" in trace_event_names(run)
    assert not dod_statuses(run)
    assert not workflow_modes(run)
    assert not (temp_dir / ".loader" / "dod").exists()


| 1615 | @pytest.mark.asyncio |
| 1616 | async def test_explore_mode_ignores_global_allow_policy(temp_dir: Path) -> None: |
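    """Explore mode stays read-only despite ALLOW mode and a matching global allow rule."""
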
| 1617 | loader_root = temp_dir / ".loader" |
| 1618 | loader_root.mkdir() |
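    # A global allow rule that would normally permit this exact write.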
| 1619 | (loader_root / "permission-rules.json").write_text( |
| 1620 | '{"allow": [{"tool": "write", "path_contains": "new.txt"}]}\n' |
| 1621 | ) |
| 1622 | target = temp_dir / "new.txt" |
| 1623 | config = non_streaming_config() |
| 1624 | config.permission_mode = PermissionMode.ALLOW |
| 1625 | backend = ScriptedBackend( |
| 1626 | completions=[ |
| 1627 | native_tool_response( |
| 1628 | ToolCall( |
| 1629 | id="write-1", |
| 1630 | name="write", |
| 1631 | arguments={ |
| 1632 | "file_path": str(target), |
| 1633 | "content": "still denied\n", |
| 1634 | }, |
| 1635 | ), |
| 1636 | content="I'll write a file.", |
| 1637 | ), |
| 1638 | final_response("Explore mode is read-only, so I cannot make that change here."), |
| 1639 | ] |
| 1640 | ) |
| 1641 | |
| 1642 | run = await run_explore_scenario( |
| 1643 | "Create a new file anyway.", |
| 1644 | backend, |
| 1645 | config=config, |
| 1646 | project_root=temp_dir, |
| 1647 | ) |
| 1648 | |
| 1649 | assert not target.exists() |
| 1650 | assert any("read-only" in message.lower() for message in tool_result_messages(run)) |
| 1651 | assert "tool.permission_denied" in trace_event_names(run) |
| 1652 | assert not dod_statuses(run) |
| 1653 | assert not workflow_modes(run) |
| 1654 | |
| 1655 | |
| 1656 | @pytest.mark.asyncio |
async def test_non_mutating_completion_no_longer_forces_continuation(
| 1658 | temp_dir: Path, |
| 1659 | monkeypatch: pytest.MonkeyPatch, |
| 1660 | ) -> None: |
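    """A non-mutating "Done." completion ends the turn without forcing a continuation check."""
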
| 1661 | monkeypatch.chdir(temp_dir) |
| 1662 | target = temp_dir / "hello.py" |
| 1663 | backend = ScriptedBackend( |
| 1664 | completions=[ |
| 1665 | final_response("Done."), |
| 1666 | ] |
| 1667 | ) |
| 1668 | config = non_streaming_config(completion_check=True) |
| 1669 | |
| 1670 | run = await run_scenario( |
| 1671 | "Explain how a hello.py file would work.", |
| 1672 | backend, |
| 1673 | config=config, |
| 1674 | project_root=temp_dir, |
| 1675 | ) |
| 1676 | |
| 1677 | assert not target.exists() |
| 1678 | assert not any(event.type == "completion_check" for event in run.events) |
| 1679 | assert tool_event_names(run) == [] |
| 1680 | assert run.response == "Done." |
| 1681 | |
| 1682 | |
| 1683 | @pytest.mark.asyncio |
| 1684 | async def test_tool_result_contract_regression() -> None: |
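    """Duplicate-skip and validation-failure paths return tool results without raising TypeError."""
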
| 1685 | errors: list[str] = [] |
| 1686 | duplicate_path = "/tmp/already-created.txt" |
| 1687 | |
| 1688 | duplicate_backend = ScriptedBackend( |
| 1689 | completions=[ |
| 1690 | native_tool_response( |
| 1691 | ToolCall( |
| 1692 | id="dup-1", |
| 1693 | name="write", |
| 1694 | arguments={"file_path": duplicate_path, "content": "already there\n"}, |
| 1695 | ), |
| 1696 | content="I'll create the file again.", |
| 1697 | ), |
| 1698 | final_response("Skipped the duplicate write."), |
| 1699 | ] |
| 1700 | ) |
| 1701 | duplicate_agent = Agent(duplicate_backend, config=non_streaming_config()) |
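    # Pre-record the identical write so safeguards treat the scripted call as a duplicate.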
| 1702 | duplicate_agent.safeguards.record_action( |
| 1703 | "write", |
| 1704 | {"file_path": duplicate_path, "content": "already there\n"}, |
| 1705 | ) |
| 1706 | |
| 1707 | try: |
| 1708 | await duplicate_agent.run("Create /tmp/already-created.txt again.") |
| 1709 | except TypeError as exc: |
| 1710 | errors.append(f"duplicate branch raised {exc}") |
| 1711 | |
| 1712 | validation_backend = ScriptedBackend( |
| 1713 | completions=[ |
| 1714 | native_tool_response( |
| 1715 | ToolCall(id="invalid-1", name="bash", arguments={"command": ""}), |
| 1716 | content="I'll run that command.", |
| 1717 | ), |
| 1718 | final_response("Blocked the invalid command."), |
| 1719 | ] |
| 1720 | ) |
| 1721 | validation_agent = Agent(validation_backend, config=non_streaming_config()) |
| 1722 | |
| 1723 | try: |
| 1724 | await validation_agent.run("Run an empty command.") |
| 1725 | except TypeError as exc: |
| 1726 | errors.append(f"validation branch raised {exc}") |
| 1727 | |
| 1728 | assert not errors, "\n".join(errors) |
| 1729 | |
| 1730 | |
| 1731 | @pytest.mark.asyncio |
| 1732 | async def test_duplicate_read_is_skipped_without_intervening_mutation( |
| 1733 | temp_dir: Path, |
| 1734 | ) -> None: |
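    """An immediate reread of the same file is skipped as a duplicate action."""
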
| 1735 | fixture = temp_dir / "index.html" |
| 1736 | fixture.write_text("alpha parity line\n") |
| 1737 | |
| 1738 | backend = ScriptedBackend( |
| 1739 | completions=[ |
| 1740 | native_tool_response( |
| 1741 | ToolCall(id="read-1", name="read", arguments={"file_path": str(fixture)}), |
| 1742 | content="I'll inspect the file.", |
| 1743 | ), |
| 1744 | native_tool_response( |
| 1745 | ToolCall(id="read-2", name="read", arguments={"file_path": str(fixture)}), |
| 1746 | content="I'll reread the same file.", |
| 1747 | ), |
| 1748 | final_response("I'll use the existing file contents instead of rereading."), |
| 1749 | ] |
| 1750 | ) |
| 1751 | |
| 1752 | run = await run_scenario( |
| 1753 | "Inspect index.html and keep moving.", |
| 1754 | backend, |
| 1755 | config=non_streaming_config(), |
| 1756 | project_root=temp_dir, |
| 1757 | ) |
| 1758 | |
| 1759 | assert tool_event_names(run) == ["read", "read"] |
| 1760 | messages = tool_result_messages(run) |
| 1761 | assert any("alpha parity line" in message for message in messages) |
| 1762 | assert any( |
| 1763 | "Skipped - duplicate action" in message and "Already read" in message |
| 1764 | for message in messages |
| 1765 | ) |
| 1766 | assert "existing file contents" in run.response |
| 1767 | |
| 1768 | |
| 1769 | @pytest.mark.asyncio |
| 1770 | async def test_interleaved_reread_is_allowed_once_without_intervening_mutation( |
| 1771 | temp_dir: Path, |
| 1772 | ) -> None: |
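    """Returning to a previously read file after an unrelated read is not treated as a duplicate."""
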
| 1773 | index_file = temp_dir / "index.html" |
| 1774 | chapter_file = temp_dir / "chapter-1.html" |
| 1775 | index_file.write_text("table of contents\n") |
| 1776 | chapter_file.write_text("chapter body\n") |
| 1777 | |
| 1778 | backend = ScriptedBackend( |
| 1779 | completions=[ |
| 1780 | native_tool_response( |
| 1781 | ToolCall( |
| 1782 | id="read-1", |
| 1783 | name="read", |
| 1784 | arguments={"file_path": str(index_file)}, |
| 1785 | ), |
| 1786 | content="I'll inspect the index first.", |
| 1787 | ), |
| 1788 | native_tool_response( |
| 1789 | ToolCall( |
| 1790 | id="read-2", |
| 1791 | name="read", |
| 1792 | arguments={"file_path": str(chapter_file)}, |
| 1793 | ), |
| 1794 | content="I'll inspect the chapter next.", |
| 1795 | ), |
| 1796 | native_tool_response( |
| 1797 | ToolCall( |
| 1798 | id="read-3", |
| 1799 | name="read", |
| 1800 | arguments={"file_path": str(index_file)}, |
| 1801 | ), |
| 1802 | content="I'll reopen the index to reconcile the findings.", |
| 1803 | ), |
| 1804 | final_response("I re-opened the index after checking the chapter."), |
| 1805 | ] |
| 1806 | ) |
| 1807 | |
| 1808 | run = await run_scenario( |
| 1809 | "Inspect the index, inspect a chapter, then return to the index.", |
| 1810 | backend, |
| 1811 | config=non_streaming_config(), |
| 1812 | project_root=temp_dir, |
| 1813 | ) |
| 1814 | |
| 1815 | assert tool_event_names(run) == ["read", "read", "read"] |
| 1816 | messages = tool_result_messages(run) |
| 1817 | assert not any("Skipped - duplicate action" in message for message in messages) |
| 1818 | assert sum("table of contents" in message for message in messages) == 2 |
| 1819 | assert any("chapter body" in message for message in messages) |
| 1820 | |
| 1821 | |
| 1822 | @pytest.mark.asyncio |
| 1823 | async def test_repeated_bash_probe_is_allowed_after_mutation( |
| 1824 | temp_dir: Path, |
| 1825 | ) -> None: |
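    """An intervening mutation lets an identical bash probe run again without duplicate rejection."""
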
| 1826 | target = temp_dir / "notes.txt" |
| 1827 | target.write_text("old value\n") |
| 1828 | list_command = f"ls -1 {temp_dir}" |
| 1829 | |
| 1830 | backend = ScriptedBackend( |
| 1831 | completions=[ |
| 1832 | native_tool_response( |
| 1833 | ToolCall(id="bash-1", name="bash", arguments={"command": list_command}), |
| 1834 | content="I'll inspect the directory first.", |
| 1835 | ), |
| 1836 | native_tool_response( |
| 1837 | ToolCall( |
| 1838 | id="edit-1", |
| 1839 | name="edit", |
| 1840 | arguments={ |
| 1841 | "file_path": str(target), |
| 1842 | "old_string": "old value", |
| 1843 | "new_string": "new value", |
| 1844 | }, |
| 1845 | ), |
| 1846 | content="I'll update the file.", |
| 1847 | ), |
| 1848 | native_tool_response( |
| 1849 | ToolCall(id="bash-2", name="bash", arguments={"command": list_command}), |
| 1850 | content="I'll list the directory again after the edit.", |
| 1851 | ), |
| 1852 | final_response("I re-ran ls after the edit without hitting duplicate rejection."), |
| 1853 | ] |
| 1854 | ) |
| 1855 | |
| 1856 | run = await run_scenario( |
| 1857 | "Inspect the directory, edit the file, then inspect again.", |
| 1858 | backend, |
| 1859 | config=non_streaming_config(), |
| 1860 | project_root=temp_dir, |
| 1861 | ) |
| 1862 | |
| 1863 | assert tool_event_names(run) == ["bash", "edit", "bash"] |
| 1864 | messages = tool_result_messages(run) |
| 1865 | assert not any("Skipped - duplicate action" in message for message in messages) |
| 1866 | assert sum("notes.txt" in message for message in messages) >= 2 |
| 1867 | assert target.read_text() == "new value\n" |