loader Public

Watch 0 Fork 0 Star 0
Python · 34096 bytes Raw Blame History
  
        1
        """Direct tests for tool-batch confidence, verification, and recovery helpers."""
      
        2
        
        3
        from __future__ import annotations
      
        4
        
        5
        from pathlib import Path
      
        6
        from types import SimpleNamespace
      
        7
        
        8
        import pytest
      
        9
        
        10
        from loader.llm.base import Message, Role, ToolCall
      
        11
        from loader.runtime.context import RuntimeContext
      
        12
        from loader.runtime.events import AgentEvent
      
        13
        from loader.runtime.executor import ToolExecutionOutcome, ToolExecutionState
      
        14
        from loader.runtime.permissions import (
      
        15
            PermissionMode,
      
        16
            build_permission_policy,
      
        17
            load_permission_rules,
      
        18
        )
      
        19
        from loader.runtime.reasoning_types import (
      
        20
            ActionVerification,
      
        21
            ConfidenceAssessment,
      
        22
            ConfidenceLevel,
      
        23
        )
      
        24
        from loader.runtime.recovery import RecoveryContext
      
        25
        from loader.runtime.tool_batch_checks import (
      
        26
            ToolBatchConfidenceGate,
      
        27
            ToolBatchVerificationGate,
      
        28
        )
      
        29
        from loader.runtime.tool_batch_recovery import ToolBatchRecoveryController
      
        30
        from loader.tools.base import ToolResult as RegistryToolResult
      
        31
        from loader.tools.base import create_default_registry
      
        32
        from tests.helpers.runtime_harness import ScriptedBackend
      
        33
        
        34
        
        35
        class FakeSession:
            """Minimal session stand-in that only keeps an in-memory message list."""

            def __init__(self, messages: list[Message]) -> None:
                # Take a shallow copy so later appends never touch the caller's list.
                self.messages = [*messages]

            def append(self, message: Message) -> None:
                """Record ``message`` at the end of the stored history."""
                self.messages.append(message)
      
        41
        
        42
        
        43
        class FakeCodeFilter:
            """Code-filter stand-in whose only method is a no-op."""

            def reset(self) -> None:
                """Do nothing and implicitly return ``None``."""
      
        46
        
        47
        
        48
        class FakeSafeguards:
            """Pass-through safeguards double: never filters content and never steers."""

            def __init__(self) -> None:
                self.code_filter = FakeCodeFilter()
                # Opaque placeholders; nothing in this double inspects them.
                self.action_tracker = object()
                self.validator = object()

            def filter_stream_chunk(self, content: str) -> str:
                """Return the streamed chunk unchanged."""
                return content

            def filter_complete_content(self, content: str) -> str:
                """Return the completed content unchanged."""
                return content

            def should_steer(self) -> bool:
                """Always report that no steering is needed."""
                return False

            def get_steering_message(self) -> str | None:
                """Never supply a steering message."""
                return None

            def record_response(self, content: str) -> None:
                """Ignore the response; nothing is recorded."""
      
        68
        
        69
        
        70
        def build_context(
            *,
            temp_dir: Path,
            messages: list[Message],
            assess_confidence,
            verify_action,
            recovery_context: RecoveryContext | None = None,
            confidence_scoring: bool = False,
            verification: bool = False,
            min_confidence_for_action: int = 3,
        ) -> RuntimeContext:
            """Assemble a ``RuntimeContext`` wired with fakes for the helper tests.

            Args:
                temp_dir: Directory handed to the registry, the permission-rule
                    loader, the permission policy, and used as the project root.
                messages: Initial history used to seed the ``FakeSession``.
                assess_confidence: Callable exposed as ``reasoning.assess_confidence``.
                verify_action: Callable exposed as ``reasoning.verify_action``.
                recovery_context: Optional recovery context forwarded unchanged.
                confidence_scoring: Value for ``config.reasoning.confidence_scoring``.
                verification: Value for ``config.reasoning.verification``.
                min_confidence_for_action: Value for
                    ``config.reasoning.min_confidence_for_action``.

            Returns:
                The constructed ``RuntimeContext`` in ``"execute"`` workflow mode.
            """
            tool_registry = create_default_registry(temp_dir)
            tool_registry.configure_workspace_root(temp_dir)

            permission_status = load_permission_rules(temp_dir)
            permission_policy = build_permission_policy(
                active_mode=PermissionMode.WORKSPACE_WRITE,
                workspace_root=temp_dir,
                tool_requirements=tool_registry.get_tool_requirements(),
                rules=permission_status.rules,
            )

            # Only the three caller-controlled switches vary; the rest is fixed.
            reasoning_settings = SimpleNamespace(
                rollback=False,
                show_rollback_plan=False,
                completion_check=True,
                max_continuation_prompts=5,
                self_critique=False,
                confidence_scoring=confidence_scoring,
                min_confidence_for_action=min_confidence_for_action,
                verification=verification,
            )
            runtime_config = SimpleNamespace(
                force_react=False,
                max_recovery_attempts=2,
                auto_recover=True,
                reasoning=reasoning_settings,
            )
            reasoning_hooks = SimpleNamespace(
                assess_confidence=assess_confidence,
                verify_action=verify_action,
            )

            return RuntimeContext(
                project_root=temp_dir,
                backend=ScriptedBackend(),
                registry=tool_registry,
                session=FakeSession(messages),  # type: ignore[arg-type]
                config=runtime_config,
                capability_profile=SimpleNamespace(supports_native_tools=True),  # type: ignore[arg-type]
                project_context=None,
                permission_policy=permission_policy,
                permission_config_status=permission_status,
                workflow_mode="execute",
                safeguards=FakeSafeguards(),
                reasoning=reasoning_hooks,
                recovery_context=recovery_context,
            )
      
        122
        
        123
        
        124
        def tool_outcome(
            *,
            tool_call: ToolCall,
            output: str,
            is_error: bool,
        ) -> ToolExecutionOutcome:
            """Build an ``EXECUTED`` outcome whose every text field carries ``output``.

            Args:
                tool_call: The call the outcome belongs to; its ``id`` tags the
                    tool-result message.
                output: Text reused for the display, result, event, and registry
                    output fields.
                is_error: Error flag applied to the message, the outcome, and the
                    registry result alike.
            """
            result_message = Message.tool_result_message(
                tool_call_id=tool_call.id,
                display_content=output,
                result_content=output,
                is_error=is_error,
            )
            registry_result = RegistryToolResult(output=output, is_error=is_error)
            return ToolExecutionOutcome(
                tool_call=tool_call,
                state=ToolExecutionState.EXECUTED,
                message=result_message,
                event_content=output,
                is_error=is_error,
                result_output=output,
                registry_result=registry_result,
            )
      
        144
        
        145
        
        146
        @pytest.mark.asyncio
        async def test_tool_batch_confidence_gate_skips_low_confidence_actions(
            temp_dir: Path,
        ) -> None:
            """A LOW assessment makes the confidence gate skip the call and warn."""
            seen: dict[str, str] = {}
            emitted: list[AgentEvent] = []

            async def low_confidence(
                tool_name: str, tool_args: dict, context: str
            ) -> ConfidenceAssessment:
                # Keep the context string the gate passed in so it can be inspected.
                seen["context"] = context
                return ConfidenceAssessment(
                    action=f"{tool_name} with {tool_args}",
                    tool_name=tool_name,
                    tool_args=tool_args,
                    level=ConfidenceLevel.LOW,
                    reasoning="Need more context first.",
                    risks=["Unknown file contents"],
                )

            async def verification_forbidden(
                tool_name: str, tool_args: dict, result: str, expected: str = ""
            ) -> ActionVerification:
                raise AssertionError("Verification should not run here")

            async def record(event: AgentEvent) -> None:
                emitted.append(event)

            history = [
                Message(role=Role.USER, content="Inspect the README."),
                Message(role=Role.ASSISTANT, content="I'll read it next."),
            ]
            runtime = build_context(
                temp_dir=temp_dir,
                messages=history,
                assess_confidence=low_confidence,
                verify_action=verification_forbidden,
                confidence_scoring=True,
            )
            read_call = ToolCall(id="read-1", name="read", arguments={"file_path": "README.md"})

            skipped = await ToolBatchConfidenceGate(runtime).should_skip(
                tool_call=read_call,
                emit=record,
            )

            assert skipped is True
            assert "Inspect the README." in seen["context"]
            # The gate is expected to leave its warning as the newest session message.
            warning = runtime.session.messages[-1]
            assert warning.role == Role.USER
            assert "[LOW CONFIDENCE WARNING]" in warning.content
            assert [event.type for event in emitted] == ["confidence"]
      
        190
        
        191
        
        192
        @pytest.mark.asyncio
        async def test_tool_batch_verification_gate_requests_correction(
            temp_dir: Path,
        ) -> None:
            """A failed verification makes the gate continue and request a correction.

            The stubbed verifier reports a discrepancy that needs correction. The
            gate is then expected to append a ``[VERIFICATION FAILED]`` user message
            to the session and emit exactly one ``verification`` event.
            """

            async def assess_confidence(tool_name: str, tool_args: dict, context: str) -> ConfidenceAssessment:
                raise AssertionError("Confidence should not run here")

            async def verify_action(tool_name: str, tool_args: dict, result: str, expected: str = "") -> ActionVerification:
                return ActionVerification(
                    tool_name=tool_name,
                    tool_args=tool_args,
                    expected_outcome="Success",
                    actual_result=result,
                    # A result with discrepancies that needs correction is a failed
                    # verification; keep the flag consistent with the other fields.
                    verified=False,
                    discrepancies=["Output did not match the requested content"],
                    needs_correction=True,
                    correction_suggestion="Read the file before editing again.",
                )

            context = build_context(
                temp_dir=temp_dir,
                messages=[],
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                verification=True,
            )
            gate = ToolBatchVerificationGate(context)
            tool_call = ToolCall(id="read-1", name="read", arguments={"file_path": "README.md"})
            outcome = tool_outcome(tool_call=tool_call, output="unexpected contents", is_error=False)
            events: list[AgentEvent] = []

            async def emit(event: AgentEvent) -> None:
                events.append(event)

            should_continue = await gate.should_continue(
                tool_call=tool_call,
                outcome=outcome,
                emit=emit,
            )

            assert should_continue is True
            assert context.session.messages[-1].role == Role.USER
            assert "[VERIFICATION FAILED]" in context.session.messages[-1].content
            assert [event.type for event in events] == ["verification"]
      
        236
        
        237
        
        238
        @pytest.mark.asyncio
        async def test_tool_batch_recovery_controller_returns_follow_up(
            temp_dir: Path,
        ) -> None:
            """A failing tool call yields a recovery follow-up and a recovery event."""
            emitted: list[AgentEvent] = []

            async def confidence_forbidden(
                tool_name: str, tool_args: dict, context: str
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence should not run here")

            async def verification_forbidden(
                tool_name: str, tool_args: dict, result: str, expected: str = ""
            ) -> ActionVerification:
                raise AssertionError("Verification should not run here")

            async def record(event: AgentEvent) -> None:
                emitted.append(event)

            runtime = build_context(
                temp_dir=temp_dir,
                messages=[],
                assess_confidence=confidence_forbidden,
                verify_action=verification_forbidden,
            )
            runtime.session.current_task = (
                "Update index.html so every chapter link and title matches the real HTML files in chapters/."
            )
            recovery = ToolBatchRecoveryController(runtime)
            failing_call = ToolCall(id="bash-1", name="bash", arguments={"command": "pytest"})
            failure = tool_outcome(tool_call=failing_call, output="command failed", is_error=True)

            follow_up = await recovery.build_follow_up(
                tool_call=failing_call,
                outcome=failure,
                emit=record,
            )

            assert follow_up is not None
            # No recovery context was supplied, so one must exist after the call.
            assert runtime.recovery_context is not None
            assert "Previous attempts:" in follow_up.content
            assert any(event.type == "recovery" for event in emitted)
      
        275
        
        276
        
        277
        @pytest.mark.asyncio
        async def test_tool_batch_recovery_controller_includes_known_state_for_missing_file(
            temp_dir: Path,
        ) -> None:
            """A missing-file read produces a follow-up that reuses earlier findings."""
            emitted: list[AgentEvent] = []

            async def confidence_forbidden(
                tool_name: str, tool_args: dict, context: str
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence should not run here")

            async def verification_forbidden(
                tool_name: str, tool_args: dict, result: str, expected: str = ""
            ) -> ActionVerification:
                raise AssertionError("Verification should not run here")

            async def record(event: AgentEvent) -> None:
                emitted.append(event)

            # Earlier glob observation listing the chapter files.
            glob_observation = Message(
                role=Role.TOOL,
                content=(
                    "Observation [glob]: Result: "
                    "/Users/mfwolffe/Loader/guides/fortran/chapters/01-introduction.html\n"
                    "/Users/mfwolffe/Loader/guides/fortran/chapters/02-setup.html\n"
                    "/Users/mfwolffe/Loader/guides/fortran/chapters/03-basics.html\n"
                    "/Users/mfwolffe/Loader/guides/fortran/chapters/04-variables.html"
                ),
                tool_results=[],
            )
            # Assistant turn that read the setup chapter, followed by its result.
            setup_read_request = Message(
                role=Role.ASSISTANT,
                content="I already inspected the setup chapter.",
                tool_calls=[
                    ToolCall(
                        id="read-setup",
                        name="read",
                        arguments={"file_path": "~/Loader/guides/fortran/chapters/02-setup.html"},
                    )
                ],
            )
            setup_read_result = Message.tool_result_message(
                tool_call_id="read-setup",
                display_content="<h1>Chapter 2: Setting Up Fortran</h1>\n",
                result_content="<h1>Chapter 2: Setting Up Fortran</h1>\n",
            )
            # Notepad observation mapping stale link names to the listed files.
            notepad_observation = Message(
                role=Role.TOOL,
                content=(
                    "Observation [notepad_write_working]: Result: "
                    "- 02-basic-syntax.html -> 02-setup.html\n"
                    "- 03-variables-data-types.html -> 03-basics.html\n"
                    "- 04-operators-expressions.html -> 04-variables.html"
                ),
                tool_results=[],
            )
            index_read_request = Message(
                role=Role.ASSISTANT,
                content="I should update the index now.",
                tool_calls=[
                    ToolCall(
                        id="read-index",
                        name="read",
                        arguments={"file_path": "~/Loader/guides/fortran/index.html"},
                    )
                ],
            )

            runtime = build_context(
                temp_dir=temp_dir,
                messages=[
                    glob_observation,
                    setup_read_request,
                    setup_read_result,
                    notepad_observation,
                    index_read_request,
                ],
                assess_confidence=confidence_forbidden,
                verify_action=verification_forbidden,
            )
            runtime.session.current_task = (
                "Update ~/Loader/guides/fortran/index.html with the right chapter links."
            )
            recovery = ToolBatchRecoveryController(runtime)
            missing_read = ToolCall(
                id="read-missing",
                name="read",
                arguments={"file_path": "~/Loader/guides/fortran/chapters/04-data-types.html"},
            )
            failure = tool_outcome(
                tool_call=missing_read,
                output="File not found: ~/Loader/guides/fortran/chapters/04-data-types.html",
                is_error=True,
            )

            follow_up = await recovery.build_follow_up(
                tool_call=missing_read,
                outcome=failure,
                emit=record,
            )

            assert follow_up is not None
            guidance = follow_up.content
            assert "## CONTINUE FROM KNOWN STATE" in guidance
            assert "apply the fix using confirmed findings" in guidance
            assert "## ACTION BIAS FOR THIS RECOVERY" in guidance
            assert "Prefer edit/write/patch on the target file" in guidance
            assert "04-variables.html" in guidance
            assert "02-basic-syntax.html -> 02-setup.html" in guidance
            assert any(event.type == "recovery" for event in emitted)
      
        377
        
        378
        
        379
        @pytest.mark.asyncio
        async def test_tool_batch_recovery_controller_suggests_known_sibling_files(
            temp_dir: Path,
        ) -> None:
            """A missing file next to real siblings yields file-candidate suggestions."""
            emitted: list[AgentEvent] = []

            async def confidence_forbidden(
                tool_name: str, tool_args: dict, context: str
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence should not run here")

            async def verification_forbidden(
                tool_name: str, tool_args: dict, result: str, expected: str = ""
            ) -> ActionVerification:
                raise AssertionError("Verification should not run here")

            async def record(event: AgentEvent) -> None:
                emitted.append(event)

            # Two real chapter files exist on disk; the requested one does not.
            chapter_dir = temp_dir / "chapters"
            chapter_dir.mkdir()
            (chapter_dir / "04-variables.html").write_text(
                "<h1>Chapter 4: Variables and Data Types</h1>\n"
            )
            (chapter_dir / "05-input-output.html").write_text(
                "<h1>Chapter 5: Input and Output</h1>\n"
            )
            missing_path = chapter_dir / "04-data-types.html"

            history: list[Message] = []
            runtime = build_context(
                temp_dir=temp_dir,
                messages=history,
                assess_confidence=confidence_forbidden,
                verify_action=verification_forbidden,
            )
            recovery = ToolBatchRecoveryController(runtime)
            missing_read = ToolCall(
                id="read-missing",
                name="read",
                arguments={"file_path": str(missing_path)},
            )
            failure = tool_outcome(
                tool_call=missing_read,
                output=f"File not found: {missing_path}",
                is_error=True,
            )

            follow_up = await recovery.build_follow_up(
                tool_call=missing_read,
                outcome=failure,
                emit=record,
            )

            assert follow_up is not None
            guidance = follow_up.content
            assert "## LIKELY FILE CANDIDATES" in guidance
            assert "`04-variables.html`" in guidance
            assert "instead of retrying the missing path" in guidance
      
        432
        
        433
        
        434
        @pytest.mark.asyncio
        async def test_tool_batch_recovery_controller_includes_current_html_target_excerpt(
            temp_dir: Path,
        ) -> None:
            """A failed patch on index.html yields a follow-up quoting the on-disk file."""
            emitted: list[AgentEvent] = []

            async def confidence_forbidden(
                tool_name: str, tool_args: dict, context: str
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence should not run here")

            async def verification_forbidden(
                tool_name: str, tool_args: dict, result: str, expected: str = ""
            ) -> ActionVerification:
                raise AssertionError("Verification should not run here")

            async def record(event: AgentEvent) -> None:
                emitted.append(event)

            chapter_dir = temp_dir / "chapters"
            chapter_dir.mkdir()
            (chapter_dir / "01-introduction.html").write_text(
                "<h1>Chapter 1: Introduction to Fortran</h1>\n"
            )
            (chapter_dir / "02-setup.html").write_text(
                "<h1>Chapter 2: Setting Up Your Environment</h1>\n"
            )

            # The index links to a chapter-2 filename that is not among the files above.
            index_markup = (
                "<h2>Table of Contents</h2>\n"
                "<ul class=\"chapter-list\">\n"
                "    <li><a href=\"chapters/01-introduction.html\">Chapter 1: Introduction to Fortran</a></li>\n"
                "    <li><a href=\"chapters/02-basic-syntax.html\">Chapter 2: Basic Syntax</a></li>\n"
                "</ul>\n"
            )
            index_path = temp_dir / "index.html"
            index_path.write_text(index_markup)

            runtime = build_context(
                temp_dir=temp_dir,
                messages=[],
                assess_confidence=confidence_forbidden,
                verify_action=verification_forbidden,
            )
            runtime.session.current_task = (
                "Update index.html so every chapter link and title matches the real HTML files in chapters/."
            )
            recovery = ToolBatchRecoveryController(runtime)

            # The hunk removes a "bad" line that the file written above does not contain.
            stale_hunk = {
                "old_start": 1,
                "old_lines": 1,
                "new_start": 1,
                "new_lines": 1,
                "lines": ["-bad", "+good"],
            }
            patch_call = ToolCall(
                id="patch-index",
                name="patch",
                arguments={"file_path": str(index_path), "hunks": [stale_hunk]},
            )
            failure = tool_outcome(
                tool_call=patch_call,
                output="Patch failed: hunk did not apply cleanly",
                is_error=True,
            )

            follow_up = await recovery.build_follow_up(
                tool_call=patch_call,
                outcome=failure,
                emit=record,
            )

            assert follow_up is not None
            guidance = follow_up.content
            assert "## CURRENT TARGET EXCERPT" in guidance
            assert "- Target file:" in guidance
            assert "index.html" in guidance
            # Either excerpt heading is acceptable.
            accepted_headings = (
                "Closest on-disk block to the requested patch:",
                "Current file contents near the requested patch location:",
            )
            assert any(heading in guidance for heading in accepted_headings)
            assert '1 | <h2>Table of Contents</h2>' in guidance
            assert (
                '3 |     <li><a href="chapters/01-introduction.html">Chapter 1: Introduction to Fortran</a></li>'
                in guidance
            )
            assert "Use the exact on-disk text above" in guidance
            assert "Verified chapter inventory:" not in guidance
      
        519
        
        520
        
        521
        @pytest.mark.asyncio
        async def test_tool_batch_recovery_controller_includes_current_target_excerpt_for_edit_mismatch(
            temp_dir: Path,
        ) -> None:
            """Check the follow-up for a failed ``edit`` whose ``old_string`` is not on disk.

            The follow-up is expected to carry a current-target excerpt with numbered
            on-disk lines plus the stale-edit recovery guidance.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence should not run here")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run here")

            # On disk the section is headed "## Status"; the edit below asks for a
            # "## Runtime" heading, so its old_string cannot match.
            guide_path = temp_dir / "guide.md"
            guide_text = (
                "# Loader Guide\n"
                "\n"
                "## Overview\n"
                "Loader helps agentic coding workflows.\n"
                "\n"
                "## Status\n"
                "The runtime is stable.\n"
            )
            guide_path.write_text(guide_text)

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                assess_confidence=assess_confidence,
                verify_action=verify_action,
            )
            runtime_context.session.current_task = "Update guide.md to mention the runtime is resilient."
            recovery = ToolBatchRecoveryController(runtime_context)

            edit_arguments = {
                "file_path": str(guide_path),
                "old_string": "## Runtime\nThe runtime is stable.\n",
                "new_string": "## Runtime\nThe runtime is resilient.\n",
            }
            failed_edit = ToolCall(id="edit-guide", name="edit", arguments=edit_arguments)
            failed_outcome = tool_outcome(
                tool_call=failed_edit,
                output="old_string not found in file. Make sure it matches exactly.",
                is_error=True,
            )

            follow_up = await recovery.build_follow_up(
                tool_call=failed_edit,
                outcome=failed_outcome,
                emit=lambda evt: _noop_emit(evt),
            )

            assert follow_up is not None
            rendered = follow_up.content
            required_fragments = (
                "## CURRENT TARGET EXCERPT",
                "- Target file:",
                "guide.md",
                "Closest on-disk block to the requested edit:",
                "6 | ## Status",
                "7 | The runtime is stable.",
                "replace the containing block in one edit",
                "## STALE EDIT RECOVERY",
                "do not retry it from memory",
                "complete replacement file",
            )
            for fragment in required_fragments:
                assert fragment in rendered
      
        582
        
        583
        
        584
        @pytest.mark.asyncio
        async def test_tool_batch_recovery_controller_scopes_known_state_to_active_target(
            temp_dir: Path,
        ) -> None:
            """Check that a failed nginx edit does not get steered toward the fortran index.

            The message history mentions ``fortran/index.html``; the follow-up for a
            failed edit of ``nginx/index.html`` must not contain a "Preferred next
            step: Update" line naming that fortran file.
            """

            async def assess_confidence(tool_name: str, tool_args: dict, context: str) -> ConfidenceAssessment:
                raise AssertionError("Confidence should not run here")

            async def verify_action(
                tool_name: str, tool_args: dict, result: str, expected: str = ""
            ) -> ActionVerification:
                raise AssertionError("Verification should not run here")

            nginx_root = temp_dir / "nginx"
            chapters_dir = nginx_root / "chapters"
            chapters_dir.mkdir(parents=True)
            nginx_index_path = nginx_root / "index.html"
            nginx_index_path.write_text(
                "<h2>Table of Contents</h2>\n"
                "<ul>\n"
                '    <li><a href="chapters/01_getting_started.html">Getting Started with NGINX</a></li>\n'
                '    <li><a href="chapters/02_installation.html">Installation</a></li>\n'
                "</ul>\n"
            )
            first_chapter = chapters_dir / "01_getting_started.html"
            first_chapter.write_text("<h1>Getting Started with NGINX</h1>\n")

            # Only referenced in the history below; never created on disk here.
            fortran_index_path = temp_dir / "fortran" / "index.html"
            prior_observation = Message(
                role=Role.TOOL,
                content=(
                    "Observation [read]: Result: "
                    f"{fortran_index_path}\n"
                    "Semantic verification preview: validated 12 toc links in index.html"
                ),
            )

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[prior_observation],
                assess_confidence=assess_confidence,
                verify_action=verify_action,
            )
            runtime_context.session.current_task = (  # type: ignore[attr-defined]
                "Have a look at ~/Loader/guides/fortran and chapters/ within. Get a feel "
                "for the structure and cadence of the guide. We are going to make an all "
                "new equally thorough guide on how to use the nginx tool. It will live in "
                "~/Loader/guides/nginx/index.html and ~/Loader/guides/nginx/chapters/."
            )
            recovery = ToolBatchRecoveryController(runtime_context)

            failed_edit = ToolCall(
                id="edit-nginx",
                name="edit",
                arguments={
                    "file_path": str(nginx_index_path),
                    "old_string": "<ul>\n</ul>",
                    "new_string": '<ul class="chapter-list">\n</ul>',
                },
            )
            failed_outcome = tool_outcome(
                tool_call=failed_edit,
                output=(
                    "Tool execution error: EditTool.execute() missing 1 required positional "
                    "argument: 'new_string'"
                ),
                is_error=True,
            )

            # Events are collected but not inspected by this test.
            captured: list[AgentEvent] = []

            async def emit(event: AgentEvent) -> None:
                captured.append(event)

            follow_up = await recovery.build_follow_up(
                tool_call=failed_edit,
                outcome=failed_outcome,
                emit=emit,
            )

            assert follow_up is not None
            stale_hint = f"Preferred next step: Update `{fortran_index_path}`"
            assert stale_hint not in follow_up.content
      
        673
        
        674
        
        675
        @pytest.mark.asyncio
        async def test_tool_batch_recovery_controller_prioritizes_active_verification_repair_target(
            temp_dir: Path,
        ) -> None:
            """Check that an outstanding repair message drives the follow-up for a bad read.

            With a "[DEFINITION OF DONE CHECK FAILED]" user message in history, the
            follow-up for a failed read of an unrelated path should contain the
            active-repair-target section and the index path, and should not contain
            the "Current task: ..." text for the original task.
            """

            async def assess_confidence(tool_name: str, tool_args: dict, context: str) -> ConfidenceAssessment:
                raise AssertionError("Confidence should not run here")

            async def verify_action(
                tool_name: str, tool_args: dict, result: str, expected: str = ""
            ) -> ActionVerification:
                raise AssertionError("Verification should not run here")

            guide_root = temp_dir / "Loader" / "guides" / "nginx"
            chapters_dir = guide_root / "chapters"
            chapters_dir.mkdir(parents=True)
            index_path = guide_root / "index.html"
            # The index links to 01-introduction.html, but only
            # 01-getting-started.html is written to the chapters directory.
            index_path.write_text(
                "<ul>\n"
                '  <li><a href="chapters/01-introduction.html">Introduction</a></li>\n'
                "</ul>\n"
            )
            existing_chapter = chapters_dir / "01-getting-started.html"
            existing_chapter.write_text("<h1>Getting Started</h1>\n")
            missing_chapter = chapters_dir / "01-introduction.html"

            repair_message = (
                "[DEFINITION OF DONE CHECK FAILED]\n"
                "Repair focus:\n"
                f"- Fix the broken local reference `chapters/01-introduction.html` in `{index_path}`.\n"
                f"- Immediate next step: edit `{index_path}`.\n"
                f"- If the broken reference should remain, create `{missing_chapter}`; "
                "otherwise remove or replace `chapters/01-introduction.html`.\n"
                "- Do not reread unrelated reference materials or restart discovery while this "
                "concrete repair target is unresolved.\n"
            )
            history = [
                Message(role=Role.USER, content=repair_message),
                Message(
                    role=Role.TOOL,
                    content=f"Observation [glob]: Result: {existing_chapter}",
                ),
            ]

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=history,
                assess_confidence=assess_confidence,
                verify_action=verify_action,
            )
            runtime_context.session.current_task = (  # type: ignore[attr-defined]
                "Have a look at ~/Loader/guides/fortran and chapters/ within. Get a feel "
                "for the structure and cadence of the guide. We are going to make an all "
                "new equally thorough guide on how to use the nginx tool."
            )
            recovery = ToolBatchRecoveryController(runtime_context)

            bad_read = ToolCall(
                id="read-bad-path",
                name="read",
                arguments={"path": "~/nginx-guide/chapter1.html"},
            )
            failed_outcome = tool_outcome(
                tool_call=bad_read,
                output="File not found: ~/nginx-guide/chapter1.html",
                is_error=True,
            )

            follow_up = await recovery.build_follow_up(
                tool_call=bad_read,
                outcome=failed_outcome,
                emit=lambda evt: _noop_emit(evt),
            )

            assert follow_up is not None
            rendered = follow_up.content
            assert "## ACTIVE REPAIR TARGET" in rendered
            assert str(index_path) in rendered
            assert "chapters/01-introduction.html" in rendered
            assert "Do not go back to the original reference guide" in rendered
            assert "Current task: Have a look at ~/Loader/guides/fortran" not in rendered
            assert "~/nginx-guide/chapter1.html" in rendered
      
        761
        
        762
        
        763
        @pytest.mark.asyncio
        async def test_tool_batch_recovery_controller_reuses_context_for_related_missing_files(
            temp_dir: Path,
        ) -> None:
            """Check that a second missing-file read reuses the existing recovery context.

            A ``RecoveryContext`` with one recorded failed read is supplied up front;
            after another read fails on a sibling chapter path, the same object must
            still be attached to the context and hold two attempts.
            """

            async def assess_confidence(tool_name: str, tool_args: dict, context: str) -> ConfidenceAssessment:
                raise AssertionError("Confidence should not run here")

            async def verify_action(
                tool_name: str, tool_args: dict, result: str, expected: str = ""
            ) -> ActionVerification:
                raise AssertionError("Verification should not run here")

            first_missing = "~/Loader/guides/fortran/chapters/04-data-types.html"
            second_missing = "~/Loader/guides/fortran/chapters/02-basic-syntax.html"

            prior_recovery = RecoveryContext(
                original_tool="read",
                original_args={"file_path": first_missing},
                max_retries=3,
            )
            prior_recovery.add_attempt(
                "read",
                {"file_path": first_missing},
                f"File not found: {first_missing}",
            )

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                recovery_context=prior_recovery,
            )
            recovery = ToolBatchRecoveryController(runtime_context)

            second_read = ToolCall(
                id="read-missing-2",
                name="read",
                arguments={"file_path": second_missing},
            )
            failed_outcome = tool_outcome(
                tool_call=second_read,
                output=f"File not found: {second_missing}",
                is_error=True,
            )

            follow_up = await recovery.build_follow_up(
                tool_call=second_read,
                outcome=failed_outcome,
                emit=lambda evt: _noop_emit(evt),
            )

            assert follow_up is not None
            # Same object, now holding the earlier attempt plus this one.
            assert runtime_context.recovery_context is prior_recovery
            assert len(prior_recovery.attempts) == 2
            rendered = follow_up.content
            assert "## Current attempt: 2/3" in rendered
            assert "02-basic-syntax.html" in rendered
      
        822
        
        823
        
        824
        @pytest.mark.asyncio
        async def test_tool_batch_recovery_controller_uses_generic_loop_guidance(
            temp_dir: Path,
        ) -> None:
            """Check the error event emitted when the identical failing read is repeated.

            The emitted "error" event must mention "verify the current result" and
            must not mention "read a config file first".
            """

            async def assess_confidence(tool_name: str, tool_args: dict, context: str) -> ConfidenceAssessment:
                raise AssertionError("Confidence should not run here")

            async def verify_action(
                tool_name: str, tool_args: dict, result: str, expected: str = ""
            ) -> ActionVerification:
                raise AssertionError("Verification should not run here")

            missing_path = "~/Loader/guides/nginx/chapters/01-introduction.html"
            not_found = f"File not found: {missing_path}"

            prior_recovery = RecoveryContext(
                original_tool="read",
                original_args={"file_path": missing_path},
                max_retries=3,
            )
            prior_recovery.add_attempt("read", {"file_path": missing_path}, not_found)

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                recovery_context=prior_recovery,
            )
            recovery = ToolBatchRecoveryController(runtime_context)

            # Same tool and same arguments as the attempt already recorded above.
            repeated_read = ToolCall(
                id="read-missing-repeat",
                name="read",
                arguments={"file_path": missing_path},
            )
            failed_outcome = tool_outcome(
                tool_call=repeated_read,
                output=not_found,
                is_error=True,
            )

            captured: list[AgentEvent] = []

            async def emit(event: AgentEvent) -> None:
                captured.append(event)

            follow_up = await recovery.build_follow_up(
                tool_call=repeated_read,
                outcome=failed_outcome,
                emit=emit,
            )

            assert follow_up is not None
            error_events = [evt for evt in captured if evt.type == "error"]
            assert error_events
            first_error = error_events[0]
            assert "read a config file first" not in first_error.content
            assert "verify the current result" in first_error.content
      
        887
        
        888
        
        889
        @pytest.mark.asyncio
        async def test_tool_batch_recovery_controller_surfaces_missing_write_payload_fix(
            temp_dir: Path,
        ) -> None:
            """Check the follow-up for a ``write`` call that carried metadata but no content.

            The call passes ``content_chars`` / ``content_lines`` instead of
            ``content``; the follow-up should include the payload-format-fix section
            and the correct ``write(...)`` call shape.
            """

            async def assess_confidence(tool_name: str, tool_args: dict, context: str) -> ConfidenceAssessment:
                raise AssertionError("Confidence should not run here")

            async def verify_action(
                tool_name: str, tool_args: dict, result: str, expected: str = ""
            ) -> ActionVerification:
                raise AssertionError("Verification should not run here")

            user_request = Message(
                role=Role.USER,
                content="Create ~/Loader/guides/nginx/index.html",
            )
            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[user_request],
                assess_confidence=assess_confidence,
                verify_action=verify_action,
            )
            recovery = ToolBatchRecoveryController(runtime_context)

            # No "content" key at all: only size metadata about the intended payload.
            metadata_only_args = {
                "file_path": "~/Loader/guides/nginx/index.html",
                "content_chars": 1354,
                "content_lines": 30,
            }
            bad_write = ToolCall(
                id="write-metadata-only",
                name="write",
                arguments=metadata_only_args,
            )
            failed_outcome = tool_outcome(
                tool_call=bad_write,
                output=(
                    "[Validation warning] Writing empty content to file\n"
                    "Tool execution error: WriteTool.execute() missing 1 required "
                    "positional argument: 'content'"
                ),
                is_error=True,
            )

            follow_up = await recovery.build_follow_up(
                tool_call=bad_write,
                outcome=failed_outcome,
                emit=lambda evt: _noop_emit(evt),
            )

            assert follow_up is not None
            rendered = follow_up.content
            for fragment in (
                "## PAYLOAD FORMAT FIX",
                "content_chars",
                "write(file_path=..., content='...')",
                "index.html",
            ):
                assert fragment in rendered
      
        950
        
        951
        
        952
        @pytest.mark.asyncio
        async def test_tool_batch_recovery_controller_resets_context_for_unrelated_failures(
            temp_dir: Path,
        ) -> None:
            """Check that an unrelated failure replaces the existing recovery context.

            A ``RecoveryContext`` for a failed ``read`` is supplied up front; after a
            ``bash`` call fails, the context must hold a different ``RecoveryContext``
            with a single attempt.
            """

            async def assess_confidence(tool_name: str, tool_args: dict, context: str) -> ConfidenceAssessment:
                raise AssertionError("Confidence should not run here")

            async def verify_action(
                tool_name: str, tool_args: dict, result: str, expected: str = ""
            ) -> ActionVerification:
                raise AssertionError("Verification should not run here")

            missing_path = "~/Loader/guides/fortran/chapters/04-data-types.html"
            prior_recovery = RecoveryContext(
                original_tool="read",
                original_args={"file_path": missing_path},
                max_retries=3,
            )
            prior_recovery.add_attempt(
                "read",
                {"file_path": missing_path},
                f"File not found: {missing_path}",
            )

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                recovery_context=prior_recovery,
            )
            recovery = ToolBatchRecoveryController(runtime_context)

            # A different tool with different arguments than the recorded read.
            failed_bash = ToolCall(id="bash-timeout", name="bash", arguments={"command": "pytest"})
            failed_outcome = tool_outcome(
                tool_call=failed_bash,
                output="command failed",
                is_error=True,
            )

            follow_up = await recovery.build_follow_up(
                tool_call=failed_bash,
                outcome=failed_outcome,
                emit=lambda evt: _noop_emit(evt),
            )

            assert follow_up is not None
            fresh_recovery = runtime_context.recovery_context
            assert fresh_recovery is not None
            assert fresh_recovery is not prior_recovery
            assert len(runtime_context.recovery_context.attempts) == 1
            assert "## Current attempt: 1/2" in follow_up.content
      
        1011
        
        1012
        
        1013
        async def _noop_emit(event: AgentEvent) -> None:
            """Accept *event* and do nothing with it; always resolves to ``None``."""