loader Public

Watch 0 Fork 0 Star 0
Python · 231591 bytes Raw Blame History
  
        1
        """Tests for tool-batch execution on RuntimeContext."""
      
        2
        
        3
        from __future__ import annotations
      
        4
        
        5
        from pathlib import Path
      
        6
        from types import SimpleNamespace
      
        7
        
        8
        import pytest
      
        9
        
        10
        from loader.llm.base import Message, Role, ToolCall
      
        11
        from loader.runtime.context import RuntimeContext
      
        12
        from loader.runtime.dod import (
      
        13
            DefinitionOfDoneStore,
      
        14
            VerificationEvidence,
      
        15
            create_definition_of_done,
      
        16
        )
      
        17
        from loader.runtime.events import AgentEvent, TurnSummary
      
        18
        from loader.runtime.executor import ToolExecutionOutcome, ToolExecutionState
      
        19
        from loader.runtime.path_display import display_runtime_path
      
        20
        from loader.runtime.permissions import (
      
        21
            PermissionMode,
      
        22
            build_permission_policy,
      
        23
            load_permission_rules,
      
        24
        )
      
        25
        from loader.runtime.reasoning_types import (
      
        26
            ActionVerification,
      
        27
            ConfidenceAssessment,
      
        28
            ConfidenceLevel,
      
        29
        )
      
        30
        from loader.runtime.recovery import RecoveryContext
      
        31
        from loader.runtime.tool_batches import (
      
        32
            ToolBatchRunner,
      
        33
        )
      
        34
        from loader.runtime.tool_batches import (
      
        35
            _should_prioritize_missing_artifact as tool_batches_should_prioritize_missing_artifact,
      
        36
        )
      
        37
        from loader.runtime.workflow import sync_todos_to_definition_of_done
      
        38
        from loader.tools.base import ToolResult as RegistryToolResult
      
        39
        from loader.tools.base import create_default_registry
      
        40
        from tests.helpers.runtime_harness import ScriptedBackend
      
        41
        
        42
        
        43
        class FakeSession:
            """In-memory session double that stores messages and workflow timeline entries."""

            def __init__(self, messages: list[Message]) -> None:
                # Take a shallow copy so appends never touch the caller's list.
                self.workflow_timeline = []
                self.messages = [*messages]

            def append(self, message: Message) -> None:
                """Add ``message`` to the end of the stored conversation."""
                self.messages.append(message)

            def append_workflow_timeline_entry(self, entry) -> None:
                """Add ``entry`` to the end of the workflow timeline."""
                self.workflow_timeline.append(entry)
      
        53
        
        54
        
        55
        class FakeCodeFilter:
            """Code-filter double whose ``reset`` does nothing."""

            def reset(self) -> None:
                """No-op; exists only so callers can invoke ``reset()``."""
      
        58
        
        59
        
        60
        class FakeSafeguards:
            """Safeguards double: content passes through unchanged and nothing steers.

            The only configurable behaviour is the value returned by ``detect_loop``,
            supplied through the ``detect_loop_result`` keyword argument.
            """

            def __init__(self, *, detect_loop_result: tuple[bool, str] = (False, "")) -> None:
                self._detect_loop_result = detect_loop_result
                self.code_filter = FakeCodeFilter()
                # Bare placeholder objects for the validator and tracker attributes.
                self.validator = object()
                self.action_tracker = object()

            def filter_stream_chunk(self, content: str) -> str:
                """Return the streamed chunk untouched."""
                return content

            def filter_complete_content(self, content: str) -> str:
                """Return the complete content untouched."""
                return content

            def should_steer(self) -> bool:
                """Always report that no steering is needed."""
                return False

            def get_steering_message(self) -> str | None:
                """Always return ``None``; there is no steering message."""
                return None

            def record_response(self, content: str) -> None:
                """Ignore ``content``; nothing is recorded."""

            def detect_text_loop(self, content: str) -> tuple[bool, str]:
                """Always report that no text loop was found."""
                return (False, "")

            def detect_loop(self) -> tuple[bool, str]:
                """Return the result that was configured at construction time."""
                return self._detect_loop_result
      
        87
        
        88
        
        89
        class FakeExecutor:
            """Executor double that replays queued outcomes and logs every tool call."""

            def __init__(self, outcomes: list[ToolExecutionOutcome]) -> None:
                self.calls: list[ToolCall] = []
                # Private copy so popping never mutates the caller's list.
                self._outcomes = [*outcomes]

            async def execute_tool_call(self, tool_call: ToolCall, **_: object) -> ToolExecutionOutcome:
                """Log ``tool_call`` and hand back the next queued outcome.

                Extra keyword arguments are accepted and ignored. The call is logged
                before the queue is checked, so it is recorded even when the queue is
                empty.

                Raises:
                    AssertionError: if no outcome is left in the queue.
                """
                self.calls.append(tool_call)
                if self._outcomes:
                    return self._outcomes.pop(0)
                raise AssertionError("No fake tool outcome queued")
      
        99
        
        100
        
        101
        def build_context(
            *,
            temp_dir: Path,
            messages: list[Message],
            safeguards: FakeSafeguards,
            assess_confidence,
            verify_action,
            recovery_context: RecoveryContext | None = None,
            confidence_scoring: bool = False,
            verification: bool = False,
            auto_recover: bool = True,
            min_confidence_for_action: int = 3,
        ) -> RuntimeContext:
            """Assemble a ``RuntimeContext`` wired to test doubles rooted at ``temp_dir``.

            Args:
                temp_dir: Used as project root, workspace root, and the directory
                    passed to ``load_permission_rules``.
                messages: Initial messages wrapped in a ``FakeSession``.
                safeguards: Safeguards double stored on the context.
                assess_confidence: Callable exposed as ``reasoning.assess_confidence``.
                verify_action: Callable exposed as ``reasoning.verify_action``.
                recovery_context: Optional pre-existing recovery state.
                confidence_scoring: Value for ``config.reasoning.confidence_scoring``.
                verification: Value for ``config.reasoning.verification``.
                auto_recover: Value for ``config.auto_recover``.
                min_confidence_for_action: Value for
                    ``config.reasoning.min_confidence_for_action``.

            Returns:
                The constructed ``RuntimeContext``, in ``"execute"`` workflow mode
                with a ``WORKSPACE_WRITE`` permission policy.
            """
            tool_registry = create_default_registry(temp_dir)
            tool_registry.configure_workspace_root(temp_dir)

            permission_status = load_permission_rules(temp_dir)
            permission_policy = build_permission_policy(
                active_mode=PermissionMode.WORKSPACE_WRITE,
                workspace_root=temp_dir,
                tool_requirements=tool_registry.get_tool_requirements(),
                rules=permission_status.rules,
            )

            # Only the caller-tunable reasoning flags vary; the rest are fixed.
            reasoning_config = SimpleNamespace(
                rollback=False,
                show_rollback_plan=False,
                completion_check=True,
                max_continuation_prompts=5,
                self_critique=False,
                confidence_scoring=confidence_scoring,
                min_confidence_for_action=min_confidence_for_action,
                verification=verification,
            )
            runtime_config = SimpleNamespace(
                force_react=False,
                max_recovery_attempts=2,
                auto_recover=auto_recover,
                reasoning=reasoning_config,
            )
            reasoning_services = SimpleNamespace(
                assess_confidence=assess_confidence,
                verify_action=verify_action,
            )

            return RuntimeContext(
                project_root=temp_dir,
                backend=ScriptedBackend(),
                registry=tool_registry,
                session=FakeSession(messages),  # type: ignore[arg-type]
                config=runtime_config,
                capability_profile=SimpleNamespace(supports_native_tools=True),  # type: ignore[arg-type]
                project_context=None,
                permission_policy=permission_policy,
                permission_config_status=permission_status,
                workflow_mode="execute",
                safeguards=safeguards,
                reasoning=reasoning_services,
                recovery_context=recovery_context,
            )
      
        156
        
        157
        
        158
        def tool_outcome(
            *,
            tool_call: ToolCall,
            output: str,
            is_error: bool,
            state: ToolExecutionState = ToolExecutionState.EXECUTED,
            metadata: dict[str, object] | None = None,
        ) -> ToolExecutionOutcome:
            """Build a ``ToolExecutionOutcome`` in which every text field carries ``output``.

            Args:
                tool_call: The call the outcome belongs to; its ``id`` tags the message.
                output: Used as display, result, and event content, and as the
                    registry result output.
                is_error: Error flag copied onto the message, outcome, and registry result.
                state: Execution state recorded on the outcome.
                metadata: Registry result metadata; a falsy value becomes ``{}``.
            """
            result_message = Message.tool_result_message(
                tool_call_id=tool_call.id,
                display_content=output,
                result_content=output,
                is_error=is_error,
            )
            registry_result = RegistryToolResult(
                output=output,
                is_error=is_error,
                metadata=metadata if metadata else {},
            )
            return ToolExecutionOutcome(
                tool_call=tool_call,
                state=state,
                message=result_message,
                event_content=output,
                is_error=is_error,
                result_output=output,
                registry_result=registry_result,
            )
      
        184
        
        185
        
        186
        @pytest.mark.asyncio
        async def test_tool_batch_runner_uses_context_for_confidence_gate(temp_dir: Path) -> None:
            """A LOW confidence assessment skips the tool call and appends a warning message."""
            seen: dict[str, str] = {}
            emitted: list[AgentEvent] = []

            async def assess_confidence(tool_name: str, tool_args: dict, context: str) -> ConfidenceAssessment:
                # Keep the context text handed to the scorer so it can be asserted on.
                seen["context"] = context
                return ConfidenceAssessment(
                    action=f"{tool_name} with {tool_args}",
                    tool_name=tool_name,
                    tool_args=tool_args,
                    level=ConfidenceLevel.LOW,
                    reasoning="Need to inspect the target first.",
                    risks=["Unknown target file"],
                )

            async def verify_action(tool_name: str, tool_args: dict, result: str, expected: str = "") -> ActionVerification:
                raise AssertionError("Verification should not run for skipped actions")

            async def record_event(event: AgentEvent) -> None:
                emitted.append(event)

            conversation = [
                Message(role=Role.USER, content="Please inspect the project."),
                Message(role=Role.ASSISTANT, content="I will read the file next."),
            ]
            context = build_context(
                temp_dir=temp_dir,
                messages=conversation,
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                confidence_scoring=True,
                min_confidence_for_action=3,
            )
            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
            read_call = ToolCall(id="read-1", name="read", arguments={"file_path": "README.md"})
            queued_outcome = tool_outcome(tool_call=read_call, output="unused", is_error=False)
            executor = FakeExecutor([queued_outcome])

            result = await runner.execute_batch(
                tool_calls=[read_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=record_event,
                summary=TurnSummary(final_response=""),
                dod=create_definition_of_done("Read the docs"),
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            # Nothing was executed, and the scorer saw the user's request.
            assert result.actions_taken == []
            assert executor.calls == []
            assert "Please inspect the project." in seen["context"]

            # The session ends with a user-role low-confidence warning.
            final_message = context.session.messages[-1]
            assert final_message.role == Role.USER
            assert "[LOW CONFIDENCE WARNING]" in final_message.content
            assert any(event.type == "confidence" for event in emitted)
      
        245
        
        246
        
        247
        @pytest.mark.asyncio
        async def test_tool_batch_runner_tracks_recovery_with_legacy_context(temp_dir: Path) -> None:
            """A failing tool call with auto-recover on sets recovery state and emits a recovery event."""
            emitted: list[AgentEvent] = []

            async def assess_confidence(tool_name: str, tool_args: dict, context: str) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(tool_name: str, tool_args: dict, result: str, expected: str = "") -> ActionVerification:
                raise AssertionError("Verification should not run for failed actions")

            async def record_event(event: AgentEvent) -> None:
                emitted.append(event)

            context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=True,
            )
            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
            failing_call = ToolCall(id="bash-1", name="bash", arguments={"command": "pytest"})
            failed_outcome = tool_outcome(tool_call=failing_call, output="command failed", is_error=True)
            executor = FakeExecutor([failed_outcome])
            summary = TurnSummary(final_response="")

            await runner.execute_batch(
                tool_calls=[failing_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=record_event,
                summary=summary,
                dod=create_definition_of_done("Run tests"),
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert context.recovery_context is not None
            assert summary.tool_result_messages
            # The last session message is the same tool result the summary recorded.
            assert context.session.messages[-1] == summary.tool_result_messages[-1]
            assert any(event.type == "recovery" for event in emitted)
      
        290
        
        291
        
        292
        @pytest.mark.asyncio
        async def test_tool_batch_runner_emits_tool_metadata(temp_dir: Path) -> None:
            """The ``tool_result`` event carries the metadata from the registry result."""

            async def assess_confidence(tool_name: str, tool_args: dict, context: str) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(tool_name: str, tool_args: dict, result: str, expected: str = "") -> ActionVerification:
                raise AssertionError("Verification should not run for this scenario")

            context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
            tool_call = ToolCall(
                id="bash-1",
                name="bash",
                arguments={"command": "python -m http.server 8000", "background": True},
            )
            metadata = {
                "job_id": "bash-1",
                "status": "running",
                "background": True,
            }
            executor = FakeExecutor(
                [
                    tool_outcome(
                        tool_call=tool_call,
                        output="Started bash job bash-1",
                        is_error=False,
                        metadata=metadata,
                    )
                ]
            )
            events: list[AgentEvent] = []

            async def emit(event: AgentEvent) -> None:
                events.append(event)

            await runner.execute_batch(
                tool_calls=[tool_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=emit,
                summary=TurnSummary(final_response=""),
                dod=create_definition_of_done("Launch a preview server"),
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            # Use a default with next(): a bare next() on an exhausted generator raises
            # StopIteration, which a coroutine re-raises as an opaque RuntimeError
            # instead of a readable assertion failure.
            tool_result = next(
                (event for event in events if event.type == "tool_result"),
                None,
            )
            assert tool_result is not None, "expected a tool_result event to be emitted"
            assert tool_result.tool_metadata == metadata
      
        350
        
        351
        
        352
        @pytest.mark.asyncio
        async def test_tool_batch_runner_verifies_with_context_services(temp_dir: Path) -> None:
            """With verification on, a successful read is verified and logged on the existing recovery context."""
            verified_results: list[str] = []
            emitted: list[AgentEvent] = []

            async def assess_confidence(tool_name: str, tool_args: dict, context: str) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(tool_name: str, tool_args: dict, result: str, expected: str = "") -> ActionVerification:
                # Log the verified output and answer with a failed verification.
                verified_results.append(result)
                return ActionVerification(
                    tool_name=tool_name,
                    tool_args=tool_args,
                    expected_outcome="Success",
                    actual_result=result,
                    verified=False,
                    discrepancies=["File contents did not match"],
                    needs_correction=True,
                    correction_suggestion="Read the file before editing again.",
                )

            async def record_event(event: AgentEvent) -> None:
                emitted.append(event)

            existing_recovery = RecoveryContext(
                original_tool="edit",
                original_args={"file_path": "README.md"},
            )
            context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                recovery_context=existing_recovery,
                verification=True,
            )
            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
            read_call = ToolCall(id="read-1", name="read", arguments={"file_path": "README.md"})
            read_outcome = tool_outcome(tool_call=read_call, output="file contents", is_error=False)
            executor = FakeExecutor([read_outcome])

            await runner.execute_batch(
                tool_calls=[read_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=record_event,
                summary=TurnSummary(final_response=""),
                dod=create_definition_of_done("Read the docs"),
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert verified_results == ["file contents"]
            # The same recovery object is kept and gains the successful step.
            assert context.recovery_context is existing_recovery
            assert existing_recovery.successful_steps == [
                ("read", {"file_path": "README.md"})
            ]
            final_message = context.session.messages[-1]
            assert final_message.role == Role.TOOL
            assert final_message.content == "file contents"
            assert any(event.type == "verification" for event in emitted)
      
        415
        
        416
        
        417
        @pytest.mark.asyncio
        async def test_tool_batch_runner_preserves_recovery_context_across_diagnostic_success(
            temp_dir: Path,
        ) -> None:
            """A successful ``ls`` bash call keeps the existing recovery context and records the step."""

            async def assess_confidence(tool_name: str, tool_args: dict, context: str) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(tool_name: str, tool_args: dict, result: str, expected: str = "") -> ActionVerification:
                raise AssertionError("Verification should not run for this scenario")

            # Recovery state for a read that has already failed once.
            missing_file_args = {"file_path": "chapters/04-data-types.html"}
            existing_recovery = RecoveryContext(
                original_tool="read",
                original_args={"file_path": "chapters/04-data-types.html"},
            )
            existing_recovery.add_attempt("read", missing_file_args, "File not found")

            context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                recovery_context=existing_recovery,
                auto_recover=False,
            )
            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
            listing_call = ToolCall(id="bash-1", name="bash", arguments={"command": "ls chapters"})
            listing_outcome = tool_outcome(
                tool_call=listing_call,
                output="01-introduction.html",
                is_error=False,
            )
            executor = FakeExecutor([listing_outcome])
            summary = TurnSummary(final_response="")

            await runner.execute_batch(
                tool_calls=[listing_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=summary,
                dod=create_definition_of_done("Fix the chapter links"),
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert context.recovery_context is existing_recovery
            expected_steps = [("bash", {"command": "ls chapters"})]
            assert existing_recovery.successful_steps == expected_steps
      
        483
        
        484
        
        485
        @pytest.mark.asyncio
        async def test_tool_batch_runner_clears_recovery_context_after_successful_mutation(
            temp_dir: Path,
        ) -> None:
            """A successful ``patch`` call resets the context's recovery state to ``None``."""

            async def assess_confidence(tool_name: str, tool_args: dict, context: str) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(tool_name: str, tool_args: dict, result: str, expected: str = "") -> ActionVerification:
                raise AssertionError("Verification should not run for this scenario")

            # Recovery state for a read that has already failed once.
            missing_file_args = {"file_path": "chapters/04-data-types.html"}
            existing_recovery = RecoveryContext(
                original_tool="read",
                original_args={"file_path": "chapters/04-data-types.html"},
            )
            existing_recovery.add_attempt("read", missing_file_args, "File not found")

            context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                recovery_context=existing_recovery,
                auto_recover=False,
            )
            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
            single_hunk = {"old_start": 1, "old_lines": 1, "new_start": 1, "new_lines": 1, "lines": ["-a", "+b"]}
            patch_call = ToolCall(
                id="patch-1",
                name="patch",
                arguments={"file_path": "index.html", "hunks": [single_hunk]},
            )
            patch_outcome = tool_outcome(tool_call=patch_call, output="Patched index.html", is_error=False)
            executor = FakeExecutor([patch_outcome])
            summary = TurnSummary(final_response="")

            await runner.execute_batch(
                tool_calls=[patch_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=summary,
                dod=create_definition_of_done("Fix the chapter links"),
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert context.recovery_context is None
      
        551
        
        552
        
        553
        @pytest.mark.asyncio
        async def test_tool_batch_runner_queues_duplicate_observation_nudge(
            temp_dir: Path,
        ) -> None:
            """Check the steering nudge queued when a duplicate ``read`` is skipped.

            The implementation plan lists ``chapters/04-variables.html``, which is
            never created on disk, and the fake executor reports the ``read`` of
            ``index.html`` as a duplicate.  Exactly one persistent steering message
            is expected (and no ephemeral one); it must tell the agent to reuse the
            earlier observation and to create the missing chapter file.
            """

            async def assess_confidence(
                tool_name: str, tool_args: dict, context: str
            ) -> ConfidenceAssessment:
                # Stub: the test fails if confidence scoring is ever requested.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str, tool_args: dict, result: str, expected: str = ""
            ) -> ActionVerification:
                # Stub: the test fails if action verification is ever requested.
                raise AssertionError("Verification should not run for this scenario")

            index_path = temp_dir / "index.html"
            chapters_dir = temp_dir / "chapters"
            first_chapter = chapters_dir / "01-introduction.html"
            first_chapter_html = "<h1>Chapter 1: Introduction to Fortran</h1>\n"

            # Conversation so far: a glob observation, one chapter read with its
            # result, and an earlier read request for the index.
            glob_observation = Message(
                role=Role.TOOL,
                content=(
                    "Observation [glob]: Result: "
                    f"{temp_dir}/chapters/01-introduction.html\n"
                    f"{temp_dir}/chapters/02-setup.html\n"
                    f"{temp_dir}/chapters/03-basics.html"
                ),
                tool_results=[],
            )
            chapter_read_request = Message(
                role=Role.ASSISTANT,
                content="I already inspected the first chapter title.",
                tool_calls=[
                    ToolCall(
                        id="read-ch1",
                        name="read",
                        arguments={"file_path": str(first_chapter)},
                    )
                ],
            )
            chapter_read_result = Message.tool_result_message(
                tool_call_id="read-ch1",
                display_content=first_chapter_html,
                result_content=first_chapter_html,
            )
            index_read_request = Message(
                role=Role.ASSISTANT,
                content="I should update the index now.",
                tool_calls=[
                    ToolCall(
                        id="read-index",
                        name="read",
                        arguments={"file_path": str(index_path)},
                    )
                ],
            )
            history = [
                glob_observation,
                chapter_read_request,
                chapter_read_result,
                index_read_request,
            ]

            context = build_context(
                temp_dir=temp_dir,
                messages=history,
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )

            # On-disk fixture: the index plus chapters 01-03 (04 is left missing).
            chapters_dir.mkdir()
            index_path.write_text("<ul></ul>\n")
            for filename, html in (
                ("01-introduction.html", "<h1>Intro</h1>\n"),
                ("02-setup.html", "<h1>Setup</h1>\n"),
                ("03-basics.html", "<h1>Basics</h1>\n"),
            ):
                (chapters_dir / filename).write_text(html)

            implementation_plan = temp_dir / "implementation.md"
            planned_files = [
                index_path,
                chapters_dir / "01-introduction.html",
                chapters_dir / "02-setup.html",
                chapters_dir / "03-basics.html",
                chapters_dir / "04-variables.html",
            ]
            plan_lines = ["# Implementation Plan", "", "## File Changes"]
            plan_lines.extend(f"- `{planned}`" for planned in planned_files)
            implementation_plan.write_text("\n".join(plan_lines))

            context.session.current_task = (
                f"Update {index_path} with the right chapter links."
            )

            # Capture whatever the runner queues on either steering channel.
            persistent_messages: list[str] = []
            ephemeral_messages: list[str] = []
            context.queue_steering_message_callback = persistent_messages.append
            context.queue_ephemeral_steering_message_callback = ephemeral_messages.append

            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
            tool_call = ToolCall(
                id="read-dup",
                name="read",
                arguments={"file_path": str(index_path)},
            )
            duplicate_message = (
                "[Skipped - duplicate action: Already read "
                f"{index_path} recently without any intervening changes; "
                "reuse the earlier read result instead of rereading]"
            )
            duplicate_outcome = ToolExecutionOutcome(
                tool_call=tool_call,
                state=ToolExecutionState.DUPLICATE,
                message=Message.tool_result_message(
                    tool_call_id=tool_call.id,
                    display_content=duplicate_message,
                    result_content=duplicate_message,
                ),
                event_content=duplicate_message,
                is_error=False,
                result_output=duplicate_message,
            )
            executor = FakeExecutor([duplicate_outcome])

            summary = TurnSummary(final_response="")
            dod = create_definition_of_done("Fix the chapter links")
            dod.implementation_plan = str(implementation_plan)
            dod.pending_items.append("Create the remaining chapter files")

            await runner.execute_batch(
                tool_calls=[tool_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert len(persistent_messages) == 1
            nudge = persistent_messages[0]
            for fragment in (
                "Reuse the earlier observation instead of repeating it.",
                "A declared output artifact is still missing.",
                "Resume by creating `04-variables.html` now.",
            ):
                assert fragment in nudge
            assert (
                "Prefer one `write` call for "
                f"`{display_runtime_path(chapters_dir / '04-variables.html')}` instead of more rereads."
                in nudge
            )
            assert ephemeral_messages == []
      
        702
        
        703
        
        704
        @pytest.mark.asyncio
        async def test_tool_batch_runner_duplicate_read_keeps_root_declared_missing_html_output_active(
            temp_dir: Path,
        ) -> None:
            """Check that a duplicate ``read`` still points at a chapter linked from the index.

            ``guide/index.html`` links to ``chapters/02-installation.html``, which
            does not exist on disk; the plan only names the index and the chapters
            directory.  After the executor reports a duplicate read of the index,
            the single persistent steering message must ask for that chapter to be
            created and must not claim that all planned artifacts already exist.
            """

            async def assess_confidence(
                tool_name: str, tool_args: dict, context: str
            ) -> ConfidenceAssessment:
                # Stub: the test fails if confidence scoring is ever requested.
                raise AssertionError("Confidence scoring should not run for this scenario")

            async def verify_action(
                tool_name: str, tool_args: dict, result: str, expected: str = ""
            ) -> ActionVerification:
                # Stub: the test fails if action verification is ever requested.
                raise AssertionError("Verification should not run for this scenario")

            # On-disk fixture: an index with two links, only the first of which resolves.
            guide_root = temp_dir / "guide"
            chapters = guide_root / "chapters"
            chapters.mkdir(parents=True)
            index = guide_root / "index.html"
            chapter_one = chapters / "01-introduction.html"
            index_html = (
                '<a href="chapters/01-introduction.html">Intro</a>\n'
                '<a href="chapters/02-installation.html">Install</a>\n'
            )
            index.write_text(index_html)
            chapter_one.write_text("<h1>Intro</h1>\n")

            implementation_plan = temp_dir / "implementation.md"
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{index}`",
                f"- `{chapters}/` (directory for chapter files)",
            ]
            implementation_plan.write_text("\n".join(plan_lines))

            # History already contains one read of the index.
            earlier_read = ToolCall(
                id="read-index",
                name="read",
                arguments={"file_path": str(index)},
            )
            messages = [
                Message(
                    role=Role.ASSISTANT,
                    content="I should keep building the guide.",
                    tool_calls=[earlier_read],
                ),
            ]
            context = build_context(
                temp_dir=temp_dir,
                messages=messages,
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            context.session.current_task = f"Build the guide rooted at {index}."

            # Capture whatever the runner queues on either steering channel.
            persistent_messages: list[str] = []
            ephemeral_messages: list[str] = []
            context.queue_steering_message_callback = persistent_messages.append
            context.queue_ephemeral_steering_message_callback = ephemeral_messages.append

            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
            tool_call = ToolCall(
                id="read-dup-rooted",
                name="read",
                arguments={"file_path": str(index)},
            )
            duplicate_message = (
                "[Skipped - duplicate action: Already read "
                f"{index} recently without any intervening changes; "
                "reuse the earlier read result instead of rereading]"
            )
            duplicate_outcome = ToolExecutionOutcome(
                tool_call=tool_call,
                state=ToolExecutionState.DUPLICATE,
                message=Message.tool_result_message(
                    tool_call_id=tool_call.id,
                    display_content=duplicate_message,
                    result_content=duplicate_message,
                ),
                event_content=duplicate_message,
                is_error=False,
                result_output=duplicate_message,
            )
            executor = FakeExecutor([duplicate_outcome])

            summary = TurnSummary(final_response="")
            dod = create_definition_of_done("Create a multi-file HTML guide with chapters.")
            dod.implementation_plan = str(implementation_plan)
            dod.touched_files = [str(index), str(chapter_one)]
            dod.completed_items = ["Create chapter files with appropriate content"]
            dod.pending_items.append("Create the remaining chapter files")

            await runner.execute_batch(
                tool_calls=[tool_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert len(persistent_messages) == 1
            nudge = persistent_messages[0]
            assert "Create the remaining chapter files" in nudge
            assert "Resume by creating `02-installation.html` now." in nudge
            assert "All explicitly planned artifacts already exist on disk." not in nudge
            assert ephemeral_messages == []
      
        827
        
        828
        
        829
        @pytest.mark.asyncio
        async def test_tool_batch_runner_todo_write_does_not_regress_completed_file_todo(
            temp_dir: Path,
        ) -> None:
            """Check that a stale ``TodoWrite`` cannot un-complete a finished file todo.

            The definition of done starts with two pending todos.  The batch then
            runs a successful ``write`` of ``03-first-website.html`` followed by a
            ``TodoWrite`` whose payload still lists both todos as pending.  The
            chapter-3 todo must end up completed while chapter 4 stays pending.
            """

            async def assess_confidence(
                tool_name: str, tool_args: dict, context: str
            ) -> ConfidenceAssessment:
                # Stub: the test fails if confidence scoring is ever requested.
                raise AssertionError("Confidence scoring should not run for this scenario")

            async def verify_action(
                tool_name: str, tool_args: dict, result: str, expected: str = ""
            ) -> ActionVerification:
                # Stub: the test fails if action verification is ever requested.
                raise AssertionError("Verification should not run for this scenario")

            def stale_todos() -> list[dict[str, str]]:
                # Builds a brand-new list of brand-new dicts on every call so the
                # three consumers below never share mutable state.
                return [
                    {
                        "content": "Create 03-first-website.html",
                        "active_form": "Creating 03-first-website.html",
                        "status": "pending",
                    },
                    {
                        "content": "Create 04-configuration-basics.html",
                        "active_form": "Creating 04-configuration-basics.html",
                        "status": "pending",
                    },
                ]

            context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))

            # Seed the definition of done with both todos still pending.
            dod = create_definition_of_done("Create a multi-file nginx guide.")
            sync_todos_to_definition_of_done(dod, stale_todos())

            chapter_path = (
                temp_dir / "guides" / "nginx" / "chapters" / "03-first-website.html"
            )
            chapter_path.parent.mkdir(parents=True)

            write_call = ToolCall(
                id="write-ch3",
                name="write",
                arguments={"file_path": str(chapter_path), "content": "<html></html>\n"},
            )
            stale_todo_call = ToolCall(
                id="todo-stale",
                name="TodoWrite",
                arguments={"todos": stale_todos()},
            )

            # Scripted outcomes, in call order: the write succeeds, then the todo
            # update reports the stale (all-pending) list as its new state.
            write_outcome = tool_outcome(
                tool_call=write_call,
                output=f"Successfully wrote {chapter_path}",
                is_error=False,
            )
            todo_outcome = tool_outcome(
                tool_call=stale_todo_call,
                output="Todos updated",
                is_error=False,
                metadata={"new_todos": stale_todos()},
            )
            executor = FakeExecutor([write_outcome, todo_outcome])

            summary = TurnSummary(final_response="")
            await runner.execute_batch(
                tool_calls=[write_call, stale_todo_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            written_todo = "Create 03-first-website.html"
            untouched_todo = "Create 04-configuration-basics.html"
            assert written_todo in dod.completed_items
            assert written_todo not in dod.pending_items
            assert untouched_todo in dod.pending_items
      
        946
        
        947
        
        948
        @pytest.mark.asyncio
        async def test_tool_batch_runner_proactively_queues_verified_html_inventory(
            temp_dir: Path,
        ) -> None:
            """Run a successful chapter ``glob`` and check what the runner queues.

            As written, the assertions require that no steering message is queued
            on either channel and that the single recorded tool-result message does
            not contain a "Verified chapter inventory:" section.

            NOTE(review): the test name says "proactively queues", but the
            assertions pin the opposite outcome - confirm the name is just stale.
            """

            async def assess_confidence(
                tool_name: str, tool_args: dict, context: str
            ) -> ConfidenceAssessment:
                # Stub: the test fails if confidence scoring is ever requested.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str, tool_args: dict, result: str, expected: str = ""
            ) -> ActionVerification:
                # Stub: the test fails if action verification is ever requested.
                raise AssertionError("Verification should not run for this scenario")

            # On-disk fixture: two chapter files and an index with an empty list.
            chapters = temp_dir / "chapters"
            chapters.mkdir()
            intro_path = chapters / "01-introduction.html"
            setup_path = chapters / "02-setup.html"
            index_path = temp_dir / "index.html"
            intro_path.write_text("<h1>Chapter 1: Introduction to Fortran</h1>\n")
            setup_path.write_text("<h1>Chapter 2: Setting Up Your Environment</h1>\n")
            index_path.write_text("<ul></ul>\n")

            context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            context.session.current_task = (
                f"Update {index_path} so the chapter links match the sibling files."
            )

            # Capture whatever the runner queues on either steering channel.
            persistent_messages: list[str] = []
            ephemeral_messages: list[str] = []
            context.queue_steering_message_callback = persistent_messages.append
            context.queue_ephemeral_steering_message_callback = ephemeral_messages.append

            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
            tool_call = ToolCall(
                id="glob-1",
                name="glob",
                arguments={"path": str(chapters), "pattern": "*.html"},
            )
            # The scripted glob result lists both chapter files, one per line.
            glob_listing = "\n".join([str(intro_path), str(setup_path)])
            executor = FakeExecutor(
                [tool_outcome(tool_call=tool_call, output=glob_listing, is_error=False)]
            )

            summary = TurnSummary(final_response="")
            await runner.execute_batch(
                tool_calls=[tool_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=summary,
                dod=create_definition_of_done("Fix the chapter links"),
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert persistent_messages == []
            assert ephemeral_messages == []
            recorded = summary.tool_result_messages
            assert len(recorded) == 1
            assert "Verified chapter inventory:" not in recorded[0].content
      
        1032
        
        1033
        
        1034
        @pytest.mark.asyncio
        async def test_tool_batch_runner_marks_validated_html_toc_completion_after_successful_edit(
            temp_dir: Path,
        ) -> None:
            """Run a successful ``edit`` of an index.html whose TOC already matches chapters/.

            The batch is expected to leave no "Semantic verification preview:" text
            in the tool result messages and to queue neither persistent nor
            ephemeral steering messages.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Tripwire: the test fails if this hook is ever awaited.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Tripwire: the test fails if this hook is ever awaited.
                raise AssertionError("Verification should not run for this scenario")

            # The same task text feeds both the session and the definition of done.
            task = "Update index.html so every chapter link and title matches the real HTML files in chapters/."

            # Chapter files on disk, named and titled like the entries in the new TOC.
            chapter_dir = temp_dir / "chapters"
            chapter_dir.mkdir()
            chapter_headings = {
                "01-introduction.html": "<h1>Chapter 1: Introduction to Fortran</h1>\n",
                "02-setup.html": "<h1>Chapter 2: Setting Up Your Environment</h1>\n",
            }
            for file_name, heading in chapter_headings.items():
                (chapter_dir / file_name).write_text(heading)

            stale_toc = "".join(
                (
                    '<ul class="chapter-list">\n',
                    '    <li><a href="chapters/01-old.html">Chapter 1: Old</a></li>\n',
                    '    <li><a href="chapters/02-old.html">Chapter 2: Old</a></li>\n',
                    "</ul>\n",
                )
            )
            fresh_toc = "".join(
                (
                    '<ul class="chapter-list">\n',
                    '    <li><a href="chapters/01-introduction.html">Chapter 1: Introduction to Fortran</a></li>\n',
                    '    <li><a href="chapters/02-setup.html">Chapter 2: Setting Up Your Environment</a></li>\n',
                    "</ul>\n",
                )
            )
            # The executor is a fake, so the file is written in its post-edit state up front.
            index_path = temp_dir / "index.html"
            index_path.write_text(fresh_toc)

            runtime = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            runtime.session.current_task = task

            # Capture everything the runner queues as steering.
            persistent_steering: list[str] = []
            ephemeral_steering: list[str] = []
            runtime.queue_steering_message_callback = persistent_steering.append
            runtime.queue_ephemeral_steering_message_callback = ephemeral_steering.append

            batch_runner = ToolBatchRunner(runtime, DefinitionOfDoneStore(temp_dir))
            edit_call = ToolCall(
                id="edit-1",
                name="edit",
                arguments={
                    "file_path": str(index_path),
                    "old_string": stale_toc,
                    "new_string": fresh_toc,
                },
            )
            scripted_outcome = tool_outcome(
                tool_call=edit_call,
                output=f"Successfully edited {index_path}",
                is_error=False,
            )
            fake_executor = FakeExecutor([scripted_outcome])

            turn = TurnSummary(final_response="")
            await batch_runner.execute_batch(
                tool_calls=[edit_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn,
                dod=create_definition_of_done(task),
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert not any(
                "Semantic verification preview:" in tool_message.content
                for tool_message in turn.tool_result_messages
            )
            assert persistent_steering == []
            assert ephemeral_steering == []
      
        1134
        
        1135
        
        1136
        @pytest.mark.asyncio
        async def test_tool_batch_runner_does_not_apply_html_toc_handoff_to_reference_read(
            temp_dir: Path,
        ) -> None:
            """Read an index.html with a chapter TOC while the task is only to study a reference guide.

            The read is expected to queue no steering messages and to add no
            "Semantic verification preview:" text to the tool result messages.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Tripwire: the test fails if this hook is ever awaited.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Tripwire: the test fails if this hook is ever awaited.
                raise AssertionError("Verification should not run for this scenario")

            # Reference guide layout: two chapters plus an index that links to them.
            chapter_dir = temp_dir / "chapters"
            chapter_dir.mkdir()
            chapter_headings = {
                "01-introduction.html": "<h1>Chapter 1: Introduction to Fortran</h1>\n",
                "02-setup.html": "<h1>Chapter 2: Setting Up Your Environment</h1>\n",
            }
            for file_name, heading in chapter_headings.items():
                (chapter_dir / file_name).write_text(heading)

            index_html = "".join(
                (
                    "<h2>Table of Contents</h2>\n",
                    '<ul class="chapter-list">\n',
                    '    <li><a href="chapters/01-introduction.html">Chapter 1: Introduction to Fortran</a></li>\n',
                    '    <li><a href="chapters/02-setup.html">Chapter 2: Setting Up Your Environment</a></li>\n',
                    "</ul>\n",
                )
            )
            index_path = temp_dir / "index.html"
            index_path.write_text(index_html)

            prompt = (
                "Have a look at ~/Loader/guides/fortran and chapters/ within. Get a feel "
                "for the structure and cadence of the guide. We are going to make an all "
                "new equally thorough guide on how to use the nginx tool."
            )

            runtime = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            runtime.session.current_task = prompt  # type: ignore[attr-defined]

            # Capture everything the runner queues as steering.
            persistent_steering: list[str] = []
            ephemeral_steering: list[str] = []
            runtime.queue_steering_message_callback = persistent_steering.append
            runtime.queue_ephemeral_steering_message_callback = ephemeral_steering.append

            batch_runner = ToolBatchRunner(runtime, DefinitionOfDoneStore(temp_dir))
            read_call = ToolCall(
                id="read-index",
                name="read",
                arguments={"file_path": str(index_path)},
            )
            # The fake executor replays the file's current text as the read result.
            scripted_outcome = tool_outcome(
                tool_call=read_call,
                output=index_path.read_text(),
                is_error=False,
            )
            fake_executor = FakeExecutor([scripted_outcome])

            turn = TurnSummary(final_response="")
            await batch_runner.execute_batch(
                tool_calls=[read_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn,
                dod=create_definition_of_done(prompt),
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert persistent_steering == []
            assert ephemeral_steering == []
            assert not any(
                "Semantic verification preview:" in tool_message.content
                for tool_message in turn.tool_result_messages
            )
      
        1228
        
        1229
        
        1230
        @pytest.mark.asyncio
        async def test_tool_batch_runner_queues_next_pending_todo_after_discovery_progress(
            temp_dir: Path,
        ) -> None:
            """Read a reference chapter while three todos are pending.

            Expected afterwards: the first (examine-the-guide) todo is listed in
            ``dod.completed_items``, a persistent steering message names the next
            pending todo and says to create ``chapters/``, no persistent message
            mentions the reference file name, and nothing ephemeral is queued.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Tripwire: the test fails if this hook is ever awaited.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Tripwire: the test fails if this hook is ever awaited.
                raise AssertionError("Verification should not run for this scenario")

            # Existing reference material that the read call targets.
            reference_html = "<h1>Introduction</h1>\n<p>Guide cadence.</p>\n"
            reference = temp_dir / "fortran" / "chapters" / "01-introduction.html"
            reference.parent.mkdir(parents=True)
            reference.write_text(reference_html)

            # Planned outputs; neither path is created on disk by this test.
            nginx_root = temp_dir / "Loader" / "guides" / "nginx"
            chapters = nginx_root / "chapters"
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{chapters}/`",
                f"- `{nginx_root / 'index.html'}`",
                "",
            ]
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines))

            runtime = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )

            # Capture everything the runner queues as steering.
            persistent_steering: list[str] = []
            ephemeral_steering: list[str] = []
            runtime.queue_steering_message_callback = persistent_steering.append
            runtime.queue_ephemeral_steering_message_callback = ephemeral_steering.append

            batch_runner = ToolBatchRunner(runtime, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create an equally thorough nginx guide.")
            dod.implementation_plan = str(implementation_plan)
            todo_texts = (
                "Examine the existing Fortran guide structure to understand the cadence and format",
                "Create the nginx directory structure",
                "Create the nginx index.html file",
            )
            sync_todos_to_definition_of_done(
                dod,
                [
                    {
                        "content": text,
                        "active_form": f"Working on: {text}",
                        "status": "pending",
                    }
                    for text in todo_texts
                ],
            )

            read_call = ToolCall(
                id="read-reference",
                name="read",
                arguments={"file_path": str(reference)},
            )
            scripted_outcome = tool_outcome(
                tool_call=read_call,
                output=reference_html,
                is_error=False,
            )
            fake_executor = FakeExecutor([scripted_outcome])

            turn = TurnSummary(final_response="")
            await batch_runner.execute_batch(
                tool_calls=[read_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn,
                dod=dod,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert todo_texts[0] in dod.completed_items

            next_item_hint = "Continue with the next pending item: `Create the nginx directory structure`"
            assert any(next_item_hint in steering for steering in persistent_steering)
            assert any(
                "Resume by creating `chapters/` now." in steering
                for steering in persistent_steering
            )
            assert not any("01-introduction.html" in steering for steering in persistent_steering)
            assert ephemeral_steering == []
      
        1348
        
        1349
        
        1350
        @pytest.mark.asyncio
        async def test_tool_batch_runner_queues_setup_directory_before_file_when_plan_lists_index_first(
            temp_dir: Path,
        ) -> None:
            """Read a reference chapter when the plan lists index.html ahead of chapters/.

            Expected afterwards: a persistent steering message names the
            directory-structure todo and says to create ``chapters/``, no
            persistent message says "Next step: create `index.html`.", and
            nothing ephemeral is queued.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Tripwire: the test fails if this hook is ever awaited.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Tripwire: the test fails if this hook is ever awaited.
                raise AssertionError("Verification should not run for this scenario")

            # Existing reference material that the read call targets.
            reference_html = "<h1>Introduction</h1>\n<p>Guide cadence.</p>\n"
            reference = temp_dir / "fortran" / "chapters" / "01-introduction.html"
            reference.parent.mkdir(parents=True)
            reference.write_text(reference_html)

            # Planned outputs, with the index file deliberately listed before the directory.
            nginx_root = temp_dir / "Loader" / "guides" / "nginx"
            chapters = nginx_root / "chapters"
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{nginx_root / 'index.html'}`",
                f"- `{chapters}/`",
                "",
            ]
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines))

            runtime = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )

            # Capture everything the runner queues as steering.
            persistent_steering: list[str] = []
            ephemeral_steering: list[str] = []
            runtime.queue_steering_message_callback = persistent_steering.append
            runtime.queue_ephemeral_steering_message_callback = ephemeral_steering.append

            batch_runner = ToolBatchRunner(runtime, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create an equally thorough nginx guide.")
            dod.implementation_plan = str(implementation_plan)
            todo_texts = (
                "Examine the existing Fortran guide structure to understand the cadence and format",
                "Create the nginx directory structure",
                "Create the nginx index.html file",
            )
            sync_todos_to_definition_of_done(
                dod,
                [
                    {
                        "content": text,
                        "active_form": f"Working on: {text}",
                        "status": "pending",
                    }
                    for text in todo_texts
                ],
                project_root=temp_dir,
            )

            read_call = ToolCall(
                id="read-reference-index-first",
                name="read",
                arguments={"file_path": str(reference)},
            )
            scripted_outcome = tool_outcome(
                tool_call=read_call,
                output=reference_html,
                is_error=False,
            )
            fake_executor = FakeExecutor([scripted_outcome])

            turn = TurnSummary(final_response="")
            await batch_runner.execute_batch(
                tool_calls=[read_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn,
                dod=dod,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert persistent_steering

            next_item_hint = "Continue with the next pending item: `Create the nginx directory structure`"
            assert any(next_item_hint in steering for steering in persistent_steering)
            assert any(
                "Resume by creating `chapters/` now." in steering
                for steering in persistent_steering
            )
            assert not any(
                "Next step: create `index.html`." in steering
                for steering in persistent_steering
            )
            assert ephemeral_steering == []
      
        1469
        
        1470
        
        1471
        @pytest.mark.asyncio
        async def test_tool_batch_runner_duplicate_reference_read_prefers_next_pending_todo(
            temp_dir: Path,
        ) -> None:
            """Feed the runner a DUPLICATE outcome for a repeated read of a reference file.

            Expected afterwards: exactly one persistent steering message, which
            says to reuse the earlier observation and to continue with the next
            pending todo, contains no "Update `" text, and nothing ephemeral is
            queued.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Tripwire: the test fails if this hook is ever awaited.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Tripwire: the test fails if this hook is ever awaited.
                raise AssertionError("Verification should not run for this scenario")

            reference = temp_dir / "fortran" / "index.html"
            reference.parent.mkdir(parents=True)
            reference.write_text("<h1>Fortran Beginner's Guide</h1>\n")

            prompt = (
                "Have a look at ~/Loader/guides/fortran and chapters/ within. Get a feel "
                "for the structure and cadence of the guide. We are going to make an all "
                "new equally thorough guide on how to use the nginx tool."
            )

            # Conversation history already holds a tool observation with the file's content.
            earlier_observation = Message(
                role=Role.TOOL,
                content=(
                    "Observation [read]: Result: "
                    "<h1>Fortran Beginner's Guide</h1>\n"
                ),
            )
            runtime = build_context(
                temp_dir=temp_dir,
                messages=[earlier_observation],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            runtime.session.current_task = prompt

            # Capture everything the runner queues as steering.
            persistent_steering: list[str] = []
            ephemeral_steering: list[str] = []
            runtime.queue_steering_message_callback = persistent_steering.append
            runtime.queue_ephemeral_steering_message_callback = ephemeral_steering.append

            batch_runner = ToolBatchRunner(runtime, DefinitionOfDoneStore(temp_dir))

            # The first todo is already completed; the remaining two are pending.
            dod = create_definition_of_done(prompt)
            todo_states = (
                (
                    "Examine the existing Fortran guide structure to understand the cadence and format",
                    "completed",
                ),
                ("Create the nginx directory structure", "pending"),
                ("Create the nginx index.html file", "pending"),
            )
            sync_todos_to_definition_of_done(
                dod,
                [
                    {
                        "content": text,
                        "active_form": f"Working on: {text}",
                        "status": status,
                    }
                    for text, status in todo_states
                ],
            )

            read_call = ToolCall(
                id="read-dup",
                name="read",
                arguments={"file_path": str(reference)},
            )
            duplicate_message = (
                "[Skipped - duplicate action: Already read "
                f"{reference} recently without any intervening changes; "
                "reuse the earlier read result instead of rereading]"
            )
            # Built by hand so the outcome carries the DUPLICATE state.
            duplicate_outcome = ToolExecutionOutcome(
                tool_call=read_call,
                state=ToolExecutionState.DUPLICATE,
                message=Message.tool_result_message(
                    tool_call_id=read_call.id,
                    display_content=duplicate_message,
                    result_content=duplicate_message,
                ),
                event_content=duplicate_message,
                is_error=False,
                result_output=duplicate_message,
            )
            fake_executor = FakeExecutor([duplicate_outcome])

            turn = TurnSummary(final_response="")
            await batch_runner.execute_batch(
                tool_calls=[read_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn,
                dod=dod,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert len(persistent_steering) == 1
            steering = persistent_steering[0]
            assert "Reuse the earlier observation instead of repeating it." in steering
            assert (
                "Continue with the next pending item: `Create the nginx directory structure`"
                in steering
            )
            assert "Update `" not in steering
            assert ephemeral_steering == []
      
        1593
        
        1594
        
        1595
        @pytest.mark.asyncio
        async def test_tool_batch_runner_successful_reference_read_prioritizes_concrete_missing_artifact(
            temp_dir: Path,
        ) -> None:
            """A successful reference read should steer toward the missing planned file.

            The implementation plan lists ``index.html`` but this test never writes it,
            while the first nginx chapter exists on disk and is recorded in
            ``dod.touched_files``. After one executed ``read`` of the Fortran reference
            chapter, the persistent steering is expected to confirm the "Examine ..." todo
            and ask for ``index.html``, without the generic "next pending item" nudge.
            """

            async def fail_on_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Tripwire: this scenario must never reach confidence scoring.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def fail_on_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Tripwire: this scenario must never reach action verification.
                raise AssertionError("Verification should not run for this scenario")

            # Target guide: the chapter directory and first chapter exist on disk.
            nginx_root = temp_dir / "Loader" / "guides" / "nginx"
            nginx_chapters = nginx_root / "chapters"
            nginx_chapters.mkdir(parents=True)
            existing_chapter = nginx_chapters / "01-introduction.html"
            existing_chapter.write_text("<html></html>\n")
            # Listed in the plan below but deliberately never written.
            missing_index = nginx_root / "index.html"

            # Reference guide chapter that the tool call reads.
            reference_html = "<h1>Introduction</h1>\n<p>Guide cadence.</p>\n"
            reference = temp_dir.joinpath(
                "Loader", "guides", "fortran", "chapters", "01-introduction.html"
            )
            reference.parent.mkdir(parents=True, exist_ok=True)
            reference.write_text(reference_html)

            planned_entries = [
                f"{nginx_root}/",
                f"{nginx_chapters}/",
                str(missing_index),
                str(existing_chapter),
                str(nginx_chapters / "02-installation.html"),
            ]
            plan_text = "\n".join(
                ["# Implementation Plan", "", "## File Changes"]
                + [f"- `{entry}`" for entry in planned_entries]
                + [""]
            )
            plan_file = temp_dir / "implementation.md"
            plan_file.write_text(plan_text)

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=fail_on_confidence,
                verify_action=fail_on_verification,
                auto_recover=False,
            )
            persistent_messages: list[str] = []
            ephemeral_messages: list[str] = []
            runtime_context.queue_steering_message_callback = persistent_messages.append
            runtime_context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(plan_file)
            dod.touched_files.append(str(existing_chapter))

            todo_titles = (
                "Examine the existing Fortran guide structure to understand the format and cadence",
                "Create each chapter file with appropriate content",
                "Ensure all files follow the same structure and style as the Fortran guide",
            )
            sync_todos_to_definition_of_done(
                dod,
                [
                    {
                        "content": title,
                        "active_form": f"Working on: {title}",
                        "status": "pending",
                    }
                    for title in todo_titles
                ],
            )

            read_call = ToolCall(
                id="read-reference-chapter",
                name="read",
                arguments={"file_path": str(reference)},
            )
            read_output = f"Observation [read]: Result: {reference_html}"
            read_result_message = Message.tool_result_message(
                tool_call_id=read_call.id,
                display_content=read_output,
                result_content=read_output,
            )
            read_outcome = ToolExecutionOutcome(
                tool_call=read_call,
                state=ToolExecutionState.EXECUTED,
                message=read_result_message,
                event_content=read_output,
                is_error=False,
                result_output=read_output,
            )
            executor = FakeExecutor([read_outcome])

            summary = TurnSummary(final_response="")
            await runner.execute_batch(
                tool_calls=[read_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            confirmed_progress = f"Confirmed progress: `{todo_titles[0]}`"
            resume_hint = "Resume by creating `index.html` now."
            generic_nudge = f"Continue with the next pending item: `{todo_titles[1]}`"

            assert persistent_messages
            assert any(confirmed_progress in message for message in persistent_messages)
            assert any(resume_hint in message for message in persistent_messages)
            assert all(generic_nudge not in message for message in persistent_messages)
            assert ephemeral_messages == []
      
        1729
        
        1730
        
        1731
        @pytest.mark.asyncio
        async def test_tool_batch_runner_duplicate_read_ignores_unplanned_expansion_after_plan_complete(
            temp_dir: Path,
        ) -> None:
            """A duplicate read after the plan is built should skip unplanned creation todos.

            Every file listed in the implementation plan is written to disk here. The
            pending items start with ``Create 07-performance-tuning.html``, which is not in
            the plan. The single persistent steering message is expected to mention the
            "Verify all guide files ..." item and not that creation item.
            """

            async def fail_on_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Tripwire: this scenario must never reach confidence scoring.
                raise AssertionError("Confidence scoring should not run for this scenario")

            async def fail_on_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Tripwire: this scenario must never reach action verification.
                raise AssertionError("Verification should not run for this scenario")

            nginx_root = temp_dir / "guides" / "nginx"
            nginx_chapters = nginx_root / "chapters"
            nginx_root.mkdir(parents=True)
            nginx_chapters.mkdir()
            index_file = nginx_root / "index.html"
            first_chapter = nginx_chapters / "01-getting-started.html"
            second_chapter = nginx_chapters / "02-installation.html"
            # Materialise every planned file so the plan counts as complete on disk.
            for artifact, html in (
                (index_file, "<html></html>\n"),
                (first_chapter, "<h1>One</h1>\n"),
                (second_chapter, "<h1>Two</h1>\n"),
            ):
                artifact.write_text(html)

            planned_entries = [
                f"{nginx_root}/",
                f"{nginx_chapters}/",
                str(index_file),
                str(first_chapter),
                str(second_chapter),
            ]
            plan_text = "\n".join(
                ["# Implementation Plan", "", "## File Changes"]
                + [f"- `{entry}`" for entry in planned_entries]
                + [""]
            )
            plan_file = temp_dir / "implementation.md"
            plan_file.write_text(plan_text)

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=fail_on_confidence,
                verify_action=fail_on_verification,
                auto_recover=False,
            )
            persistent_messages: list[str] = []
            ephemeral_messages: list[str] = []
            runtime_context.queue_steering_message_callback = persistent_messages.append
            runtime_context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(plan_file)
            dod.pending_items = [
                "Create 07-performance-tuning.html",
                "Verify all guide files are linked and complete",
                "Complete the requested work",
            ]

            reread_call = ToolCall(
                id="read-dup",
                name="read",
                arguments={"file_path": str(first_chapter)},
            )
            duplicate_message = (
                f"[Skipped - duplicate action: Already read {first_chapter} recently "
                "without any intervening changes; "
                "reuse the earlier read result instead of rereading]"
            )
            duplicate_result_message = Message.tool_result_message(
                tool_call_id=reread_call.id,
                display_content=duplicate_message,
                result_content=duplicate_message,
            )
            duplicate_outcome = ToolExecutionOutcome(
                tool_call=reread_call,
                state=ToolExecutionState.DUPLICATE,
                message=duplicate_result_message,
                event_content=duplicate_message,
                is_error=False,
                result_output=duplicate_message,
            )
            executor = FakeExecutor([duplicate_outcome])

            summary = TurnSummary(final_response="")
            await runner.execute_batch(
                tool_calls=[reread_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert len(persistent_messages) == 1
            steering = persistent_messages[0]
            assert "Verify all guide files are linked and complete" in steering
            assert "Create 07-performance-tuning.html" not in steering
            assert ephemeral_messages == []
      
        1845
        
        1846
        
        1847
        @pytest.mark.asyncio
        async def test_tool_batch_runner_duplicate_read_after_plan_complete_pushes_verification_handoff(
            temp_dir: Path,
        ) -> None:
            """A duplicate read after the plan is built should hand off to verification.

            Every file listed in the implementation plan is written to disk here and the
            definition of done carries one verification command. The pending items are an
            unplanned ``Create 07-performance-tuning.html`` plus a generic completion item.
            The single persistent steering message is expected to state that the planned
            artifacts exist and to point at verification, without the creation item.
            """

            async def fail_on_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Tripwire: this scenario must never reach confidence scoring.
                raise AssertionError("Confidence scoring should not run for this scenario")

            async def fail_on_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Tripwire: this scenario must never reach action verification.
                raise AssertionError("Verification should not run for this scenario")

            nginx_root = temp_dir / "guides" / "nginx"
            nginx_chapters = nginx_root / "chapters"
            nginx_root.mkdir(parents=True)
            nginx_chapters.mkdir()
            index_file = nginx_root / "index.html"
            first_chapter = nginx_chapters / "01-getting-started.html"
            second_chapter = nginx_chapters / "02-installation.html"
            # Materialise every planned file so the plan counts as complete on disk.
            for artifact, html in (
                (index_file, "<html></html>\n"),
                (first_chapter, "<h1>One</h1>\n"),
                (second_chapter, "<h1>Two</h1>\n"),
            ):
                artifact.write_text(html)

            planned_entries = [
                f"{nginx_root}/",
                f"{nginx_chapters}/",
                str(index_file),
                str(first_chapter),
                str(second_chapter),
            ]
            plan_text = "\n".join(
                ["# Implementation Plan", "", "## File Changes"]
                + [f"- `{entry}`" for entry in planned_entries]
                + [""]
            )
            plan_file = temp_dir / "implementation.md"
            plan_file.write_text(plan_text)

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=fail_on_confidence,
                verify_action=fail_on_verification,
                auto_recover=False,
            )
            persistent_messages: list[str] = []
            ephemeral_messages: list[str] = []
            runtime_context.queue_steering_message_callback = persistent_messages.append
            runtime_context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(plan_file)
            dod.verification_commands = [f"ls -la {nginx_root}"]
            dod.pending_items = [
                "Create 07-performance-tuning.html",
                "Complete the requested work",
            ]

            reread_call = ToolCall(
                id="read-dup",
                name="read",
                arguments={"file_path": str(first_chapter)},
            )
            duplicate_message = (
                f"[Skipped - duplicate action: Already read {first_chapter} recently "
                "without any intervening changes; "
                "reuse the earlier read result instead of rereading]"
            )
            duplicate_result_message = Message.tool_result_message(
                tool_call_id=reread_call.id,
                display_content=duplicate_message,
                result_content=duplicate_message,
            )
            duplicate_outcome = ToolExecutionOutcome(
                tool_call=reread_call,
                state=ToolExecutionState.DUPLICATE,
                message=duplicate_result_message,
                event_content=duplicate_message,
                is_error=False,
                result_output=duplicate_message,
            )
            executor = FakeExecutor([duplicate_outcome])

            summary = TurnSummary(final_response="")
            await runner.execute_batch(
                tool_calls=[reread_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert len(persistent_messages) == 1
            steering = persistent_messages[0]
            assert "All explicitly planned artifacts already exist on disk." in steering
            assert (
                "Move to verification or final confirmation using the files already on disk."
                in steering
            )
            assert "Create 07-performance-tuning.html" not in steering
            assert ephemeral_messages == []
      
        1965
        
        1966
        
        1967
        @pytest.mark.asyncio
        async def test_tool_batch_runner_duplicate_read_after_plan_complete_ignores_stale_creation_todos(
            temp_dir: Path,
        ) -> None:
            """A duplicate read after the plan is built should drop stale creation todos.

            Every file listed in the implementation plan is written to disk here, yet the
            pending items still say ``Create 01-getting-started.html`` and
            ``Creating 02-installation.html``. The single persistent steering message is
            expected to state that the planned artifacts exist and to point at
            verification, repeating neither of those stale items.
            """

            async def fail_on_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Tripwire: this scenario must never reach confidence scoring.
                raise AssertionError("Confidence scoring should not run for this scenario")

            async def fail_on_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Tripwire: this scenario must never reach action verification.
                raise AssertionError("Verification should not run for this scenario")

            nginx_root = temp_dir / "guides" / "nginx"
            nginx_chapters = nginx_root / "chapters"
            nginx_root.mkdir(parents=True)
            nginx_chapters.mkdir()
            index_file = nginx_root / "index.html"
            first_chapter = nginx_chapters / "01-getting-started.html"
            second_chapter = nginx_chapters / "02-installation.html"
            # Materialise every planned file so the plan counts as complete on disk.
            for artifact, html in (
                (index_file, "<html></html>\n"),
                (first_chapter, "<h1>One</h1>\n"),
                (second_chapter, "<h1>Two</h1>\n"),
            ):
                artifact.write_text(html)

            planned_entries = [
                f"{nginx_root}/",
                f"{nginx_chapters}/",
                str(index_file),
                str(first_chapter),
                str(second_chapter),
            ]
            plan_text = "\n".join(
                ["# Implementation Plan", "", "## File Changes"]
                + [f"- `{entry}`" for entry in planned_entries]
                + [""]
            )
            plan_file = temp_dir / "implementation.md"
            plan_file.write_text(plan_text)

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=fail_on_confidence,
                verify_action=fail_on_verification,
                auto_recover=False,
            )
            persistent_messages: list[str] = []
            ephemeral_messages: list[str] = []
            runtime_context.queue_steering_message_callback = persistent_messages.append
            runtime_context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(plan_file)
            dod.verification_commands = [f"ls -la {nginx_root}"]
            # Both creation todos refer to files that were written above.
            stale_todos = (
                "Create 01-getting-started.html",
                "Creating 02-installation.html",
            )
            dod.pending_items = [*stale_todos, "Complete the requested work"]

            reread_call = ToolCall(
                id="read-dup-built-stale",
                name="read",
                arguments={"file_path": str(first_chapter)},
            )
            duplicate_message = (
                f"[Skipped - duplicate action: Already read {first_chapter} recently "
                "without any intervening changes; "
                "reuse the earlier read result instead of rereading]"
            )
            duplicate_result_message = Message.tool_result_message(
                tool_call_id=reread_call.id,
                display_content=duplicate_message,
                result_content=duplicate_message,
            )
            duplicate_outcome = ToolExecutionOutcome(
                tool_call=reread_call,
                state=ToolExecutionState.DUPLICATE,
                message=duplicate_result_message,
                event_content=duplicate_message,
                is_error=False,
                result_output=duplicate_message,
            )
            executor = FakeExecutor([duplicate_outcome])

            summary = TurnSummary(final_response="")
            await runner.execute_batch(
                tool_calls=[reread_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert len(persistent_messages) == 1
            steering = persistent_messages[0]
            assert "All explicitly planned artifacts already exist on disk." in steering
            assert (
                "Move to verification or final confirmation using the files already on disk."
                in steering
            )
            assert stale_todos[0] not in steering
            assert stale_todos[1] not in steering
            assert ephemeral_messages == []
      
        2087
        
        2088
        
        2089
        @pytest.mark.asyncio
        async def test_tool_batch_runner_successful_read_after_plan_complete_pushes_review_handoff(
            temp_dir: Path,
        ) -> None:
            """Check the review handoff after a read once every planned file exists.

            All paths named in the implementation plan are created up front and
            the todo list still carries a pending review item. After a successful
            ``read`` of one built chapter, the test expects exactly one ephemeral
            steering message that names the review todo, and no persistent one.
            """

            async def reject_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should not run for this scenario")

            async def reject_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run for this scenario")

            # Build the complete planned output tree before the batch runs.
            nginx_dir = temp_dir / "guides" / "nginx"
            chapters_dir = nginx_dir / "chapters"
            index_file = nginx_dir / "index.html"
            first_chapter = chapters_dir / "01-getting-started.html"
            second_chapter = chapters_dir / "02-installation.html"
            nginx_dir.mkdir(parents=True)
            chapters_dir.mkdir()
            index_file.write_text("<html></html>\n")
            first_chapter.write_text("<h1>One</h1>\n")
            second_chapter.write_text("<h1>Two</h1>\n")

            # The plan lists both directories and all three files.
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{nginx_dir}/`",
                f"- `{chapters_dir}/`",
                f"- `{index_file}`",
                f"- `{first_chapter}`",
                f"- `{second_chapter}`",
                "",
            ]
            plan_file = temp_dir / "implementation.md"
            plan_file.write_text("\n".join(plan_lines))

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=reject_confidence,
                verify_action=reject_verification,
                auto_recover=False,
            )
            # Capture both steering channels so the assertions can tell them apart.
            persistent_messages: list[str] = []
            ephemeral_messages: list[str] = []
            runtime_context.queue_steering_message_callback = persistent_messages.append
            runtime_context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(plan_file)
            dod.verification_commands = [f"ls -la {nginx_dir}"]
            todos = [
                {
                    "content": "Create 01-getting-started.html",
                    "active_form": "Creating 01-getting-started.html",
                    "status": "pending",
                },
                {
                    "content": "Ensure all files are properly linked and formatted consistently",
                    "active_form": "Reviewing guide consistency and linkage",
                    "status": "pending",
                },
            ]
            sync_todos_to_definition_of_done(dod, todos)

            read_call = ToolCall(
                id="read-built-review",
                name="read",
                arguments={"file_path": str(first_chapter)},
            )
            read_outcome = tool_outcome(
                tool_call=read_call,
                output=first_chapter.read_text(),
                is_error=False,
            )
            executor = FakeExecutor([read_outcome])

            summary = TurnSummary(final_response="")
            await runner.execute_batch(
                tool_calls=[read_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert persistent_messages == []
            assert len(ephemeral_messages) == 1
            handoff = ephemeral_messages[0]
            expected_fragments = (
                "All explicitly planned artifacts already exist.",
                "Ensure all files are properly linked and formatted consistently",
                "do not keep broad-rereading the output set",
                "If no specific mismatch remains, move to verification now.",
            )
            for fragment in expected_fragments:
                assert fragment in handoff
            # The creation todo for the file that already exists must not be repeated.
            assert "Create 01-getting-started.html" not in handoff
      
        2200
        
        2201
        
        2202
        @pytest.mark.asyncio
        async def test_tool_batch_runner_successful_read_after_plan_complete_switches_to_verify(
            temp_dir: Path,
        ) -> None:
            """Check the switch to verify mode after a read with no todos left.

            All paths named in the implementation plan exist and no todos are
            synced onto the definition of done. After a successful ``read``, the
            test expects one persistent steering message announcing verification,
            no ephemeral message, and ``workflow_mode`` set to ``"verify"``.
            """

            async def reject_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should not run for this scenario")

            async def reject_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run for this scenario")

            # Build the complete planned output tree before the batch runs.
            nginx_dir = temp_dir / "guides" / "nginx"
            chapters_dir = nginx_dir / "chapters"
            index_file = nginx_dir / "index.html"
            first_chapter = chapters_dir / "01-getting-started.html"
            second_chapter = chapters_dir / "02-installation.html"
            nginx_dir.mkdir(parents=True)
            chapters_dir.mkdir()
            index_file.write_text("<html></html>\n")
            first_chapter.write_text("<h1>One</h1>\n")
            second_chapter.write_text("<h1>Two</h1>\n")

            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{nginx_dir}/`",
                f"- `{chapters_dir}/`",
                f"- `{index_file}`",
                f"- `{first_chapter}`",
                f"- `{second_chapter}`",
                "",
            ]
            plan_file = temp_dir / "implementation.md"
            plan_file.write_text("\n".join(plan_lines))

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=reject_confidence,
                verify_action=reject_verification,
                auto_recover=False,
            )
            # Capture both steering channels so the assertions can tell them apart.
            persistent_messages: list[str] = []
            ephemeral_messages: list[str] = []
            runtime_context.queue_steering_message_callback = persistent_messages.append
            runtime_context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            # No todos are synced here; only the plan and a verification command are set.
            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(plan_file)
            dod.verification_commands = [f"ls -la {nginx_dir}"]

            read_call = ToolCall(
                id="read-built-verify",
                name="read",
                arguments={"file_path": str(first_chapter)},
            )
            read_outcome = tool_outcome(
                tool_call=read_call,
                output=first_chapter.read_text(),
                is_error=False,
            )
            executor = FakeExecutor([read_outcome])

            summary = TurnSummary(final_response="")
            await runner.execute_batch(
                tool_calls=[read_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert len(persistent_messages) == 1
            handoff = persistent_messages[0]
            expected_fragments = (
                "All explicitly planned artifacts already exist.",
                "Verification should run next.",
                "stop broad rereads",
            )
            for fragment in expected_fragments:
                assert fragment in handoff
            assert ephemeral_messages == []
            assert runtime_context.workflow_mode == "verify"
      
        2296
        
        2297
        
        2298
        @pytest.mark.asyncio
        async def test_tool_batch_runner_observation_handoff_pushes_mutation_step(
            temp_dir: Path,
        ) -> None:
            """Check that a reference read hands off to the next creation todo.

            The todo list holds an "examine" item followed by a "create" item.
            After a successful ``read`` of the reference chapter, the test
            expects a persistent steering message that names the creation todo
            and tells the model to stop gathering reference material, with
            nothing queued on the ephemeral channel.
            """

            async def reject_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def reject_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run for this scenario")

            # Reference material that the read call targets.
            reference_html = "<h1>Introduction</h1>\n<p>Guide cadence.</p>\n"
            reference_file = temp_dir / "fortran" / "chapters" / "01-introduction.html"
            reference_file.parent.mkdir(parents=True)
            reference_file.write_text(reference_html)

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=reject_confidence,
                verify_action=reject_verification,
                auto_recover=False,
            )
            # Capture both steering channels so the assertions can tell them apart.
            persistent_messages: list[str] = []
            ephemeral_messages: list[str] = []
            runtime_context.queue_steering_message_callback = persistent_messages.append
            runtime_context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            todo_contents = (
                "Examine the existing Fortran guide structure to understand the cadence and format",
                "Create the nginx index.html file",
            )
            # Each active form is the content prefixed with "Working on: ".
            todos = [
                {
                    "content": content,
                    "active_form": f"Working on: {content}",
                    "status": "pending",
                }
                for content in todo_contents
            ]
            sync_todos_to_definition_of_done(dod, todos)

            read_call = ToolCall(
                id="read-reference",
                name="read",
                arguments={"file_path": str(reference_file)},
            )
            read_outcome = tool_outcome(
                tool_call=read_call,
                output=reference_html,
                is_error=False,
            )
            executor = FakeExecutor([read_outcome])

            summary = TurnSummary(final_response="")
            await runner.execute_batch(
                tool_calls=[read_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            # Each fragment may appear in any of the persistent messages.
            next_item_fragment = (
                "Continue with the next pending item: `Create the nginx index.html file`"
            )
            act_now_fragment = "stop gathering more reference material and perform the change now"
            assert any(next_item_fragment in queued for queued in persistent_messages)
            assert any(act_now_fragment in queued for queued in persistent_messages)
            assert ephemeral_messages == []
      
        2390
        
        2391
        
        2392
        @pytest.mark.asyncio
        async def test_tool_batch_runner_discovery_completion_handoff_stays_persistent(
            temp_dir: Path,
        ) -> None:
            """Check that the discovery handoff uses the persistent channel.

            The todo list holds an "examine" item followed by a directory-creation
            item. After a successful ``read`` of the reference chapter, the test
            expects at least one persistent steering message naming the
            directory-creation todo, and nothing on the ephemeral channel.
            """

            async def reject_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def reject_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run for this scenario")

            # Reference material that the read call targets.
            reference_html = "<h1>Introduction</h1>\n<p>Guide cadence.</p>\n"
            reference_file = temp_dir / "fortran" / "chapters" / "01-introduction.html"
            reference_file.parent.mkdir(parents=True)
            reference_file.write_text(reference_html)

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=reject_confidence,
                verify_action=reject_verification,
                auto_recover=False,
            )
            # Capture both steering channels so the assertions can tell them apart.
            persistent_messages: list[str] = []
            ephemeral_messages: list[str] = []
            runtime_context.queue_steering_message_callback = persistent_messages.append
            runtime_context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            todo_contents = (
                "First, examine the existing fortran guide structure and content",
                "Create the nginx directory structure",
            )
            # Each active form is the content prefixed with "Working on: ".
            todos = [
                {
                    "content": content,
                    "active_form": f"Working on: {content}",
                    "status": "pending",
                }
                for content in todo_contents
            ]
            sync_todos_to_definition_of_done(dod, todos)

            read_call = ToolCall(
                id="read-reference",
                name="read",
                arguments={"file_path": str(reference_file)},
            )
            read_outcome = tool_outcome(
                tool_call=read_call,
                output=reference_html,
                is_error=False,
            )
            executor = FakeExecutor([read_outcome])

            summary = TurnSummary(final_response="")
            await runner.execute_batch(
                tool_calls=[read_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert persistent_messages
            next_item_fragment = (
                "Continue with the next pending item: `Create the nginx directory structure`"
            )
            assert any(next_item_fragment in queued for queued in persistent_messages)
            assert ephemeral_messages == []
      
        2481
        
        2482
        
        2483
        @pytest.mark.asyncio
        async def test_tool_batch_runner_missing_artifact_nudge_names_next_file_after_setup_mkdir(
            temp_dir: Path,
        ) -> None:
            """Check that the nudge after a setup ``mkdir`` names the next planned file.

            The implementation plan lists a chapters directory and ``index.html``;
            neither is created on disk by this test. After a successful ``bash``
            ``mkdir -p`` outcome from the fake executor, the test expects the last
            persistent steering message to say directory setup is complete and to
            point at ``index.html``, with nothing on the ephemeral channel.
            """

            async def reject_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def reject_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run for this scenario")

            nginx_dir = temp_dir / "Loader" / "guides" / "nginx"
            chapters_dir = nginx_dir / "chapters"
            index_file = nginx_dir / "index.html"

            # Only the plan file is written; the planned paths themselves stay absent.
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{chapters_dir}/`",
                f"- `{index_file}`",
                "",
            ]
            plan_file = temp_dir / "implementation.md"
            plan_file.write_text("\n".join(plan_lines))

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=reject_confidence,
                verify_action=reject_verification,
                auto_recover=False,
            )
            # Capture both steering channels so the assertions can tell them apart.
            persistent_messages: list[str] = []
            ephemeral_messages: list[str] = []
            runtime_context.queue_steering_message_callback = persistent_messages.append
            runtime_context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(plan_file)
            todos = [
                {
                    "content": "Create the nginx directory structure",
                    "active_form": "Creating the nginx directory structure",
                    "status": "pending",
                },
                {
                    "content": "Develop the main index.html file with proper structure",
                    "active_form": "Developing the main index.html file with proper structure",
                    "status": "pending",
                },
            ]
            sync_todos_to_definition_of_done(dod, todos)

            mkdir_call = ToolCall(
                id="mkdir-nginx",
                name="bash",
                arguments={"command": f"mkdir -p {chapters_dir}"},
            )
            mkdir_outcome = tool_outcome(
                tool_call=mkdir_call,
                output="",
                is_error=False,
            )
            executor = FakeExecutor([mkdir_outcome])

            summary = TurnSummary(final_response="")
            await runner.execute_batch(
                tool_calls=[mkdir_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert persistent_messages
            # Only the most recent persistent message is inspected.
            nudge = persistent_messages[-1]
            expected_fragments = (
                "Directory setup is complete.",
                "Next step: create `index.html`.",
                "Write a compact but real initial version of that file now",
            )
            for fragment in expected_fragments:
                assert fragment in nudge
            assert ephemeral_messages == []
      
        2585
        
        2586
        
        2587
        @pytest.mark.asyncio
      
        2588
        async def test_tool_batch_runner_first_chapter_handoff_stays_persistent_until_substantive_output_exists(
      
        2589
            temp_dir: Path,
      
        2590
        ) -> None:
      
        2591
            async def assess_confidence(
      
        2592
                tool_name: str,
      
        2593
                tool_args: dict,
      
        2594
                context: str,
      
        2595
            ) -> ConfidenceAssessment:
      
        2596
                raise AssertionError("Confidence scoring should be disabled in this scenario")
      
        2597
        
        2598
            async def verify_action(
      
        2599
                tool_name: str,
      
        2600
                tool_args: dict,
      
        2601
                result: str,
      
        2602
                expected: str = "",
      
        2603
            ) -> ActionVerification:
      
        2604
                raise AssertionError("Verification should not run for this scenario")
      
        2605
        
        2606
            nginx_root = temp_dir / "guides" / "nginx"
      
        2607
            chapters = nginx_root / "chapters"
      
        2608
            chapters.mkdir(parents=True)
      
        2609
            index_path = nginx_root / "index.html"
      
        2610
        
        2611
            implementation_plan = temp_dir / "implementation.md"
      
        2612
            implementation_plan.write_text(
      
        2613
                "\n".join(
      
        2614
                    [
      
        2615
                        "# Implementation Plan",
      
        2616
                        "",
      
        2617
                        "## File Changes",
      
        2618
                        f"- `{chapters}/`",
      
        2619
                        f"- `{index_path}`",
      
        2620
                        f"- `{chapters / '01-introduction.html'}`",
      
        2621
                        "",
      
        2622
                    ]
      
        2623
                )
      
        2624
            )
      
        2625
        
        2626
            context = build_context(
      
        2627
                temp_dir=temp_dir,
      
        2628
                messages=[],
      
        2629
                safeguards=FakeSafeguards(),
      
        2630
                assess_confidence=assess_confidence,
      
        2631
                verify_action=verify_action,
      
        2632
                auto_recover=False,
      
        2633
            )
      
        2634
            persistent_messages: list[str] = []
      
        2635
            ephemeral_messages: list[str] = []
      
        2636
            context.queue_steering_message_callback = persistent_messages.append
      
        2637
            context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
      
        2638
            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
      
        2639
            dod = create_definition_of_done("Create a multi-file nginx guide.")
      
        2640
            dod.implementation_plan = str(implementation_plan)
      
        2641
            sync_todos_to_definition_of_done(
      
        2642
                dod,
      
        2643
                [
      
        2644
                    {
      
        2645
                        "content": "Create the main index.html file with proper structure",
      
        2646
                        "active_form": "Creating the main index.html file with proper structure",
      
        2647
                        "status": "pending",
      
        2648
                    },
      
        2649
                    {
      
        2650
                        "content": "Create each chapter file with appropriate content",
      
        2651
                        "active_form": "Creating each chapter file with appropriate content",
      
        2652
                        "status": "pending",
      
        2653
                    },
      
        2654
                ],
      
        2655
            )
      
        2656
        
        2657
            tool_call = ToolCall(
      
        2658
                id="write-index",
      
        2659
                name="write",
      
        2660
                arguments={
      
        2661
                    "file_path": str(index_path),
      
        2662
                    "content": "<html></html>\n",
      
        2663
                },
      
        2664
            )
      
        2665
            executor = FakeExecutor(
      
        2666
                [
      
        2667
                    tool_outcome(
      
        2668
                        tool_call=tool_call,
      
        2669
                        output=f"Successfully wrote 14 bytes to {index_path}",
      
        2670
                        is_error=False,
      
        2671
                    )
      
        2672
                ]
      
        2673
            )
      
        2674
        
        2675
            summary = TurnSummary(final_response="")
      
        2676
            await runner.execute_batch(
      
        2677
                tool_calls=[tool_call],
      
        2678
                tool_source="assistant",
      
        2679
                pending_tool_calls_seen=set(),
      
        2680
                emit=_noop_emit,
      
        2681
                summary=summary,
      
        2682
                dod=dod,
      
        2683
                executor=executor,  # type: ignore[arg-type]
      
        2684
                on_confirmation=None,
      
        2685
                on_user_question=None,
      
        2686
                emit_confirmation=None,
      
        2687
                consecutive_errors=0,
      
        2688
            )
      
        2689
        
        2690
            assert persistent_messages
      
        2691
            assert ephemeral_messages == []
      
        2692
            message = persistent_messages[-1]
      
        2693
            assert "Confirmed progress:" in message
      
        2694
            assert "Next step: create `01-introduction.html`." in message
      
        2695
            assert (
      
        2696
                f"Prefer one `write(file_path=..., content=...)` call for `{(chapters / '01-introduction.html').resolve(strict=False)}` now."
      
        2697
                in message
      
        2698
            )
      
        2699
            assert "Write a compact but real initial version of that file now" not in message
      
        2700
            assert "Do not reread reference material or spend the next turn on bookkeeping." in message
      
        2701
        
        2702
        
        2703
        @pytest.mark.asyncio
        async def test_tool_batch_runner_directory_handoff_uses_home_relative_path(
            temp_dir: Path,
            monkeypatch: pytest.MonkeyPatch,
        ) -> None:
            """Check that the handoff after a directory-creating call shows a `~` path.

            HOME is pointed at ``temp_dir`` and the plan lists a chapters directory
            plus ``index.html`` under ``Loader/guides/nginx``. After a successful
            ``bash`` ``mkdir -p`` tool call, the last queued steering message must
            name ``index.html`` as the next step and spell its location as
            ``~/Loader/guides/nginx/index.html``.
            """
            monkeypatch.setenv("HOME", str(temp_dir.resolve(strict=False)))

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Any call here means confidence scoring ran when it should not have.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Any call here means verification ran when it should not have.
                raise AssertionError("Verification should not run for this scenario")

            guide_dir = temp_dir / "Loader" / "guides" / "nginx"
            chapters_dir = guide_dir / "chapters"
            index_file = guide_dir / "index.html"

            plan_file = temp_dir / "implementation.md"
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{chapters_dir}/`",
                f"- `{index_file}`",
                "",
            ]
            plan_file.write_text("\n".join(plan_lines))

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            queued: list[str] = []
            runtime_context.queue_steering_message_callback = queued.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(plan_file)
            pending_steps = (
                (
                    "Create the nginx directory structure",
                    "Creating the nginx directory structure",
                ),
                (
                    "Develop the main index.html file with proper structure",
                    "Developing the main index.html file with proper structure",
                ),
            )
            sync_todos_to_definition_of_done(
                dod,
                [
                    {"content": content, "active_form": active_form, "status": "pending"}
                    for content, active_form in pending_steps
                ],
            )

            mkdir_call = ToolCall(
                id="mkdir-nginx-home",
                name="bash",
                arguments={"command": f"mkdir -p {chapters_dir}"},
            )
            scripted_outcome = tool_outcome(
                tool_call=mkdir_call,
                output="",
                is_error=False,
            )
            executor = FakeExecutor([scripted_outcome])

            turn_summary = TurnSummary(final_response="")
            await runner.execute_batch(
                tool_calls=[mkdir_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert queued
            handoff = queued[-1]
            for expected_fragment in (
                "Next step: create `index.html`.",
                "`~/Loader/guides/nginx/index.html`",
                "Write a compact but real initial version of that file now",
            ):
                assert expected_fragment in handoff
      
        2807
        
        2808
        
        2809
        @pytest.mark.asyncio
        async def test_tool_batch_runner_redirects_post_write_self_audit_to_next_missing_artifact(
            temp_dir: Path,
        ) -> None:
            """Check that rereading an already-touched file redirects to the missing chapter.

            ``index.html`` exists on disk and is recorded in ``dod.touched_files``,
            while the planned ``01-introduction.html`` does not exist. A ``read`` of
            the index must queue a persistent steering message that points at
            creating ``01-introduction.html`` and leave the ephemeral queue empty.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Any call here means confidence scoring ran when it should not have.
                raise AssertionError("Confidence scoring should not run in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Any call here means verification ran when it should not have.
                raise AssertionError("Verification should not run in this scenario")

            guide_dir = temp_dir / "guides" / "nginx"
            chapters_dir = guide_dir / "chapters"
            chapters_dir.mkdir(parents=True)
            index_file = guide_dir / "index.html"
            index_lines = (
                "<html>",
                '<a href="chapters/01-introduction.html">Chapter 1: Introduction to Nginx</a>',
                '<a href="chapters/02-installation.html">Chapter 2: Installation and Setup</a>',
                "</html>",
            )
            # Every line, including the last, is newline-terminated.
            index_file.write_text("".join(f"{line}\n" for line in index_lines))

            first_chapter = chapters_dir / "01-introduction.html"
            plan_file = temp_dir / "implementation.md"
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_dir}/`",
                f"- `{chapters_dir}/`",
                f"- `{index_file}`",
                f"- `{first_chapter}`",
                "",
            ]
            plan_file.write_text("\n".join(plan_lines))

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            persistent: list[str] = []
            ephemeral: list[str] = []
            runtime_context.queue_steering_message_callback = persistent.append
            runtime_context.queue_ephemeral_steering_message_callback = ephemeral.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(plan_file)
            dod.touched_files.append(str(index_file))
            dod.completed_items.append("Develop the main index.html file for the nginx guide")
            dod.pending_items.append("Create chapter files for the nginx guide")

            reread_call = ToolCall(
                id="read-index-self-audit",
                name="read",
                arguments={"file_path": str(index_file)},
            )
            scripted_outcome = tool_outcome(
                tool_call=reread_call,
                output="1\t<html>\n",
                is_error=False,
            )
            executor = FakeExecutor([scripted_outcome])

            turn_summary = TurnSummary(final_response="")
            await runner.execute_batch(
                tool_calls=[reread_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert persistent
            redirect = persistent[-1]
            for expected_fragment in (
                "You already have the current contents of `index.html` from the successful write.",
                "Resume by creating `01-introduction.html` now.",
                "Do not spend another turn rereading the file you just wrote or on TodoWrite alone.",
            ):
                assert expected_fragment in redirect
            assert ephemeral == []
      
        2915
        
        2916
        
        2917
        @pytest.mark.asyncio
        async def test_tool_batch_runner_preserves_first_file_handoff_after_recovery_prompt(
            temp_dir: Path,
        ) -> None:
            """Check the first-file handoff when the history holds an empty-response prompt.

            The context is seeded with a single user message starting with
            ``[EMPTY ASSISTANT RESPONSE]``. After a successful ``write`` of
            ``index.html``, the last persistent steering message must still name
            ``01-introduction.html`` as the next step, must not contain the
            "compact but real initial version" wording, and nothing may be queued
            as ephemeral.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Any call here means confidence scoring ran when it should not have.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Any call here means verification ran when it should not have.
                raise AssertionError("Verification should not run for this scenario")

            guide_dir = temp_dir / "guides" / "nginx"
            chapters_dir = guide_dir / "chapters"
            chapters_dir.mkdir(parents=True)
            index_file = guide_dir / "index.html"
            first_chapter = chapters_dir / "01-introduction.html"

            plan_file = temp_dir / "implementation.md"
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{chapters_dir}/`",
                f"- `{index_file}`",
                f"- `{first_chapter}`",
                "",
            ]
            plan_file.write_text("\n".join(plan_lines))

            recovery_prompt = Message(
                role=Role.USER,
                content=(
                    "[EMPTY ASSISTANT RESPONSE]\n"
                    "Respond with that concrete mutation tool call now. Do not return an empty response."
                ),
            )
            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[recovery_prompt],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            persistent: list[str] = []
            ephemeral: list[str] = []
            runtime_context.queue_steering_message_callback = persistent.append
            runtime_context.queue_ephemeral_steering_message_callback = ephemeral.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(plan_file)
            pending_steps = (
                (
                    "Create the main index.html file with proper structure",
                    "Creating the main index.html file with proper structure",
                ),
                (
                    "Create each chapter file with appropriate content",
                    "Creating each chapter file with appropriate content",
                ),
            )
            sync_todos_to_definition_of_done(
                dod,
                [
                    {"content": content, "active_form": active_form, "status": "pending"}
                    for content, active_form in pending_steps
                ],
            )

            write_call = ToolCall(
                id="write-index-recovered",
                name="write",
                arguments={
                    "file_path": str(index_file),
                    "content": "<html></html>\n",
                },
            )
            scripted_outcome = tool_outcome(
                tool_call=write_call,
                output=f"Successfully wrote 14 bytes to {index_file}",
                is_error=False,
            )
            executor = FakeExecutor([scripted_outcome])

            turn_summary = TurnSummary(final_response="")
            await runner.execute_batch(
                tool_calls=[write_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert persistent
            assert ephemeral == []
            handoff = persistent[-1]
            assert "Next step: create `01-introduction.html`." in handoff
            assert "Write a compact but real initial version of that file now" not in handoff
      
        3033
        
        3034
        
        3035
        @pytest.mark.asyncio
        async def test_tool_batch_runner_todowrite_uses_concrete_output_language_for_aggregate_chapter_step(
            temp_dir: Path,
        ) -> None:
            """Check that a TodoWrite handoff names a concrete file, not the aggregate todo.

            ``index.html`` exists and links to chapter files that are not on disk;
            the only pending todo is the aggregate "Create chapter files ..." item.
            After a successful ``TodoWrite`` call, the last queued steering message
            must say todo tracking is updated, name ``01-introduction.html`` as
            the next step, and must not fall back to the generic "Continue with the
            next pending item" wording.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Any call here means confidence scoring ran when it should not have.
                raise AssertionError("Confidence scoring should not run in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Any call here means verification ran when it should not have.
                raise AssertionError("Verification should not run in this scenario")

            def fresh_todos() -> list[dict[str, str]]:
                # Return a brand-new list each time so the DoD sync and the tool
                # call never share the same list or dict objects.
                return [
                    {
                        "content": "Develop the main index.html file with proper structure",
                        "active_form": "Developing the main index.html file with proper structure",
                        "status": "completed",
                    },
                    {
                        "content": "Create chapter files with content and structure",
                        "active_form": "Creating chapter files with content and structure",
                        "status": "pending",
                    },
                ]

            guide_dir = temp_dir / "guides" / "nginx"
            chapters_dir = guide_dir / "chapters"
            chapters_dir.mkdir(parents=True)
            index_file = guide_dir / "index.html"
            index_lines = (
                "<html>",
                '<a href="chapters/01-introduction.html">Chapter 1: Introduction to Nginx</a>',
                '<a href="chapters/02-installation.html">Chapter 2: Installation and Setup</a>',
                "</html>",
            )
            # Every line, including the last, is newline-terminated.
            index_file.write_text("".join(f"{line}\n" for line in index_lines))

            plan_file = temp_dir / "implementation.md"
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_dir}/`",
                f"- `{chapters_dir}/`",
                f"- `{index_file}`",
                "",
            ]
            plan_file.write_text("\n".join(plan_lines))

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
            )
            queued: list[str] = []
            runtime_context.queue_steering_message_callback = queued.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(plan_file)
            dod.touched_files.append(str(index_file))
            sync_todos_to_definition_of_done(dod, fresh_todos())

            # The same list object is used for the call arguments and the outcome metadata.
            todos = fresh_todos()
            todo_call = ToolCall(
                id="todo-aggregate",
                name="TodoWrite",
                arguments={"todos": todos},
            )
            scripted_outcome = tool_outcome(
                tool_call=todo_call,
                output="Todos updated",
                is_error=False,
                metadata={"new_todos": todos},
            )
            executor = FakeExecutor([scripted_outcome])

            turn_summary = TurnSummary(final_response="")
            await runner.execute_batch(
                tool_calls=[todo_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert queued
            handoff = queued[-1]
            assert "Todo tracking is updated." in handoff
            assert "Next step: create `01-introduction.html`." in handoff
            generic_wording = (
                "Continue with the next pending item: `Create chapter files with content and structure`."
            )
            assert generic_wording not in handoff
      
        3165
        
        3166
        
        3167
        @pytest.mark.asyncio
        async def test_duplicate_observation_nudge_prioritizes_missing_artifact_over_review(
            temp_dir: Path,
        ) -> None:
            """Duplicate-read nudge names the missing planned chapter, not the review todo.

            The implementation plan lists ``06-ssl-configuration.html`` but the file is
            never written, while the first pending todo is the consistency review. The
            queued steering message must mention the missing chapter and must not tell
            the model to continue with the review item.
            """

            # Both reasoning hooks fail loudly so the test notices if they are invoked.
            async def _forbid_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def _forbid_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run for this scenario")

            # On-disk state: index and chapter one exist, the SSL chapter does not.
            guide_root = temp_dir / "guides" / "nginx"
            chapters = guide_root / "chapters"
            chapters.mkdir(parents=True)
            index_path = guide_root / "index.html"
            chapter_one = chapters / "01-getting-started.html"
            missing_chapter = chapters / "06-ssl-configuration.html"
            chapter_one.write_text("<h1>One</h1>\n")
            index_path.write_text("<a href=\"chapters/01-getting-started.html\">One</a>\n")

            # The plan lists all three files; the trailing "" yields a final newline.
            plan_lines = ["# Implementation Plan", "", "## File Changes"]
            plan_lines.extend(
                f"- `{planned}`" for planned in (index_path, chapter_one, missing_chapter)
            )
            plan_lines.append("")
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines))

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=_forbid_confidence,
                verify_action=_forbid_verification,
                auto_recover=False,
            )
            persistent_messages: list[str] = []
            ephemeral_messages: list[str] = []
            runtime_context.queue_steering_message_callback = persistent_messages.append
            runtime_context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(implementation_plan)
            # The review todo is deliberately listed first, ahead of the chapter todo.
            todo_texts = (
                "Ensure all files are properly linked and formatted consistently",
                "Create the final chapter (06-ssl-configuration.html)",
            )
            sync_todos_to_definition_of_done(
                dod,
                [
                    {
                        "content": text,
                        "active_form": f"Working on: {text}",
                        "status": "pending",
                    }
                    for text in todo_texts
                ],
            )

            assert tool_batches_should_prioritize_missing_artifact(
                dod=dod,
                next_pending=dod.pending_items[0],
                missing_artifact=(missing_chapter, False),
                project_root=temp_dir,
            )

            duplicate_read = ToolCall(
                id="dup-read",
                name="read",
                arguments={"file_path": str(index_path)},
            )
            runner._queue_duplicate_observation_nudge(duplicate_read, dod=dod)  # type: ignore[attr-defined]

            assert persistent_messages
            nudge = persistent_messages[-1]
            assert "06-ssl-configuration.html" in nudge
            assert "Do not switch into review or consistency-check mode" in nudge
            assert (
                "Continue with the next pending item: `Ensure all files are properly linked and formatted consistently`"
                not in nudge
            )
      
        3261
        
        3262
        
        3263
        @pytest.mark.asyncio
        async def test_tool_batch_runner_hands_off_to_verification_once_planned_artifacts_exist(
            temp_dir: Path,
        ) -> None:
            """Once every planned file exists, the persistent nudge points at verification.

            All files named in the implementation plan are present on disk and the only
            pending todo is the consistency review. After a successful (faked) write of
            the second chapter, the persistent steering messages must state that all
            planned artifacts exist, mention the pending review item, and tell the model
            to move to verification.
            """

            # Both reasoning hooks fail loudly so the test notices if they are invoked.
            async def _forbid_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def _forbid_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run for this scenario")

            guide_root = temp_dir / "guides" / "nginx"
            chapters = guide_root / "chapters"
            chapters.mkdir(parents=True)
            index_path = guide_root / "index.html"
            chapter_one = chapters / "01-getting-started.html"
            chapter_two = chapters / "02-installation.html"
            # Every planned file is written up front.
            initial_contents = (
                (index_path, "<a href=\"chapters/01-getting-started.html\">One</a>\n"),
                (chapter_one, "<h1>One</h1>\n"),
                (chapter_two, "<h1>Two</h1>\n"),
            )
            for target, html in initial_contents:
                target.write_text(html)

            # The chapters directory entry carries a trailing slash in the plan.
            plan_entries = (f"{chapters}/", index_path, chapter_one, chapter_two)
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text(
                "\n".join(
                    [
                        "# Implementation Plan",
                        "",
                        "## File Changes",
                        *(f"- `{entry}`" for entry in plan_entries),
                        "",
                    ]
                )
            )

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=_forbid_confidence,
                verify_action=_forbid_verification,
                auto_recover=False,
            )
            persistent_messages: list[str] = []
            ephemeral_messages: list[str] = []
            runtime_context.queue_steering_message_callback = persistent_messages.append
            runtime_context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(implementation_plan)
            todo_states = (
                ("Create the guide files", "completed"),
                ("Ensure all files are properly linked and formatted consistently", "pending"),
            )
            sync_todos_to_definition_of_done(
                dod,
                [
                    {
                        "content": text,
                        "active_form": f"Working on: {text}",
                        "status": state,
                    }
                    for text, state in todo_states
                ],
            )

            final_write = ToolCall(
                id="write-final",
                name="write",
                arguments={
                    "file_path": str(chapter_two),
                    "content": "<h1>Two</h1>\n",
                },
            )
            executor = FakeExecutor(
                [
                    tool_outcome(
                        tool_call=final_write,
                        output=f"Successfully wrote {chapter_two}",
                        is_error=False,
                    )
                ]
            )

            await runner.execute_batch(
                tool_calls=[final_write],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=TurnSummary(final_response=""),
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            # Each fragment must appear in at least one persistent steering message.
            expected_fragments = (
                "All explicitly planned artifacts now exist on disk.",
                "Ensure all files are properly linked and formatted consistently",
                "Move to verification once no specific mismatch remains.",
            )
            for fragment in expected_fragments:
                assert any(fragment in queued for queued in persistent_messages)
      
        3383
        
        3384
        
        3385
        @pytest.mark.asyncio
        async def test_tool_batch_runner_mutation_handoff_points_at_next_missing_artifact(
            temp_dir: Path,
        ) -> None:
            """After writing the index, the persistent nudge names the first missing chapter.

            Only ``index.html`` exists on disk; both planned chapters are absent. After a
            successful (faked) write of the index, the last persistent steering message
            must point at ``01-getting-started.html``, and no ephemeral message may be
            queued.
            """

            # Both reasoning hooks fail loudly so the test notices if they are invoked.
            async def _forbid_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should not run in this scenario")

            async def _forbid_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            guide_root = temp_dir / "guides" / "nginx"
            chapters = guide_root / "chapters"
            guide_root.mkdir(parents=True)
            chapters.mkdir()
            index_path = guide_root / "index.html"
            index_path.write_text("<html></html>\n")
            # Neither chapter is written: they are planned but missing.
            chapter_one = chapters / "01-getting-started.html"
            chapter_two = chapters / "02-installation.html"

            # The guide root directory entry carries a trailing slash in the plan.
            plan_entries = (f"{guide_root}/", index_path, chapter_one, chapter_two)
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text(
                "\n".join(
                    [
                        "# Implementation Plan",
                        "",
                        "## File Changes",
                        *(f"- `{entry}`" for entry in plan_entries),
                        "",
                    ]
                )
            )

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=_forbid_confidence,
                verify_action=_forbid_verification,
                auto_recover=False,
            )
            persistent_messages: list[str] = []
            ephemeral_messages: list[str] = []
            runtime_context.queue_steering_message_callback = persistent_messages.append
            runtime_context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(implementation_plan)
            todo_texts = (
                "Create the main index.html file with proper structure",
                "Create each chapter file in sequence, following the established pattern",
                "Ensure all files are properly linked and formatted consistently",
            )
            sync_todos_to_definition_of_done(
                dod,
                [
                    {
                        "content": text,
                        "active_form": f"Working on: {text}",
                        "status": "pending",
                    }
                    for text in todo_texts
                ],
            )

            index_write = ToolCall(
                id="write-index",
                name="write",
                arguments={"file_path": str(index_path), "content": "<html></html>\n"},
            )
            write_result = tool_outcome(
                tool_call=index_write,
                output=f"Successfully wrote {index_path}",
                is_error=False,
            )
            executor = FakeExecutor([write_result])

            await runner.execute_batch(
                tool_calls=[index_write],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=TurnSummary(final_response=""),
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert persistent_messages
            assert ephemeral_messages == []
            handoff = persistent_messages[-1]
            assert "Next step: create `01-getting-started.html`." in handoff
            assert "Write a compact but real initial version of that file now" not in handoff
            assert "refresh `TodoWrite`" not in handoff
            assert "Do not reread reference material or spend the next turn on bookkeeping." in handoff
      
        3494
        
        3495
        
        3496
        @pytest.mark.asyncio
        async def test_tool_batch_runner_large_plan_does_not_claim_completion_early(
            temp_dir: Path,
        ) -> None:
            """With planned chapters still missing, the nudge must not claim completion.

            The plan lists seven chapters but only the first five exist on disk. After a
            successful (faked) write of chapter five, an ephemeral steering message must
            point at ``06-performance-tuning.html`` and no ephemeral message may say that
            all planned artifacts exist.
            """

            # Both reasoning hooks fail loudly so the test notices if they are invoked.
            async def _forbid_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should not run in this scenario")

            async def _forbid_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            guide_root = temp_dir / "guides" / "nginx"
            chapters = guide_root / "chapters"
            guide_root.mkdir(parents=True)
            chapters.mkdir()
            index_path = guide_root / "index.html"
            index_path.write_text("<html></html>\n")

            chapter_names = (
                "01-getting-started.html",
                "02-installation.html",
                "03-first-website.html",
                "04-configuration-basics.html",
                "05-advanced-configurations.html",
                "06-performance-tuning.html",
                "07-security-best-practices.html",
            )
            chapter_paths = [chapters / name for name in chapter_names]
            # Chapters 1-4 get a stem-based heading; chapter 5 matches the write below.
            for existing in chapter_paths[:4]:
                existing.write_text(f"<h1>{existing.stem}</h1>\n")
            chapter_five = chapter_paths[4]
            chapter_five.write_text("<h1>Advanced configurations</h1>\n")

            # Directory entries carry a trailing slash in the plan; files do not.
            plan_entries = (f"{guide_root}/", f"{chapters}/", index_path, *chapter_paths)
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text(
                "\n".join(
                    [
                        "# Implementation Plan",
                        "",
                        "## File Changes",
                        *(f"- `{entry}`" for entry in plan_entries),
                        "",
                    ]
                )
            )

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=_forbid_confidence,
                verify_action=_forbid_verification,
                auto_recover=False,
            )
            persistent_messages: list[str] = []
            ephemeral_messages: list[str] = []
            runtime_context.queue_steering_message_callback = persistent_messages.append
            runtime_context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a thorough nginx guide.")
            dod.implementation_plan = str(implementation_plan)
            todo_specs = (
                ("Create the nginx guide artifacts", "Creating nginx guide artifacts"),
                (
                    "Verify all guide files are linked and complete",
                    "Verifying guide linkage and completeness",
                ),
            )
            sync_todos_to_definition_of_done(
                dod,
                [
                    {"content": content, "active_form": active_form, "status": "pending"}
                    for content, active_form in todo_specs
                ],
            )

            chapter_write = ToolCall(
                id="write-chapter-05",
                name="write",
                arguments={
                    "file_path": str(chapter_five),
                    "content": "<h1>Advanced configurations</h1>\n",
                },
            )
            executor = FakeExecutor(
                [
                    tool_outcome(
                        tool_call=chapter_write,
                        output=f"Successfully wrote {chapter_five}",
                        is_error=False,
                    )
                ]
            )

            await runner.execute_batch(
                tool_calls=[chapter_write],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=TurnSummary(final_response=""),
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert any(
                "Next step: create `06-performance-tuning.html`." in queued
                for queued in ephemeral_messages
            )
            # Equivalent to "no ephemeral message claims completion".
            assert all(
                "All explicitly planned artifacts now exist on disk." not in queued
                for queued in ephemeral_messages
            )
      
        3622
        
        3623
        
        3624
        @pytest.mark.asyncio
      
        3625
        async def test_tool_batch_runner_uses_compact_missing_artifact_nudge_after_substantial_progress(
      
        3626
            temp_dir: Path,
      
        3627
        ) -> None:
      
        3628
            async def assess_confidence(
      
        3629
                tool_name: str,
      
        3630
                tool_args: dict,
      
        3631
                context: str,
      
        3632
            ) -> ConfidenceAssessment:
      
        3633
                raise AssertionError("Confidence scoring should not run in this scenario")
      
        3634
        
        3635
            async def verify_action(
      
        3636
                tool_name: str,
      
        3637
                tool_args: dict,
      
        3638
                result: str,
      
        3639
                expected: str = "",
      
        3640
            ) -> ActionVerification:
      
        3641
                raise AssertionError("Verification should not run in this scenario")
      
        3642
        
        3643
            guide_root = temp_dir / "guides" / "nginx"
      
        3644
            chapters = guide_root / "chapters"
      
        3645
            guide_root.mkdir(parents=True)
      
        3646
            chapters.mkdir()
      
        3647
            index_path = guide_root / "index.html"
      
        3648
            chapter_paths = [
      
        3649
                chapters / "01-introduction.html",
      
        3650
                chapters / "02-installation.html",
      
        3651
                chapters / "03-configuration.html",
      
        3652
                chapters / "04-basic-usage.html",
      
        3653
                chapters / "05-advanced-features.html",
      
        3654
            ]
      
        3655
            for path in (index_path, *chapter_paths[:4]):
      
        3656
                path.write_text("<html></html>\n")
      
        3657
        
        3658
            implementation_plan = temp_dir / "implementation.md"
      
        3659
            implementation_plan.write_text(
      
        3660
                "\n".join(
      
        3661
                    [
      
        3662
                        "# Implementation Plan",
      
        3663
                        "",
      
        3664
                        "## File Changes",
      
        3665
                        f"- `{guide_root}/`",
      
        3666
                        f"- `{chapters}/`",
      
        3667
                        f"- `{index_path}`",
      
        3668
                        *[f"- `{path}`" for path in chapter_paths],
      
        3669
                        "",
      
        3670
                    ]
      
        3671
                )
      
        3672
            )
      
        3673
        
        3674
            context = build_context(
      
        3675
                temp_dir=temp_dir,
      
        3676
                messages=[],
      
        3677
                safeguards=FakeSafeguards(),
      
        3678
                assess_confidence=assess_confidence,
      
        3679
                verify_action=verify_action,
      
        3680
                auto_recover=False,
      
        3681
            )
      
        3682
            persistent_messages: list[str] = []
      
        3683
            ephemeral_messages: list[str] = []
      
        3684
            context.queue_steering_message_callback = persistent_messages.append
      
        3685
            context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
      
        3686
            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
      
        3687
            dod = create_definition_of_done("Create a thorough nginx guide.")
      
        3688
            dod.implementation_plan = str(implementation_plan)
      
        3689
            dod.touched_files.extend(str(path) for path in (index_path, *chapter_paths[:4]))
      
        3690
            dod.completed_items.extend(
      
        3691
                [
      
        3692
                    "Create the nginx directory structure",
      
        3693
                    "Create the main index.html file with proper structure",
      
        3694
                ]
      
        3695
            )
      
        3696
            sync_todos_to_definition_of_done(
      
        3697
                dod,
      
        3698
                [
      
        3699
                    {
      
        3700
                        "content": "Create each chapter file with appropriate content",
      
        3701
                        "active_form": "Creating each chapter file with appropriate content",
      
        3702
                        "status": "pending",
      
        3703
                    }
      
        3704
                ],
      
        3705
            )
      
        3706
            tool_call = ToolCall(
      
        3707
                id="write-chapter-04",
      
        3708
                name="write",
      
        3709
                arguments={
      
        3710
                    "file_path": str(chapter_paths[3]),
      
        3711
                    "content": "<html>updated</html>\n",
      
        3712
                },
      
        3713
            )
      
        3714
            executor = FakeExecutor(
      
        3715
                [
      
        3716
                    tool_outcome(
      
        3717
                        tool_call=tool_call,
      
        3718
                        output=f"Successfully wrote {chapter_paths[3]}",
      
        3719
                        is_error=False,
      
        3720
                    )
      
        3721
                ]
      
        3722
            )
      
        3723
        
        3724
            summary = TurnSummary(final_response="")
      
        3725
            await runner.execute_batch(
      
        3726
                tool_calls=[tool_call],
      
        3727
                tool_source="assistant",
      
        3728
                pending_tool_calls_seen=set(),
      
        3729
                emit=_noop_emit,
      
        3730
                summary=summary,
      
        3731
                dod=dod,
      
        3732
                executor=executor,  # type: ignore[arg-type]
      
        3733
                on_confirmation=None,
      
        3734
                on_user_question=None,
      
        3735
                emit_confirmation=None,
      
        3736
                consecutive_errors=0,
      
        3737
            )
      
        3738
        
        3739
            assert ephemeral_messages
      
        3740
            message = ephemeral_messages[-1]
      
        3741
            assert "Next step: create `05-advanced-features.html`." in message
      
        3742
            assert "Do not reread reference material or spend the next turn on bookkeeping." in message
      
        3743
            assert "refresh `TodoWrite`" not in message
      
        3744
        
        3745
        
        3746
        @pytest.mark.asyncio
        async def test_tool_batch_runner_todowrite_with_missing_artifact_requeues_exact_resume_step(
            temp_dir: Path,
        ) -> None:
            """Check the steering message queued after a TodoWrite-only batch with a missing file.

            The implementation plan lists ``02-installation.html`` but this test never
            writes it. After the batch, the last persistent steering message must name
            that file as the next step, and no ephemeral steering message may be queued.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Fail loudly if the runner ever reaches the confidence hook.
                raise AssertionError("Confidence scoring should not run in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Fail loudly if the runner ever reaches the verification hook.
                raise AssertionError("Verification should not run in this scenario")

            def todo_entries() -> list[dict[str, str]]:
                # Built fresh on every call so the DoD sync, the tool-call arguments,
                # and the outcome metadata never share list or dict objects.
                return [
                    {
                        "content": f"Create {file_name}",
                        "active_form": f"Creating {file_name}",
                        "status": status,
                    }
                    for file_name, status in (
                        ("01-getting-started.html", "completed"),
                        ("02-installation.html", "pending"),
                    )
                ]

            nginx_dir = temp_dir / "guides" / "nginx"
            chapters_dir = nginx_dir / "chapters"
            nginx_dir.mkdir(parents=True)
            chapters_dir.mkdir()

            index_file = nginx_dir / "index.html"
            first_chapter = chapters_dir / "01-getting-started.html"
            second_chapter = chapters_dir / "02-installation.html"
            # Only the index and chapter one are written; chapter two stays absent.
            index_file.write_text("<html></html>\n")
            first_chapter.write_text("<h1>One</h1>\n")

            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{nginx_dir}/`",
                f"- `{chapters_dir}/`",
                f"- `{index_file}`",
                f"- `{first_chapter}`",
                f"- `{second_chapter}`",
            ]
            plan_file = temp_dir / "implementation.md"
            # Every plan line is newline-terminated, including the last one.
            plan_file.write_text("".join(f"{line}\n" for line in plan_lines))

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            persistent_messages: list[str] = []
            ephemeral_messages: list[str] = []
            runtime_context.queue_steering_message_callback = persistent_messages.append
            runtime_context.queue_ephemeral_steering_message_callback = (
                ephemeral_messages.append
            )
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(plan_file)
            sync_todos_to_definition_of_done(dod, todo_entries())
            dod.touched_files.extend([str(index_file), str(first_chapter)])

            todo_call = ToolCall(
                id="todo-only",
                name="TodoWrite",
                arguments={"todos": todo_entries()},
            )
            scripted_outcome = tool_outcome(
                tool_call=todo_call,
                output="Todos updated",
                is_error=False,
                metadata={"new_todos": todo_entries()},
            )
            executor = FakeExecutor([scripted_outcome])

            turn_summary = TurnSummary(final_response="")
            await runner.execute_batch(
                tool_calls=[todo_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert persistent_messages
            message = persistent_messages[-1]
            for expected_fragment in (
                "Todo tracking is updated. Next step: create `02-installation.html`.",
                "Prefer one `write(file_path=..., content=...)` call",
                "Make your next response the concrete mutation tool call itself.",
            ):
                assert expected_fragment in message
            assert ephemeral_messages == []
      
        3887
        
        3888
        
        3889
        @pytest.mark.asyncio
        async def test_tool_batch_runner_todowrite_after_artifacts_exist_pushes_verification_handoff(
            temp_dir: Path,
        ) -> None:
            """Check the steering message queued after a TodoWrite-only batch once all files exist.

            Every file named in the implementation plan is written before the batch.
            The last queued steering message must say the planned artifacts exist,
            mention the verification todo, omit the "Fortran guide structure" todo,
            and the context's workflow mode must be ``"execute"``.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Fail loudly if the runner ever reaches the confidence hook.
                raise AssertionError("Confidence scoring should not run in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Fail loudly if the runner ever reaches the verification hook.
                raise AssertionError("Verification should not run in this scenario")

            examine_text = (
                "First, examine the existing Fortran guide structure to understand "
                "the format and content organization"
            )
            verify_text = "Verify all guide files are linked and complete"

            def pending_todos() -> list[dict[str, str]]:
                # Built fresh on every call so the DoD sync, the tool-call arguments,
                # and the outcome metadata never share list or dict objects.
                return [
                    {
                        "content": text,
                        "active_form": f"Working on: {text}",
                        "status": "pending",
                    }
                    for text in (examine_text, verify_text)
                ]

            nginx_dir = temp_dir / "guides" / "nginx"
            chapters_dir = nginx_dir / "chapters"
            nginx_dir.mkdir(parents=True)
            chapters_dir.mkdir()

            index_file = nginx_dir / "index.html"
            first_chapter = chapters_dir / "01-getting-started.html"
            second_chapter = chapters_dir / "02-installation.html"
            # All three files listed in the plan are written up front.
            index_file.write_text("<html></html>\n")
            first_chapter.write_text("<h1>One</h1>\n")
            second_chapter.write_text("<h1>Two</h1>\n")

            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{nginx_dir}/`",
                f"- `{chapters_dir}/`",
                f"- `{index_file}`",
                f"- `{first_chapter}`",
                f"- `{second_chapter}`",
            ]
            plan_file = temp_dir / "implementation.md"
            # Every plan line is newline-terminated, including the last one.
            plan_file.write_text("".join(f"{line}\n" for line in plan_lines))

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            queued_messages: list[str] = []
            runtime_context.queue_steering_message_callback = queued_messages.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(plan_file)
            dod.verification_commands = [f"ls -la {nginx_dir}"]
            sync_todos_to_definition_of_done(
                dod,
                pending_todos(),
                project_root=temp_dir,
            )

            todo_call = ToolCall(
                id="todo-only",
                name="TodoWrite",
                arguments={"todos": pending_todos()},
            )
            scripted_outcome = tool_outcome(
                tool_call=todo_call,
                output="Todos updated",
                is_error=False,
                metadata={"new_todos": pending_todos()},
            )
            executor = FakeExecutor([scripted_outcome])

            turn_summary = TurnSummary(final_response="")
            await runner.execute_batch(
                tool_calls=[todo_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert queued_messages
            message = queued_messages[-1]
            for expected_fragment in (
                "Todo tracking is updated. All explicitly planned artifacts now exist on disk.",
                "Verify all guide files are linked and complete",
                "Move to verification once no specific mismatch remains.",
                "reopen reference materials",
            ):
                assert expected_fragment in message
            assert "Fortran guide structure" not in message
            assert runtime_context.workflow_mode == "execute"
      
        4032
        
        4033
        
        4034
        @pytest.mark.asyncio
        async def test_tool_batch_runner_todowrite_after_outputs_exist_but_links_missing_still_handoffs_to_verify(
            temp_dir: Path,
        ) -> None:
            """Check the verify handoff after a TodoWrite-only batch when all planned files exist.

            The index page links to both chapter files and to ``../index.html``, which
            this test never creates. The last queued steering message must announce that
            verification runs next, must not contain the "Repair or verify" sentence,
            and the context's workflow mode must be ``"verify"``.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Fail loudly if the runner ever reaches the confidence hook.
                raise AssertionError("Confidence scoring should not run for this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Fail loudly if the runner ever reaches the verification hook.
                raise AssertionError("Verification should not run for this scenario")

            def in_progress_todos() -> list[dict[str, str]]:
                # Built fresh on every call so the DoD sync, the tool-call arguments,
                # and the outcome metadata never share list or dict objects.
                return [
                    {
                        "content": "Create chapter files following the established pattern",
                        "active_form": "Creating chapter files",
                        "status": "in_progress",
                    }
                ]

            nginx_dir = temp_dir / "guides" / "nginx"
            chapters_dir = nginx_dir / "chapters"
            nginx_dir.mkdir(parents=True)
            chapters_dir.mkdir()

            index_file = nginx_dir / "index.html"
            first_chapter = chapters_dir / "01-introduction.html"
            second_chapter = chapters_dir / "02-installation.html"

            anchor_lines = [
                '<a href="chapters/01-introduction.html">Intro</a>',
                '<a href="chapters/02-installation.html">Install</a>',
                '<a href="../index.html">Back</a>',
            ]
            # Every anchor line is newline-terminated, including the last one.
            index_file.write_text("".join(f"{line}\n" for line in anchor_lines))
            first_chapter.write_text("<html></html>\n")
            second_chapter.write_text("<html></html>\n")

            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{nginx_dir}/`",
                f"- `{chapters_dir}/`",
                f"- `{index_file}`",
                f"- `{first_chapter}`",
                f"- `{second_chapter}`",
            ]
            plan_file = temp_dir / "implementation.md"
            plan_file.write_text("".join(f"{line}\n" for line in plan_lines))

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            queued_messages: list[str] = []
            runtime_context.queue_steering_message_callback = queued_messages.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(plan_file)
            dod.verification_commands = [f"ls -la {nginx_dir}"]
            sync_todos_to_definition_of_done(
                dod,
                in_progress_todos(),
                project_root=temp_dir,
            )

            todo_call = ToolCall(
                id="todo-post-build",
                name="TodoWrite",
                arguments={"todos": in_progress_todos()},
            )
            scripted_outcome = tool_outcome(
                tool_call=todo_call,
                output="Todos updated",
                is_error=False,
                metadata={"new_todos": in_progress_todos()},
            )
            executor = FakeExecutor([scripted_outcome])

            turn_summary = TurnSummary(final_response="")
            await runner.execute_batch(
                tool_calls=[todo_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert queued_messages
            message = queued_messages[-1]
            for expected_fragment in (
                "Todo tracking is updated. All explicitly planned artifacts now exist on disk.",
                "Verification should run next.",
            ):
                assert expected_fragment in message
            assert (
                "Repair or verify the current files instead of expanding the artifact set."
                not in message
            )
            assert runtime_context.workflow_mode == "verify"
      
        4169
        
        4170
        
        4171
        @pytest.mark.asyncio
        async def test_tool_batch_runner_todowrite_drops_unplanned_expansion_after_outputs_exist(
            temp_dir: Path,
        ) -> None:
            """TodoWrite must not steer toward a todo that names an unplanned file.

            The implementation plan lists the guide root, the chapters directory,
            ``index.html`` and two chapter files, all of which are created on disk
            before the batch runs. The todo list additionally carries a pending
            ``08-troubleshooting.html`` entry that the plan never mentions. The
            queued steering message is expected to announce verification, to omit
            the unplanned chapter, and the context is expected to end up in
            ``verify`` workflow mode.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should not run for this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run for this scenario")

            # One table drives both the camelCase tool arguments and the
            # snake_case metadata reported back by the fake executor.
            todo_rows = (
                ("Create index.html for nginx guide", "Creating index.html", "in_progress"),
                (
                    "Create chapter 01-introduction.html",
                    "Creating chapter 01-introduction.html",
                    "completed",
                ),
                (
                    "Create chapter 02-installation.html",
                    "Creating chapter 02-installation.html",
                    "completed",
                ),
                (
                    "Create chapter 08-troubleshooting.html",
                    "Creating chapter 08-troubleshooting.html",
                    "pending",
                ),
            )
            requested_todos = [
                {"content": content, "activeForm": active, "status": status}
                for content, active, status in todo_rows
            ]
            recorded_todos = [
                {"content": content, "active_form": active, "status": status}
                for content, active, status in todo_rows
            ]

            # Lay out every planned artifact on disk before the batch runs.
            guide_root = temp_dir / "guides" / "nginx"
            chapters = guide_root / "chapters"
            guide_root.mkdir(parents=True)
            chapters.mkdir()
            index_path = guide_root / "index.html"
            chapter_one = chapters / "01-introduction.html"
            chapter_two = chapters / "02-installation.html"
            index_path.write_text(
                '<a href="chapters/01-introduction.html">Intro</a>\n'
                '<a href="chapters/02-installation.html">Install</a>\n'
                '<a href="../index.html">Back</a>\n'
            )
            for chapter_file in (chapter_one, chapter_two):
                chapter_file.write_text("<html></html>\n")

            plan_lines = ["# Implementation Plan", "", "## File Changes"]
            plan_lines += [f"- `{directory}/`" for directory in (guide_root, chapters)]
            plan_lines += [
                f"- `{planned_file}`"
                for planned_file in (index_path, chapter_one, chapter_two)
            ]
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines) + "\n")

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            steering_log: list[str] = []
            runtime_context.queue_steering_message_callback = steering_log.append
            batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))
            done_definition = create_definition_of_done("Create a multi-file nginx guide.")
            done_definition.implementation_plan = str(implementation_plan)
            done_definition.verification_commands = [f"ls -la {guide_root}"]

            todo_call = ToolCall(
                id="todo-post-build-expansion",
                name="TodoWrite",
                arguments={"todos": requested_todos},
            )
            scripted_outcome = tool_outcome(
                tool_call=todo_call,
                output="Todos updated",
                is_error=False,
                metadata={"new_todos": recorded_todos},
            )
            fake_executor = FakeExecutor([scripted_outcome])

            turn_summary = TurnSummary(final_response="")
            await batch_runner.execute_batch(
                tool_calls=[todo_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=done_definition,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert steering_log
            latest = steering_log[-1]
            assert (
                "Todo tracking is updated. All explicitly planned artifacts now exist on disk."
                in latest
            )
            assert "Verification should run next." in latest
            assert (
                "Repair or verify the current files instead of expanding the artifact set."
                not in latest
            )
            assert "08-troubleshooting.html" not in latest
            assert runtime_context.workflow_mode == "verify"
      
        4326
        
        4327
        
        4328
        @pytest.mark.asyncio
        async def test_tool_batch_runner_todowrite_with_existing_output_roots_requeues_next_mutation(
            temp_dir: Path,
        ) -> None:
            """TodoWrite with a pending chapter todo should queue a concrete write step.

            The planned guide root, chapters directory and ``index.html`` exist on
            disk, and the index links to ``chapters/01-introduction.html``, which
            has not been written. With "Write the introduction chapter" still
            pending, the queued steering message is expected to name
            ``01-introduction.html`` as the next file to create and to ask for the
            mutation tool call itself.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should not run in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            def fresh_todos() -> list[dict[str, str]]:
                # A new list of new dicts per call, so the synced todos, the tool
                # arguments and the outcome metadata never alias one another.
                rows = (
                    (
                        "Examine the existing Fortran guide structure",
                        "Examining the existing Fortran guide structure",
                        "completed",
                    ),
                    (
                        "Create the nginx directory structure",
                        "Creating the nginx directory structure",
                        "completed",
                    ),
                    (
                        "Write the introduction chapter",
                        "Writing the introduction chapter",
                        "pending",
                    ),
                )
                return [
                    {"content": content, "active_form": active, "status": status}
                    for content, active, status in rows
                ]

            guide_root = temp_dir / "guides" / "nginx"
            chapters = guide_root / "chapters"
            guide_root.mkdir(parents=True)
            chapters.mkdir()
            index_path = guide_root / "index.html"
            index_path.write_text(
                "<!DOCTYPE html>\n"
                "<html>\n"
                "<body>\n"
                '<a href="chapters/01-introduction.html">Introduction</a>\n'
                "</body>\n"
                "</html>\n"
            )

            plan_lines = ["# Implementation Plan", "", "## File Changes"]
            plan_lines += [f"- `{directory}/`" for directory in (guide_root, chapters)]
            plan_lines.append(f"- `{index_path}`")
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines) + "\n")

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            steering_log: list[str] = []
            runtime_context.queue_steering_message_callback = steering_log.append
            batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))
            done_definition = create_definition_of_done("Create a multi-file nginx guide.")
            done_definition.implementation_plan = str(implementation_plan)
            done_definition.touched_files.append(str(index_path))
            sync_todos_to_definition_of_done(
                done_definition,
                fresh_todos(),
                project_root=temp_dir,
            )

            todo_call = ToolCall(
                id="todo-next-mutation",
                name="TodoWrite",
                arguments={"todos": fresh_todos()},
            )
            scripted_outcome = tool_outcome(
                tool_call=todo_call,
                output="Todos updated",
                is_error=False,
                metadata={"new_todos": fresh_todos()},
            )
            fake_executor = FakeExecutor([scripted_outcome])

            turn_summary = TurnSummary(final_response="")
            await batch_runner.execute_batch(
                tool_calls=[todo_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=done_definition,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert steering_log
            latest = steering_log[-1]
            expected_fragments = (
                "Todo tracking is updated. Next step: create `01-introduction.html`.",
                "Prefer one `write(file_path=..., content=...)` call",
                "Make your next response the concrete mutation tool call itself.",
            )
            for fragment in expected_fragments:
                assert fragment in latest
      
        4489
        
        4490
        
        4491
        @pytest.mark.asyncio
        async def test_tool_batch_runner_todowrite_prefers_pending_index_over_empty_output_directory(
            temp_dir: Path,
        ) -> None:
            """TodoWrite should point at the missing planned index file first.

            Only the (empty) chapters directory exists on disk; the planned
            ``index.html`` has not been written. With both an index todo and a
            first-chapter todo pending, the queued steering message is expected to
            name ``index.html`` with its resolved path and not to mention
            ``01-introduction.html``.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should not run in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            def fresh_todos() -> list[dict[str, str]]:
                # Returns a brand-new list each time it is called.
                rows = (
                    (
                        "Examine the existing Fortran guide structure to understand the format and depth",
                        "Examining the existing Fortran guide structure",
                        "completed",
                    ),
                    (
                        "Create the new nginx guide directory structure",
                        "Creating the new nginx guide directory structure",
                        "completed",
                    ),
                    (
                        "Create a new index.html for the nginx guide",
                        "Creating a new index.html for the nginx guide",
                        "pending",
                    ),
                    (
                        "Create the first chapter for the nginx guide",
                        "Creating the first chapter for the nginx guide",
                        "pending",
                    ),
                )
                return [
                    {"content": content, "active_form": active, "status": status}
                    for content, active, status in rows
                ]

            guide_root = temp_dir / "Loader" / "guides" / "nginx"
            chapters = guide_root / "chapters"
            chapters.mkdir(parents=True)
            # The index is planned but deliberately never written to disk.
            index_path = guide_root / "index.html"
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text(
                "# Implementation Plan\n"
                "\n"
                "## File Changes\n"
                f"- `{chapters}/`\n"
                f"- `{index_path}`\n"
            )

            done_definition = create_definition_of_done("Create a multi-file nginx guide.")
            done_definition.implementation_plan = str(implementation_plan)
            sync_todos_to_definition_of_done(
                done_definition,
                fresh_todos(),
                project_root=temp_dir,
            )

            steering_log: list[str] = []
            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            runtime_context.queue_steering_message_callback = steering_log.append
            batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            # The same list object backs both the call arguments and the outcome
            # metadata.
            todos = fresh_todos()
            todo_call = ToolCall(
                id="todo-index-before-chapter",
                name="TodoWrite",
                arguments={"todos": todos},
            )
            scripted_outcome = tool_outcome(
                tool_call=todo_call,
                output="Todos updated",
                is_error=False,
                metadata={"new_todos": todos},
            )
            fake_executor = FakeExecutor([scripted_outcome])

            turn_summary = TurnSummary(final_response="")
            await batch_runner.execute_batch(
                tool_calls=[todo_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=done_definition,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert steering_log
            latest = steering_log[-1]
            resolved_index = index_path.resolve(strict=False)
            assert "Todo tracking is updated. Next step: create `index.html`." in latest
            assert (
                f"Prefer one `write(file_path=..., content=...)` call for `{resolved_index}`"
                in latest
            )
            assert "01-introduction.html" not in latest
      
        4627
        
        4628
        
        4629
        @pytest.mark.asyncio
        async def test_tool_batch_runner_todowrite_with_declared_child_targets_names_next_missing_file(
            temp_dir: Path,
        ) -> None:
            """TodoWrite should name the next missing file that the index links to.

            ``index.html`` exists and links to ``chapters/introduction.html`` and
            ``chapters/installation.html``; neither chapter file is created. With
            "Write the introduction chapter" pending, the queued steering message
            is expected to name ``introduction.html`` as the next file to create
            and to ask for the mutation tool call itself.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should not run in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            guide_root = temp_dir / "guides" / "nginx"
            chapters = guide_root / "chapters"
            guide_root.mkdir(parents=True)
            chapters.mkdir()
            index_path = guide_root / "index.html"
            index_path.write_text(
                "<html>\n"
                '<a href="chapters/introduction.html">Introduction</a>\n'
                '<a href="chapters/installation.html">Installation</a>\n'
                "</html>\n"
            )

            plan_lines = ["# Implementation Plan", "", "## File Changes"]
            plan_lines += [f"- `{directory}/`" for directory in (guide_root, chapters)]
            plan_lines.append(f"- `{index_path}`")
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines) + "\n")

            done_definition = create_definition_of_done("Create a multi-file nginx guide.")
            done_definition.implementation_plan = str(implementation_plan)
            done_definition.pending_items = [
                "Write the introduction chapter",
                "Complete the requested work",
            ]
            done_definition.touched_files.append(str(index_path))

            steering_log: list[str] = []
            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            runtime_context.queue_steering_message_callback = steering_log.append
            batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            # The call arguments use the camelCase key; the metadata reported by
            # the fake executor uses the snake_case key.
            todo_content = "Write the introduction chapter"
            todo_active = "Writing the introduction chapter"
            todo_call = ToolCall(
                id="todo-1",
                name="TodoWrite",
                arguments={
                    "todos": [
                        {
                            "content": todo_content,
                            "activeForm": todo_active,
                            "status": "pending",
                        }
                    ]
                },
            )
            scripted_outcome = tool_outcome(
                tool_call=todo_call,
                output="Todos updated",
                is_error=False,
                metadata={
                    "new_todos": [
                        {
                            "content": todo_content,
                            "active_form": todo_active,
                            "status": "pending",
                        }
                    ]
                },
            )
            fake_executor = FakeExecutor([scripted_outcome])

            turn_summary = TurnSummary(final_response="")
            await batch_runner.execute_batch(
                tool_calls=[todo_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=done_definition,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert steering_log
            latest = steering_log[-1]
            expected_fragments = (
                "Todo tracking is updated. Next step: create `introduction.html`.",
                "Prefer one `write(file_path=..., content=...)` call",
                "Make your next response the concrete mutation tool call itself.",
            )
            for fragment in expected_fragments:
                assert fragment in latest
      
        4752
        
        4753
        
        4754
        @pytest.mark.asyncio
        async def test_tool_batch_runner_todowrite_names_concrete_pending_file_after_artifacts_exist(
            temp_dir: Path,
        ) -> None:
            """Check the TodoWrite follow-up names the next chapter file to create.

            The guide index and the first chapter are already on disk, and the index
            links to a second chapter that has not been written.  After a successful
            ``TodoWrite`` whose only todo is the pending second chapter, the last
            queued steering message must point at ``02-installation.html``.
            """

            async def assess_confidence(
                tool_name: str, tool_args: dict, context: str
            ) -> ConfidenceAssessment:
                # Raises if invoked: this scenario must not score confidence.
                raise AssertionError("Confidence scoring should not run in this scenario")

            async def verify_action(
                tool_name: str, tool_args: dict, result: str, expected: str = ""
            ) -> ActionVerification:
                # Raises if invoked: this scenario must not verify actions.
                raise AssertionError("Verification should not run in this scenario")

            # Artifacts already present: index.html (linking two chapters) and chapter one.
            nginx_root = temp_dir / "guides" / "nginx"
            chapters_dir = nginx_root / "chapters"
            nginx_root.mkdir(parents=True)
            chapters_dir.mkdir()
            index_file = nginx_root / "index.html"
            intro_chapter = chapters_dir / "01-introduction.html"
            index_file.write_text(
                "<html>\n"
                '<a href="chapters/01-introduction.html">Chapter 1: Introduction to NGINX Tool</a>\n'
                '<a href="chapters/02-installation.html">Chapter 2: Installation and Setup</a>\n'
                "</html>\n"
            )
            intro_chapter.write_text("<html></html>\n")

            # The plan declares the guide root, the chapters directory and the index only.
            plan_file = temp_dir / "implementation.md"
            plan_file.write_text(
                "# Implementation Plan\n"
                "\n"
                "## File Changes\n"
                f"- `{nginx_root}/`\n"
                f"- `{chapters_dir}/`\n"
                f"- `{index_file}`\n"
            )

            pending_todo = "Creating Chapter 2: Installation and Setup"

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(plan_file)
            dod.pending_items = [pending_todo, "Complete the requested work"]
            dod.touched_files.extend([str(index_file), str(intro_chapter)])

            steering_log: list[str] = []
            context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            context.queue_steering_message_callback = steering_log.append
            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))

            todo_call = ToolCall(
                id="todo-1",
                name="TodoWrite",
                arguments={
                    "todos": [
                        {
                            "content": pending_todo,
                            "activeForm": pending_todo,
                            "status": "pending",
                        }
                    ]
                },
            )
            recorded_todo = {
                "content": pending_todo,
                "active_form": pending_todo,
                "status": "pending",
            }
            executor = FakeExecutor(
                [
                    tool_outcome(
                        tool_call=todo_call,
                        output="Todos updated",
                        is_error=False,
                        metadata={"new_todos": [recorded_todo]},
                    )
                ]
            )

            await runner.execute_batch(
                tool_calls=[todo_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=TurnSummary(final_response=""),
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert steering_log
            latest = steering_log[-1]
            assert "Todo tracking is updated. Next step: create `02-installation.html`." in latest
            assert "Prefer one `write(file_path=..., content=...)` call" in latest
            assert "Make your next response the concrete mutation tool call itself" in latest
      
        4879
        
        4880
        
        4881
        @pytest.mark.asyncio
        async def test_tool_batch_runner_todowrite_uses_observed_sibling_pattern_for_next_file(
            temp_dir: Path,
        ) -> None:
            """Check the TodoWrite follow-up reuses a file name seen in a reference guide.

            The conversation history holds a ``read`` tool call on
            ``fortran/chapters/01-introduction.html``, while the nginx guide has only
            its ``index.html``.  After a successful ``TodoWrite`` for the pending
            introduction chapter, the last queued steering message must name
            ``01-introduction.html`` as the file to create.
            """

            async def assess_confidence(
                tool_name: str, tool_args: dict, context: str
            ) -> ConfidenceAssessment:
                # Raises if invoked: this scenario must not score confidence.
                raise AssertionError("Confidence scoring should not run in this scenario")

            async def verify_action(
                tool_name: str, tool_args: dict, result: str, expected: str = ""
            ) -> ActionVerification:
                # Raises if invoked: this scenario must not verify actions.
                raise AssertionError("Verification should not run in this scenario")

            # Sibling guide whose chapter file was read earlier in the conversation.
            sibling_chapters = temp_dir / "fortran" / "chapters"
            sibling_chapters.mkdir(parents=True)
            sibling_intro = sibling_chapters / "01-introduction.html"
            sibling_intro.write_text("<h1>Introduction</h1>\n")

            # Target guide: only the index exists so far.
            nginx_root = temp_dir / "guides" / "nginx"
            chapters_dir = nginx_root / "chapters"
            nginx_root.mkdir(parents=True)
            chapters_dir.mkdir()
            index_file = nginx_root / "index.html"
            index_file.write_text("<html></html>\n")

            plan_file = temp_dir / "implementation.md"
            plan_file.write_text(
                "# Implementation Plan\n"
                "\n"
                "## File Changes\n"
                f"- `{nginx_root}/`\n"
                f"- `{chapters_dir}/`\n"
                f"- `{index_file}`\n"
            )

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(plan_file)
            dod.pending_items = [
                "Write the introduction chapter",
                "Complete the requested work",
            ]
            dod.touched_files.append(str(index_file))

            prior_read = Message(
                role=Role.ASSISTANT,
                content="",
                tool_calls=[
                    ToolCall(
                        id="read-ref-1",
                        name="read",
                        arguments={"file_path": str(sibling_intro)},
                    )
                ],
            )

            steering_log: list[str] = []
            context = build_context(
                temp_dir=temp_dir,
                messages=[prior_read],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            context.queue_steering_message_callback = steering_log.append
            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))

            todo_call = ToolCall(
                id="todo-observed-1",
                name="TodoWrite",
                arguments={
                    "todos": [
                        {
                            "content": "Write the introduction chapter",
                            "activeForm": "Writing the introduction chapter",
                            "status": "pending",
                        }
                    ]
                },
            )
            recorded_todo = {
                "content": "Write the introduction chapter",
                "active_form": "Writing the introduction chapter",
                "status": "pending",
            }
            executor = FakeExecutor(
                [
                    tool_outcome(
                        tool_call=todo_call,
                        output="Todos updated",
                        is_error=False,
                        metadata={"new_todos": [recorded_todo]},
                    )
                ]
            )

            await runner.execute_batch(
                tool_calls=[todo_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=TurnSummary(final_response=""),
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert steering_log
            latest = steering_log[-1]
            assert "Todo tracking is updated. Next step: create `01-introduction.html`." in latest
            assert "Prefer one `write(file_path=..., content=...)` call" in latest
      
        5009
        
        5010
        
        5011
        @pytest.mark.asyncio
        async def test_tool_batch_runner_bookkeeping_note_with_missing_artifact_requeues_resume_step(
            temp_dir: Path,
        ) -> None:
            """Check a working note queues a resume step while a planned file is missing.

            The implementation plan lists two chapter files but only the first one is
            written.  After a successful ``notepad_write_working`` call, the last
            queued steering message must say a declared artifact is missing and tell
            the model to create ``02-installation.html`` next.
            """

            async def assess_confidence(
                tool_name: str, tool_args: dict, context: str
            ) -> ConfidenceAssessment:
                # Raises if invoked: this scenario must not score confidence.
                raise AssertionError("Confidence scoring should not run in this scenario")

            async def verify_action(
                tool_name: str, tool_args: dict, result: str, expected: str = ""
            ) -> ActionVerification:
                # Raises if invoked: this scenario must not verify actions.
                raise AssertionError("Verification should not run in this scenario")

            nginx_root = temp_dir / "guides" / "nginx"
            chapters_dir = nginx_root / "chapters"
            nginx_root.mkdir(parents=True)
            chapters_dir.mkdir()
            index_file = nginx_root / "index.html"
            written_chapter = chapters_dir / "01-getting-started.html"
            # Declared in the plan below but deliberately never written to disk.
            missing_chapter = chapters_dir / "02-installation.html"
            index_file.write_text("<html></html>\n")
            written_chapter.write_text("<h1>One</h1>\n")

            plan_file = temp_dir / "implementation.md"
            plan_file.write_text(
                "# Implementation Plan\n"
                "\n"
                "## File Changes\n"
                f"- `{nginx_root}/`\n"
                f"- `{chapters_dir}/`\n"
                f"- `{index_file}`\n"
                f"- `{written_chapter}`\n"
                f"- `{missing_chapter}`\n"
            )

            context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            steering_log: list[str] = []
            context.queue_steering_message_callback = steering_log.append
            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(plan_file)
            todo_state = [
                {
                    "content": "Create 01-getting-started.html",
                    "active_form": "Creating 01-getting-started.html",
                    "status": "completed",
                },
                {
                    "content": "Create 02-installation.html",
                    "active_form": "Creating 02-installation.html",
                    "status": "pending",
                },
            ]
            sync_todos_to_definition_of_done(dod, todo_state, project_root=temp_dir)
            dod.touched_files.extend([str(index_file), str(written_chapter)])

            note_call = ToolCall(
                id="working-note",
                name="notepad_write_working",
                arguments={"content": "Creating the second chapter file: Installation"},
            )
            note_outcome = tool_outcome(
                tool_call=note_call,
                output="Working note recorded",
                is_error=False,
            )
            executor = FakeExecutor([note_outcome])

            await runner.execute_batch(
                tool_calls=[note_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=TurnSummary(final_response=""),
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert steering_log
            latest = steering_log[-1]
            assert "Bookkeeping note is recorded. A declared output artifact is still missing." in latest
            assert "Resume by creating `02-installation.html` now." in latest
            assert "Make your next response the concrete mutation tool call itself" in latest
            assert "refresh `TodoWrite`" in latest
            assert "Do not spend the next turn on additional notes, rediscovery, verification, or final confirmation" in latest
      
        5125
        
        5126
        
        5127
        @pytest.mark.asyncio
        async def test_tool_batch_runner_working_note_respects_discovery_first_pending_step(
            temp_dir: Path,
        ) -> None:
            """Check a working note does not skip a pending discovery step.

            No guide files exist yet and the first pending item is an "examine the
            existing guide" step.  After a successful ``notepad_write_working`` call,
            the last queued steering message must continue with that pending item and
            must not tell the model to create ``index.html``.
            """

            async def assess_confidence(
                tool_name: str, tool_args: dict, context: str
            ) -> ConfidenceAssessment:
                # Raises if invoked: this scenario must not score confidence.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str, tool_args: dict, result: str, expected: str = ""
            ) -> ActionVerification:
                # Raises if invoked: this scenario must not verify actions.
                raise AssertionError("Verification should not run in this scenario")

            # The plan declares outputs under guides/nginx; none are created on disk here.
            nginx_root = temp_dir / "guides" / "nginx"
            plan_file = temp_dir / "implementation.md"
            plan_file.write_text(
                "# Implementation Plan\n"
                "\n"
                "## File Changes\n"
                f"- `{nginx_root / 'index.html'}`\n"
                f"- `{nginx_root / 'chapters'}`\n"
            )

            context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            steering_log: list[str] = []
            context.queue_steering_message_callback = steering_log.append
            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))

            discovery_step = (
                "First, examine the existing fortran guide structure and content to understand the format"
            )
            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(plan_file)
            dod.pending_items.extend(
                [
                    discovery_step,
                    "Create the nginx directory structure",
                    "Develop the main index.html file for the nginx guide",
                ]
            )

            note_call = ToolCall(
                id="working-note",
                name="notepad_write_working",
                arguments={"content": "Analyzing the fortran guide structure before creating nginx guide"},
            )
            note_outcome = tool_outcome(
                tool_call=note_call,
                output="Working note recorded",
                is_error=False,
            )
            executor = FakeExecutor([note_outcome])

            await runner.execute_batch(
                tool_calls=[note_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=TurnSummary(final_response=""),
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert steering_log
            latest = steering_log[-1]
            assert f"Continue with the next pending item: `{discovery_step}`." in latest
            assert "one concrete evidence-gathering tool call" in latest
            assert "Resume by creating `index.html` now." not in latest
      
        5219
        
        5220
        
        5221
        @pytest.mark.asyncio
      
        5222
        async def test_tool_batch_runner_working_note_prefers_declared_output_gap_over_stale_discovery(
      
        5223
            temp_dir: Path,
      
        5224
        ) -> None:
      
        5225
            async def assess_confidence(
      
        5226
                tool_name: str,
      
        5227
                tool_args: dict,
      
        5228
                context: str,
      
        5229
            ) -> ConfidenceAssessment:
      
        5230
                raise AssertionError("Confidence scoring should be disabled in this scenario")
      
        5231
        
        5232
            async def verify_action(
      
        5233
                tool_name: str,
      
        5234
                tool_args: dict,
      
        5235
                result: str,
      
        5236
                expected: str = "",
      
        5237
            ) -> ActionVerification:
      
        5238
                raise AssertionError("Verification should not run in this scenario")
      
        5239
        
        5240
            guide_root = temp_dir / "guides" / "nginx"
      
        5241
            chapters_dir = guide_root / "chapters"
      
        5242
            chapters_dir.mkdir(parents=True)
      
        5243
            index_path = guide_root / "index.html"
      
        5244
            first_chapter = chapters_dir / "01-introduction.html"
      
        5245
            index_path.write_text(
      
        5246
                "\n".join(
      
        5247
                    [
      
        5248
                        '<a href="chapters/01-introduction.html">Introduction</a>',
      
        5249
                        '<a href="chapters/02-installation.html">Installation</a>',
      
        5250
                        '<a href="chapters/03-configuration.html">Configuration</a>',
      
        5251
                    ]
      
        5252
                )
      
        5253
            )
      
        5254
            first_chapter.write_text("<h1>Introduction</h1>\n")
      
        5255
        
        5256
            implementation_plan = temp_dir / "implementation.md"
      
        5257
            implementation_plan.write_text(
      
        5258
                "\n".join(
      
        5259
                    [
      
        5260
                        "# Implementation Plan",
      
        5261
                        "",
      
        5262
                        "## File Changes",
      
        5263
                        f"- `{guide_root / 'index.html'}`",
      
        5264
                        f"- `{chapters_dir}/`",
      
        5265
                        "",
      
        5266
                    ]
      
        5267
                )
      
        5268
            )
      
        5269
        
        5270
            context = build_context(
      
        5271
                temp_dir=temp_dir,
      
        5272
                messages=[],
      
        5273
                safeguards=FakeSafeguards(),
      
        5274
                assess_confidence=assess_confidence,
      
        5275
                verify_action=verify_action,
      
        5276
                auto_recover=False,
      
        5277
            )
      
        5278
            queued_messages: list[str] = []
      
        5279
            context.queue_steering_message_callback = queued_messages.append
      
        5280
            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
      
        5281
            dod = create_definition_of_done("Create a multi-file nginx guide.")
      
        5282
            dod.implementation_plan = str(implementation_plan)
      
        5283
            dod.pending_items.extend(
      
        5284
                [
      
        5285
                    "First, examine the existing fortran guide structure and content to understand the format",
      
        5286
                    "Create chapter files following the established pattern",
      
        5287
                ]
      
        5288
            )
      
        5289
            dod.touched_files.extend([str(index_path), str(first_chapter)])
      
        5290
        
        5291
            tool_call = ToolCall(
      
        5292
                id="working-note",
      
        5293
                name="notepad_write_working",
      
        5294
                arguments={"content": "Created index and first chapter; next is chapter 2"},
      
        5295
            )
      
        5296
            executor = FakeExecutor(
      
        5297
                [
      
        5298
                    tool_outcome(
      
        5299
                        tool_call=tool_call,
      
        5300
                        output="Working note recorded",
      
        5301
                        is_error=False,
      
        5302
                    )
      
        5303
                ]
      
        5304
            )
      
        5305
        
        5306
            summary = TurnSummary(final_response="")
      
        5307
            await runner.execute_batch(
      
        5308
                tool_calls=[tool_call],
      
        5309
                tool_source="assistant",
      
        5310
                pending_tool_calls_seen=set(),
      
        5311
                emit=_noop_emit,
      
        5312
                summary=summary,
      
        5313
                dod=dod,
      
        5314
                executor=executor,  # type: ignore[arg-type]
      
        5315
                on_confirmation=None,
      
        5316
                on_user_question=None,
      
        5317
                emit_confirmation=None,
      
        5318
                consecutive_errors=0,
      
        5319
            )
      
        5320
        
        5321
            assert queued_messages
      
        5322
            message = queued_messages[-1]
      
        5323
            assert "Bookkeeping note is recorded. A declared output artifact is still missing." in message
      
        5324
            assert "Resume by creating `02-installation.html` now." in message
      
        5325
            assert "Continue with the next pending item: `First, examine the existing fortran guide structure" not in message
      
        5326
        
        5327
        
        5328
        @pytest.mark.asyncio
        async def test_tool_batch_runner_shallow_glob_does_not_handoff_before_content_read(
            temp_dir: Path,
        ) -> None:
            """A glob that only lists directories must not queue a steering message.

            The reference guide contains an empty ``chapters`` directory, the plan
            declares nginx outputs that do not exist yet, and the single tool call is a
            ``glob`` whose output is just the two directory paths.
            """

            async def _reject_confidence_scoring(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def _reject_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            # Reference material on disk: only directories, no readable content yet.
            guides_root = temp_dir / "Loader" / "guides"
            reference_root = guides_root / "fortran"
            reference_chapters = reference_root / "chapters"
            reference_chapters.mkdir(parents=True)

            # Plan that declares the nginx outputs (none of which exist).
            nginx_root = guides_root / "nginx"
            plan_path = temp_dir / "implementation.md"
            plan_text = (
                "# Implementation Plan\n"
                "\n"
                "## File Changes\n"
                f"- `{nginx_root / 'index.html'}`\n"
                f"- `{nginx_root / 'chapters'}`\n"
            )
            plan_path.write_text(plan_text)

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=_reject_confidence_scoring,
                verify_action=_reject_verification,
                auto_recover=False,
            )
            nudges: list[str] = []
            runtime_context.queue_steering_message_callback = nudges.append
            batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            definition = create_definition_of_done("Create a multi-file nginx guide.")
            definition.implementation_plan = str(plan_path)
            definition.pending_items.extend(
                [
                    "First, examine the existing fortran guide structure and content",
                    "Create the nginx directory structure",
                    "Develop the main index.html file for nginx guide",
                ]
            )

            glob_call = ToolCall(
                id="glob-1",
                name="glob",
                arguments={"pattern": "**", "path": str(reference_root)},
            )
            glob_outcome = tool_outcome(
                tool_call=glob_call,
                output=f"{reference_root}\n{reference_chapters}",
                is_error=False,
            )
            fake_executor = FakeExecutor([glob_outcome])

            turn_summary = TurnSummary(final_response="")
            await batch_runner.execute_batch(
                tool_calls=[glob_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=definition,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert not nudges
      
        5417
        
        5418
        
        5419
        @pytest.mark.asyncio
        async def test_tool_batch_runner_hands_off_noop_toc_edit_when_file_is_already_valid(
            temp_dir: Path,
        ) -> None:
            """A blocked identical-string edit on an already-correct TOC queues no nudge.

            ``index.html`` links to exactly the chapter files written to disk, with link
            texts equal to each chapter's heading, and the edit result is reported as
            blocked because ``old_string`` and ``new_string`` are identical.
            """

            async def _reject_confidence_scoring(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should not run in this scenario")

            async def _reject_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            task_prompt = (
                "Have a look at ~/Loader/guides/fortran/index.html, then "
                "~/Loader/guides/fortran/chapters. The table of contents links in "
                "index.html are inaccurate and the href’s are wrong. Let’s update the "
                "links and their link texts to be correct."
            )

            # One table drives both the chapter files and the TOC entries so the two
            # are consistent by construction.
            chapter_specs = (
                ("01-introduction.html", "Chapter 1: Introduction to Fortran"),
                ("02-setup.html", "Chapter 2: Setting Up Your Environment"),
            )
            chapters_dir = temp_dir / "chapters"
            chapters_dir.mkdir()
            for file_name, heading in chapter_specs:
                (chapters_dir / file_name).write_text(f"<h1>{heading}</h1>\n")

            toc_entries = "".join(
                f'            <li><a href="chapters/{file_name}">{heading}</a></li>\n'
                for file_name, heading in chapter_specs
            )
            toc_markup = (
                "<h2>Table of Contents</h2>\n"
                '        <ul class="chapter-list">\n'
                f"{toc_entries}"
                "        </ul>\n"
            )
            index_file = temp_dir / "index.html"
            index_file.write_text(toc_markup)

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=_reject_confidence_scoring,
                verify_action=_reject_verification,
                auto_recover=False,
            )
            runtime_context.session.current_task = task_prompt  # type: ignore[attr-defined]
            nudges: list[str] = []
            runtime_context.queue_steering_message_callback = nudges.append
            batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            # The edit replaces the TOC with itself, i.e. a no-op.
            noop_edit = ToolCall(
                id="edit-1",
                name="edit",
                arguments={
                    "file_path": str(index_file),
                    "old_string": toc_markup,
                    "new_string": toc_markup,
                },
            )
            blocked_outcome = tool_outcome(
                tool_call=noop_edit,
                output=(
                    "[Blocked - old_string and new_string are identical - no change "
                    "would occur] Suggestion: Provide different old and new strings"
                ),
                is_error=True,
                state=ToolExecutionState.BLOCKED,
            )
            fake_executor = FakeExecutor([blocked_outcome])

            await batch_runner.execute_batch(
                tool_calls=[noop_edit],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=TurnSummary(final_response=""),
                dod=create_definition_of_done(task_prompt),
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert not nudges
      
        5512
        
        5513
        
        5514
        def test_tool_batch_runner_blocked_noop_edit_nudge_stays_on_active_repair_target(
            temp_dir: Path,
        ) -> None:
            """The blocked no-op edit nudge keeps pointing at the current repair target.

            The conversation carries a "Repair focus" assistant message naming the target
            file; the queued nudge must mention that path, explain that nothing changed
            on disk, suggest replacing the surrounding block, and discourage reopening
            unrelated reference materials.
            """

            async def _reject_confidence_scoring(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def _reject_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            chapters_dir = temp_dir / "guide" / "chapters"
            repair_target = chapters_dir / "04-basic-usage.html"
            missing_chapter = chapters_dir / "05-advanced-topics.html"

            repair_focus = Message(
                role=Role.ASSISTANT,
                content=(
                    "Repair focus:\n"
                    f"- Fix the broken local reference `05-advanced-topics.html` in `{repair_target}`.\n"
                    f"- Immediate next step: edit `{repair_target}`.\n"
                    f"- If the broken reference should remain, create `{missing_chapter}`; otherwise remove or replace `05-advanced-topics.html`.\n"
                ),
            )
            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[repair_focus],
                safeguards=FakeSafeguards(),
                assess_confidence=_reject_confidence_scoring,
                verify_action=_reject_verification,
            )
            nudges: list[str] = []
            runtime_context.queue_steering_message_callback = nudges.append
            batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))
            definition = create_definition_of_done("Repair a guide page.")

            noop_edit = ToolCall(
                id="edit-1",
                name="edit",
                arguments={
                    "file_path": str(repair_target),
                    "old_string": "same",
                    "new_string": "same",
                },
            )
            batch_runner._queue_blocked_html_edit_nudge(
                noop_edit,
                "[Blocked - old_string and new_string are identical - no change would occur] Suggestion: Provide different old and new strings",
                dod=definition,
            )

            assert nudges
            first_nudge = nudges[0]
            assert str(repair_target) in first_nudge
            assert "no on-disk change" in first_nudge
            assert "replace the surrounding block" in first_nudge
            assert "Do not reopen unrelated reference materials" in first_nudge
      
        5574
        
        5575
        
        5576
        def test_tool_batch_runner_blocked_noop_edit_after_full_build_prefers_verification(
            temp_dir: Path,
        ) -> None:
            """With every planned file on disk, the blocked-edit nudge steers to verification.

            Both files listed in the implementation plan exist and are recorded as
            touched, so the nudge must say the planned artifacts exist and point to
            verification, and must not suggest replacing the surrounding block.
            """

            async def _reject_confidence_scoring(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def _reject_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            # Create every artifact the plan declares.
            guide_root = temp_dir / "guide"
            chapters_dir = guide_root / "chapters"
            chapters_dir.mkdir(parents=True)
            index_file = guide_root / "index.html"
            intro_chapter = chapters_dir / "01-introduction.html"
            index_file.write_text("<html></html>\n")
            intro_chapter.write_text("<html></html>\n")

            plan_path = temp_dir / "implementation.md"
            plan_text = (
                "# Implementation Plan\n"
                "\n"
                "## File Changes\n"
                f"- `{index_file}`\n"
                f"- `{intro_chapter}`\n"
            )
            plan_path.write_text(plan_text)

            repair_focus = Message(
                role=Role.ASSISTANT,
                content=(
                    "Repair focus:\n"
                    f"- Confirm the final guide state in `{index_file}`.\n"
                    f"- Immediate next step: verify `{index_file}` if no concrete mismatch remains.\n"
                ),
            )
            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[repair_focus],
                safeguards=FakeSafeguards(),
                assess_confidence=_reject_confidence_scoring,
                verify_action=_reject_verification,
            )
            nudges: list[str] = []
            runtime_context.queue_steering_message_callback = nudges.append
            batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            definition = create_definition_of_done("Create a multi-file guide.")
            definition.implementation_plan = str(plan_path)
            definition.touched_files.extend([str(index_file), str(intro_chapter)])
            definition.verification_commands = [f"ls -la {guide_root}"]

            noop_edit = ToolCall(
                id="edit-1",
                name="edit",
                arguments={
                    "file_path": str(index_file),
                    "old_string": "same",
                    "new_string": "same",
                },
            )
            batch_runner._queue_blocked_html_edit_nudge(
                noop_edit,
                "[Blocked - old_string and new_string are identical - no change would occur] Suggestion: Provide different old and new strings",
                dod=definition,
            )

            assert nudges
            first_nudge = nudges[0]
            assert "All explicitly planned artifacts already exist." in first_nudge
            assert "Move to verification or final confirmation using the files already on disk." in first_nudge
            assert "replace the surrounding block" not in first_nudge
      
        5659
        
        5660
        
        5661
        async def _noop_emit(event: AgentEvent) -> None:
            """Accept an event and discard it; used where a test ignores emitted events."""
      
        5663
        
        5664
        
        5665
        @pytest.mark.asyncio
        async def test_tool_batch_runner_marks_verification_planned_after_new_mutation(
            temp_dir: Path,
        ) -> None:
            """A successful ``write`` leaves the definition of done with verification planned.

            After the batch, the DoD must report a "planned" verification result with
            attempt number 1, and the last workflow timeline entry must record the same
            planned attempt.
            """

            async def _reject_confidence_scoring(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def _reject_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run for this scenario")

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=_reject_confidence_scoring,
                verify_action=_reject_verification,
            )
            batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            readme_path = temp_dir / "README.md"
            write_call = ToolCall(
                id="write-1",
                name="write",
                arguments={"file_path": str(readme_path), "content": "updated\n"},
            )
            write_outcome = tool_outcome(
                tool_call=write_call, output="wrote file", is_error=False
            )
            fake_executor = FakeExecutor([write_outcome])
            turn_summary = TurnSummary(final_response="")
            definition = create_definition_of_done("Update README and verify it still works.")
            captured_events: list[AgentEvent] = []

            async def record_event(event: AgentEvent) -> None:
                captured_events.append(event)

            await batch_runner.execute_batch(
                tool_calls=[write_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=record_event,
                summary=turn_summary,
                dod=definition,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            # State recorded on the definition of done itself.
            assert definition.last_verification_result == "planned"
            assert definition.verification_commands
            assert "Collect verification evidence" in definition.pending_items
            assert definition.active_verification_attempt_id == "verification-attempt-1"
            assert definition.active_verification_attempt_number == 1

            # The same planned attempt mirrored on the newest timeline entry.
            latest_entry = turn_summary.workflow_timeline[-1]
            assert latest_entry.reason_code == "verification_planned"
            assert latest_entry.policy_outcome == "planned"
            first_observation = latest_entry.verification_observations[0]
            assert first_observation.status == "planned"
            assert first_observation.attempt_id == "verification-attempt-1"
            assert first_observation.attempt_number == 1
      
        5736
        
        5737
        
        5738
        @pytest.mark.asyncio
        async def test_tool_batch_runner_does_not_mark_verification_planned_after_setup_only_mkdir(
            temp_dir: Path,
        ) -> None:
            """A lone ``mkdir -p`` for a planned directory must not plan verification.

            The DoD keeps no verification result, no "Collect verification evidence"
            pending item is added, and no timeline entry carries the
            ``verification_planned`` reason code.
            """

            async def _reject_confidence_scoring(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def _reject_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=_reject_confidence_scoring,
                verify_action=_reject_verification,
            )
            batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            # Planned outputs; none of them is created on disk by this test.
            nginx_root = temp_dir / "Loader" / "guides" / "nginx"
            chapters_dir = nginx_root / "chapters"
            plan_path = temp_dir / "implementation.md"
            plan_text = (
                "# Implementation Plan\n"
                "\n"
                "## File Changes\n"
                f"- `{chapters_dir}/`\n"
                f"- `{nginx_root / 'index.html'}`\n"
            )
            plan_path.write_text(plan_text)

            mkdir_call = ToolCall(
                id="mkdir-1",
                name="bash",
                arguments={"command": f"mkdir -p {chapters_dir}"},
            )
            mkdir_outcome = tool_outcome(tool_call=mkdir_call, output="", is_error=False)
            fake_executor = FakeExecutor([mkdir_outcome])
            turn_summary = TurnSummary(final_response="")
            definition = create_definition_of_done(
                "Create an equally thorough nginx guide with chapters."
            )
            definition.implementation_plan = str(plan_path)
            captured_events: list[AgentEvent] = []

            async def record_event(event: AgentEvent) -> None:
                captured_events.append(event)

            await batch_runner.execute_batch(
                tool_calls=[mkdir_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=record_event,
                summary=turn_summary,
                dod=definition,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert definition.last_verification_result is None
            assert "Collect verification evidence" not in definition.pending_items
            assert all(
                entry.reason_code != "verification_planned"
                for entry in turn_summary.workflow_timeline
            )
      
        5816
        
        5817
        
        5818
        @pytest.mark.asyncio
        async def test_tool_batch_runner_does_not_mark_verification_planned_while_chapter_build_pending(
            temp_dir: Path,
        ) -> None:
            """Writing ``index.html`` must not plan verification while a chapter todo is open.

            The batch holds a single ``write`` call for the planned index page with
            a scripted non-error outcome. The planned ``chapters/`` directory exists
            but is empty and "Create first nginx chapter" is still pending, so the
            test expects no verification bookkeeping on the DoD or the timeline.
            """

            # Both reasoning hooks fail loudly if the runner ever reaches them.
            async def reject_confidence_scoring(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def reject_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=reject_confidence_scoring,
                verify_action=reject_verification,
            )
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            # On-disk layout: the chapters directory exists, the index does not yet.
            nginx_root = temp_dir / "Loader" / "guides" / "nginx"
            chapters = nginx_root / "chapters"
            chapters.mkdir(parents=True)
            index_path = nginx_root / "index.html"

            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{nginx_root}/`",
                f"- `{chapters}/`",
                f"- `{index_path}`",
                "",
            ]
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines))

            write_index = ToolCall(
                id="write-index",
                name="write",
                arguments={"file_path": str(index_path), "content": "<html></html>\n"},
            )
            scripted_outcome = tool_outcome(
                tool_call=write_index,
                output="wrote file",
                is_error=False,
            )
            executor = FakeExecutor([scripted_outcome])
            summary = TurnSummary(final_response="")

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(implementation_plan)
            open_todos = [
                "Develop the main index.html file with proper structure",
                "Create first nginx chapter",
            ]
            dod.pending_items.extend(open_todos)

            emitted: list[AgentEvent] = []

            async def emit(event: AgentEvent) -> None:
                emitted.append(event)

            await runner.execute_batch(
                tool_calls=[write_index],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=emit,
                summary=summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            # No verification state may have been created or scheduled.
            assert dod.last_verification_result is None
            assert "Collect verification evidence" not in dod.pending_items
            assert "Create first nginx chapter" in dod.pending_items
            assert all(
                entry.reason_code != "verification_planned" for entry in summary.workflow_timeline
            )
      
        5906
        
        5907
        
        5908
        @pytest.mark.asyncio
        async def test_tool_batch_runner_marks_passed_verification_stale_after_new_mutation(
            temp_dir: Path,
        ) -> None:
            """A new ``write`` after a passing verification must mark that verification stale.

            The DoD starts with a passed attempt 1 and one piece of evidence. After
            the batch the test expects: result ``"stale"``, evidence cleared,
            "Collect verification evidence" moved from completed back to pending,
            attempt 2 active, and a final timeline entry recording the stale
            observation for attempt 1.
            """

            # Both reasoning hooks fail loudly if the runner ever reaches them.
            async def reject_confidence_scoring(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def reject_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run for this scenario")

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=reject_confidence_scoring,
                verify_action=reject_verification,
            )
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            readme_write = ToolCall(
                id="write-1",
                name="write",
                arguments={"file_path": str(temp_dir / "README.md"), "content": "updated\n"},
            )
            scripted_outcome = tool_outcome(
                tool_call=readme_write,
                output="wrote file",
                is_error=False,
            )
            executor = FakeExecutor([scripted_outcome])
            summary = TurnSummary(final_response="")

            # Seed a DoD that already holds a passed verification (attempt 1).
            dod = create_definition_of_done("Update README and verify it still works.")
            dod.verification_commands = ["uv run pytest -q"]
            dod.last_verification_result = "passed"
            dod.verification_attempt_counter = 1
            dod.active_verification_attempt_id = "verification-attempt-1"
            dod.active_verification_attempt_number = 1
            passed_evidence = VerificationEvidence(
                command="uv run pytest -q",
                passed=True,
                stdout="401 passed",
                kind="test",
            )
            dod.evidence = [passed_evidence]
            dod.completed_items.append("Collect verification evidence")

            emitted: list[AgentEvent] = []

            async def emit(event: AgentEvent) -> None:
                emitted.append(event)

            await runner.execute_batch(
                tool_calls=[readme_write],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=emit,
                summary=summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            # DoD-level state after the mutation.
            assert dod.last_verification_result == "stale"
            assert dod.evidence == []
            assert "Collect verification evidence" in dod.pending_items
            assert "Collect verification evidence" not in dod.completed_items
            assert dod.active_verification_attempt_id == "verification-attempt-2"
            assert dod.active_verification_attempt_number == 2

            # Timeline-level record of the stale transition.
            final_entry = summary.workflow_timeline[-1]
            assert final_entry.reason_code == "verification_stale"
            assert final_entry.policy_outcome == "stale"

            observation = final_entry.verification_observations[0]
            assert observation.status == "stale"
            assert observation.attempt_id == "verification-attempt-1"
            assert observation.attempt_number == 1
            assert observation.supersedes_attempt_id == "verification-attempt-2"
            assert observation.command == "uv run pytest -q"
      
        6002
        
        6003
        
        6004
        def test_tool_batch_runner_blocked_active_repair_nudge_uses_repair_scope(temp_dir: Path) -> None:
            """The blocked-active-repair nudge should point at the active repair scope.

            The context is seeded with one assistant "Repair focus" message naming
            the file to edit and the missing chapter. The queued steering text is
            expected to contain both paths and the instruction not to reopen
            unrelated reference materials.
            """

            # Both reasoning hooks fail loudly if anything ever awaits them.
            async def reject_confidence_scoring(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def reject_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            repair_target = temp_dir / "guide" / "index.html"
            missing_chapter = temp_dir / "guide" / "chapters" / "01-getting-started.html"
            repair_focus = (
                "Repair focus:\n"
                f"- Fix the broken local reference `chapters/01-getting-started.html` in `{repair_target}`.\n"
                f"- Immediate next step: edit `{repair_target}`.\n"
                f"- If the broken reference should remain, create `{missing_chapter}`; otherwise remove or replace `chapters/01-getting-started.html`.\n"
            )

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
                safeguards=FakeSafeguards(),
                assess_confidence=reject_confidence_scoring,
                verify_action=reject_verification,
            )
            # Capture steering messages instead of delivering them.
            steering: list[str] = []
            runtime_context.queue_steering_message_callback = steering.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            runner._queue_blocked_active_repair_nudge(
                "[Blocked - active repair scope: verification already identified the repair target.]"
            )

            assert steering
            nudge = steering[0]
            assert str(repair_target) in nudge
            assert str(missing_chapter) in nudge
            assert "Do not reopen unrelated reference materials" in nudge
      
        6050
        
        6051
        
        6052
        def test_tool_batch_runner_blocked_active_repair_mutation_nudge_uses_allowed_paths(
            temp_dir: Path,
        ) -> None:
            """The blocked-repair-mutation nudge should list the paths named in the repair focus.

            The seeded "Repair focus" message names a chapter file to edit and a
            stylesheet that may be created. The queued steering text is expected to
            contain both paths and the phrase "before widening the change set".
            """

            # Both reasoning hooks fail loudly if anything ever awaits them.
            async def reject_confidence_scoring(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def reject_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            repair_target = temp_dir / "guide" / "chapters" / "05-advanced-configurations.html"
            stylesheet = temp_dir / "guide" / "styles.css"
            repair_focus = (
                "Repair focus:\n"
                f"- Fix the broken local reference `../styles.css` in `{repair_target}`.\n"
                f"- Immediate next step: edit `{repair_target}`.\n"
                f"- If the broken reference should remain, create `{stylesheet}`; otherwise remove or replace `../styles.css`.\n"
            )

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
                safeguards=FakeSafeguards(),
                assess_confidence=reject_confidence_scoring,
                verify_action=reject_verification,
            )
            # Capture steering messages instead of delivering them.
            steering: list[str] = []
            runtime_context.queue_steering_message_callback = steering.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            runner._queue_blocked_active_repair_mutation_nudge(
                "[Blocked - active repair mutation scope: verification already identified the repair target.]"
            )

            assert steering
            nudge = steering[0]
            assert str(repair_target) in nudge
            assert str(stylesheet) in nudge
            assert "before widening the change set" in nudge
      
        6101
        
        6102
        
        6103
        def test_tool_batch_runner_blocked_late_reference_drift_nudge_points_to_missing_artifact(
            temp_dir: Path,
        ) -> None:
            """The late-reference-drift nudge should name the planned file that is still missing.

            The implementation plan lists four guide files; three of them are
            written to disk and ``03-first-website.html`` is not. The queued
            steering text is expected to mention that file and the phrase
            "older reference materials".
            """

            # Both reasoning hooks fail loudly if anything ever awaits them.
            async def reject_confidence_scoring(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def reject_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=reject_confidence_scoring,
                verify_action=reject_verification,
            )
            # Capture steering messages instead of delivering them.
            steering: list[str] = []
            runtime_context.queue_steering_message_callback = steering.append

            store = DefinitionOfDoneStore(temp_dir)
            dod = create_definition_of_done("Create a multi-file guide from a reference")

            plan_text = (
                "# File Changes\n"
                "- `guide/index.html`\n"
                "- `guide/chapters/01-getting-started.html`\n"
                "- `guide/chapters/02-installation.html`\n"
                "- `guide/chapters/03-first-website.html`\n"
            )
            plan_path = temp_dir / "implementation.md"
            plan_path.write_text(plan_text)
            dod.implementation_plan = str(plan_path)

            # Materialise every planned artifact except the third chapter.
            guide_dir = temp_dir / "guide"
            chapters_dir = guide_dir / "chapters"
            chapters_dir.mkdir(parents=True, exist_ok=True)
            existing_artifacts = {
                guide_dir / "index.html": "index",
                chapters_dir / "01-getting-started.html": "one",
                chapters_dir / "02-installation.html": "two",
            }
            for artifact, body in existing_artifacts.items():
                artifact.write_text(body)

            runner = ToolBatchRunner(runtime_context, store)

            runner._queue_blocked_late_reference_drift_nudge(
                "[Blocked - late reference drift: several planned artifacts already exist.]",
                dod=dod,
            )

            assert steering
            nudge = steering[0]
            assert "03-first-website.html" in nudge
            assert "older reference materials" in nudge
      
        6155
        
        6156
        
        6157
        def test_tool_batch_runner_blocked_completed_artifact_scope_nudge_prefers_verification(
            temp_dir: Path,
        ) -> None:
            """With every planned artifact on disk, the scope nudge should steer towards verification.

            All five planned paths exist and one pending "Verify ..." todo has been
            synced into the DoD. After queuing the nudge the test expects the
            context's ``workflow_mode`` to be ``"verify"`` and the steering text to
            cite the pending todo and the verification hand-off phrases.
            """

            # Both reasoning hooks fail loudly if anything ever awaits them.
            async def reject_confidence_scoring(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def reject_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            # Create every artifact the plan is going to list.
            guide_root = temp_dir / "guide"
            chapters = guide_root / "chapters"
            guide_root.mkdir(parents=True)
            chapters.mkdir()
            index_path = guide_root / "index.html"
            chapter_one = chapters / "01-getting-started.html"
            chapter_two = chapters / "02-installation.html"
            for artifact, body in (
                (index_path, "index"),
                (chapter_one, "one"),
                (chapter_two, "two"),
            ):
                artifact.write_text(body)

            planned_paths = (guide_root, chapters, index_path, chapter_one, chapter_two)
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                *(f"- `{planned}`" for planned in planned_paths),
                "",
            ]
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines))

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=reject_confidence_scoring,
                verify_action=reject_verification,
            )
            # Capture steering messages instead of delivering them.
            steering: list[str] = []
            runtime_context.queue_steering_message_callback = steering.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file guide from a reference")
            dod.implementation_plan = str(implementation_plan)
            dod.verification_commands = [f"ls -la {guide_root}"]
            pending_verify_todo = {
                "content": "Verify all guide files are linked and complete",
                "active_form": "Working on: Verify all guide files are linked and complete",
                "status": "pending",
            }
            sync_todos_to_definition_of_done(
                dod,
                [pending_verify_todo],
                project_root=temp_dir,
            )

            runner._queue_blocked_completed_artifact_scope_nudge(
                "[Blocked - completed artifact set scope: all explicitly planned artifacts already exist.]",
                dod=dod,
            )

            assert steering
            assert runtime_context.workflow_mode == "verify"
            nudge = steering[0]
            assert "All explicitly planned artifacts already exist." in nudge
            assert "Verify all guide files are linked and complete" in nudge
            assert "Do not reopen earlier reference materials." in nudge
            assert "Verification should run next" in nudge
      
        6239
        
        6240
        
        6241
        def test_tool_batch_runner_blocked_post_build_audit_nudge_switches_to_verify(
            temp_dir: Path,
        ) -> None:
            """A post-build audit block with all artifacts present should switch the context to verify.

            Every planned path exists and no todos are synced. The nudge is queued
            with a "post-build audit loop" block message; the test expects
            ``workflow_mode`` to be ``"verify"`` and the steering text to tell the
            agent to move to verification or final confirmation.
            """

            # Both reasoning hooks fail loudly if anything ever awaits them.
            async def reject_confidence_scoring(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def reject_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            # Create every artifact the plan is going to list.
            guide_root = temp_dir / "guide"
            chapters = guide_root / "chapters"
            guide_root.mkdir(parents=True)
            chapters.mkdir()
            index_path = guide_root / "index.html"
            chapter_one = chapters / "01-getting-started.html"
            chapter_two = chapters / "02-installation.html"
            for artifact, body in (
                (index_path, "index"),
                (chapter_one, "one"),
                (chapter_two, "two"),
            ):
                artifact.write_text(body)

            planned_paths = (guide_root, chapters, index_path, chapter_one, chapter_two)
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                *(f"- `{planned}`" for planned in planned_paths),
                "",
            ]
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines))

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=reject_confidence_scoring,
                verify_action=reject_verification,
            )
            # Capture steering messages instead of delivering them.
            steering: list[str] = []
            runtime_context.queue_steering_message_callback = steering.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file guide from a reference")
            dod.implementation_plan = str(implementation_plan)
            dod.verification_commands = [f"ls -la {guide_root}"]

            runner._queue_blocked_completed_artifact_scope_nudge(
                "[Blocked - post-build audit loop: all explicitly planned artifacts already exist.]",
                dod=dod,
            )

            assert steering
            assert runtime_context.workflow_mode == "verify"
            nudge = steering[0]
            assert "All explicitly planned artifacts already exist." in nudge
            assert "move to verification or final confirmation" in nudge
      
        6310
        
        6311
        
        6312
        @pytest.mark.asyncio
        async def test_tool_batch_runner_does_not_halt_on_repeated_post_build_audit_blocks(
            temp_dir: Path,
        ) -> None:
            """Repeated post-build audit blocks steer to verification instead of halting.

            Three ``bash`` calls are all reported as BLOCKED with the post-build audit
            message while every artifact listed in the implementation plan is on disk.
            The batch must finish with ``halted`` false, an error streak of zero, the
            workflow mode set to ``"verify"``, and a verification nudge queued.
            """

            # Both callbacks fail loudly: neither is expected to be invoked here.
            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            # Lay out the finished guide on disk.
            guide_root = temp_dir / "guide"
            chapters = guide_root / "chapters"
            guide_root.mkdir(parents=True)
            chapters.mkdir()
            index_path = guide_root / "index.html"
            chapter_one = chapters / "01-getting-started.html"
            chapter_two = chapters / "02-installation.html"
            for artifact, body in (
                (index_path, "index"),
                (chapter_one, "one"),
                (chapter_two, "two"),
            ):
                artifact.write_text(body)

            # The plan lists exactly the directories and files created above.
            plan_lines = ["# Implementation Plan", "", "## File Changes"]
            plan_lines.extend(
                f"- `{planned}`"
                for planned in (guide_root, chapters, index_path, chapter_one, chapter_two)
            )
            plan_lines.append("")
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines))

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
            )
            queued: list[str] = []
            runtime_context.queue_steering_message_callback = queued.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))
            dod = create_definition_of_done("Create a multi-file guide from a reference")
            dod.implementation_plan = str(implementation_plan)
            dod.verification_commands = [f"ls -la {guide_root}"]

            blocked_message = (
                "[Blocked - post-build audit loop: all explicitly planned artifacts already exist.]"
            )
            audit_calls = [
                ToolCall(
                    id=f"audit-{attempt}",
                    name="bash",
                    arguments={"command": f"cd {temp_dir} && ls -la guide/chapters/"},
                )
                for attempt in range(1, 4)
            ]
            # Every audit call comes back BLOCKED with the same message.
            blocked_outcomes = [
                tool_outcome(
                    tool_call=audit_call,
                    output=blocked_message,
                    is_error=True,
                    state=ToolExecutionState.BLOCKED,
                )
                for audit_call in audit_calls
            ]
            executor = FakeExecutor(blocked_outcomes)
            events: list[AgentEvent] = []

            async def record_event(event: AgentEvent) -> None:
                events.append(event)

            result = await runner.execute_batch(
                tool_calls=audit_calls,
                tool_source="native",
                pending_tool_calls_seen=set(),
                emit=record_event,
                summary=TurnSummary(final_response=""),
                dod=dod,
                executor=executor,
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert result.halted is False
            assert result.consecutive_errors == 0
            assert runtime_context.workflow_mode == "verify"
            assert queued
            assert any("move to verification or final confirmation" in message for message in queued)
      
        6419
        
        6420
        
        6421
        def test_tool_batch_runner_blocked_html_declared_target_nudge_uses_closest_declared_target(
            temp_dir: Path,
        ) -> None:
            """The declared-target nudge names the written file and the closest declared target.

            The blocked message carries a "Closest declared local targets" hint; the
            queued steering message must mention the file being written, quote
            ``chapters/02-installation.html``, and contain "same file now".
            """

            # Both callbacks fail loudly: neither is expected to be invoked here.
            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
            )
            queued: list[str] = []
            runtime_context.queue_steering_message_callback = queued.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            written_path = str(temp_dir / "guide" / "chapters" / "01-introduction.html")
            write_call = ToolCall(
                id="write-ch1",
                name="write",
                arguments={"file_path": written_path},
            )
            blocked_message = (
                "[Blocked - HTML page introduces new local targets outside the current declared artifact set] "
                "Suggestion: Keep non-root HTML pages within the root-declared local-link set and avoid "
                "introducing new sibling targets that the guide root does not declare, for example fix: 02-setup.html. "
                "Already-declared local targets include: chapters/01-introduction.html, chapters/02-installation.html, "
                "chapters/03-configuration.html. Closest declared local targets include: chapters/02-installation.html"
            )

            runner._queue_blocked_html_declared_target_nudge(write_call, blocked_message)

            assert queued
            nudge = queued[0]
            assert str(temp_dir / "guide" / "chapters" / "01-introduction.html") in nudge
            assert "`chapters/02-installation.html`" in nudge
            assert "same file now" in nudge
      
        6469
        
        6470
        
        6471
        def test_tool_batch_runner_blocked_html_declared_target_nudge_without_close_match(
            temp_dir: Path,
        ) -> None:
            """Without a closest-target hint, the nudge falls back to the declared set.

            The blocked message lists the declared targets but has no "Closest
            declared local targets" clause. The queued nudge must tell the model to
            remove the invented hrefs, quote a declared target, and omit the
            "closest declared target(s)" wording.
            """

            # Both callbacks fail loudly: neither is expected to be invoked here.
            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
            )
            queued: list[str] = []
            runtime_context.queue_steering_message_callback = queued.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            write_call = ToolCall(
                id="write-ch1",
                name="write",
                arguments={"file_path": str(temp_dir / "guide" / "chapters" / "introduction.html")},
            )
            blocked_message = (
                "[Blocked - HTML page introduces new local targets outside the current declared artifact set] "
                "Suggestion: Keep non-root HTML pages within the root-declared local-link set and avoid "
                "introducing new sibling targets that the guide root does not declare; remove or replace "
                "undeclared hrefs like: troubleshooting.html. "
                "Already-declared local targets include: chapters/introduction.html, chapters/installation.html, "
                "chapters/configuration.html."
            )

            runner._queue_blocked_html_declared_target_nudge(write_call, blocked_message)

            assert queued
            nudge = queued[0]
            assert "Remove the invented hrefs or keep local links within the declared target set" in nudge
            assert "`chapters/installation.html`" in nudge
            assert "closest declared target(s)" not in nudge
      
        6520
        
        6521
        
        6522
        def test_tool_batch_runner_blocked_html_declared_file_creation_nudge_points_to_root(
            temp_dir: Path,
        ) -> None:
            """A blocked undeclared file creation points the model back at the guide root.

            No files exist on disk and the definition of done has no plan attached.
            The queued nudge must mention updating, name the resolved guide-root
            ``index.html``, quote the blocked target, and say to retry the creation.
            """

            # Both callbacks fail loudly: neither is expected to be invoked here.
            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
            )
            queued: list[str] = []
            runtime_context.queue_steering_message_callback = queued.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))
            dod = create_definition_of_done("Create a guide.")

            target = temp_dir / "guide" / "chapters" / "troubleshooting.html"
            # The resolved root path appears in both the blocked message and the assertion.
            resolved_root = (temp_dir / "guide" / "index.html").resolve(strict=False)
            write_call = ToolCall(
                id="write-troubleshooting",
                name="write",
                arguments={"file_path": str(target)},
            )
            blocked_message = (
                "[Blocked - HTML file creation falls outside the current declared artifact set] "
                "Suggestion: Keep new non-root HTML files within the root-declared artifact set and "
                f"update the guide root `{resolved_root}` "
                "before creating undeclared sibling pages, for example: chapters/troubleshooting.html. "
                "Already-declared local targets include: chapters/advanced-topics.html, "
                "chapters/basic-usage.html, chapters/configuration.html"
            )

            runner._queue_blocked_html_declared_file_creation_nudge(
                write_call,
                blocked_message,
                dod=dod,
            )

            assert queued
            nudge = queued[0]
            assert "update" in nudge.lower()
            assert str(resolved_root) in nudge
            assert "`chapters/troubleshooting.html`" in nudge
            assert "retry the file creation" in nudge
      
        6575
        
        6576
        
        6577
        def test_tool_batch_runner_blocked_html_declared_file_creation_after_outputs_exist_prefers_verify(
            temp_dir: Path,
        ) -> None:
            """Once all planned outputs exist, a blocked extra page nudges toward verification.

            The index and both planned chapters are on disk and recorded as touched.
            Trying to create an unplanned chapter must queue a nudge that refuses to
            expand the output set, points at verification, and does not suggest
            updating the guide root.
            """

            # Both callbacks fail loudly: neither is expected to be invoked here.
            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should not run in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            # Build the complete guide: root index plus the two planned chapters.
            guide = temp_dir / "guide"
            chapters = guide / "chapters"
            guide.mkdir()
            chapters.mkdir()
            index = guide / "index.html"
            intro_page = chapters / "01-introduction.html"
            install_page = chapters / "02-installation.html"
            index_lines = [
                '<a href="chapters/01-introduction.html">Intro</a>',
                '<a href="chapters/02-installation.html">Install</a>',
                '<a href="../index.html">Back</a>',
                "",
            ]
            index.write_text("\n".join(index_lines))
            intro_page.write_text("<html></html>\n")
            install_page.write_text("<html></html>\n")

            planned_outputs = (index, intro_page, install_page)
            plan_lines = ["# Implementation Plan", "", "## File Changes"]
            plan_lines.extend(f"- `{planned}`" for planned in planned_outputs)
            plan_lines.append("")
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines))

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
            )
            queued: list[str] = []
            runtime_context.queue_steering_message_callback = queued.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))
            dod = create_definition_of_done("Create a guide.")
            dod.implementation_plan = str(implementation_plan)
            dod.verification_commands = [f"ls -la {guide}"]
            dod.touched_files = [str(planned) for planned in planned_outputs]

            # An extra chapter that is not part of the plan.
            target = guide / "chapters" / "08-advanced-configuration.html"
            write_call = ToolCall(
                id="write-extra",
                name="write",
                arguments={"file_path": str(target)},
            )
            blocked_message = (
                "[Blocked - HTML file creation falls outside the current declared artifact set] "
                "Suggestion: Keep new non-root HTML files within the root-declared artifact set and "
                f"update the guide root `{index.resolve(strict=False)}` before creating undeclared sibling pages, "
                "for example: chapters/08-advanced-configuration.html."
            )

            runner._queue_blocked_html_declared_file_creation_nudge(
                write_call,
                blocked_message,
                dod=dod,
            )

            assert queued
            nudge = queued[0]
            assert "All explicitly planned artifacts already exist on disk." in nudge
            assert "Do not expand the output set with `chapters/08-advanced-configuration.html`." in nudge
            assert "Move to verification or final confirmation using the files already on disk." in nudge
            assert "update the guide root" not in nudge
      
        6664
        
        6665
        
        6666
        def test_tool_batch_runner_blocked_html_missing_target_after_outputs_exist_prefers_verify(
            temp_dir: Path,
        ) -> None:
            """Once all planned outputs exist, a blocked missing-link edit nudges toward repair.

            The index and both planned chapters are on disk and recorded as touched.
            An edit to the root that links to a non-existent chapter must queue a
            nudge that forbids new local-link targets and asks to repair the existing
            generated files instead of expanding the guide.
            """

            # Both callbacks fail loudly: neither is expected to be invoked here.
            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should not run in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            # Build the complete guide: root index plus the two planned chapters.
            guide = temp_dir / "guide"
            chapters = guide / "chapters"
            guide.mkdir()
            chapters.mkdir()
            index = guide / "index.html"
            intro_page = chapters / "01-introduction.html"
            install_page = chapters / "02-installation.html"
            index_lines = [
                '<a href="chapters/01-introduction.html">Intro</a>',
                '<a href="chapters/02-installation.html">Install</a>',
                '<a href="../index.html">Back</a>',
                "",
            ]
            index.write_text("\n".join(index_lines))
            intro_page.write_text("<html></html>\n")
            install_page.write_text("<html></html>\n")

            planned_outputs = (index, intro_page, install_page)
            plan_lines = ["# Implementation Plan", "", "## File Changes"]
            plan_lines.extend(f"- `{planned}`" for planned in planned_outputs)
            plan_lines.append("")
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines))

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
            )
            queued: list[str] = []
            runtime_context.queue_steering_message_callback = queued.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))
            dod = create_definition_of_done("Create a guide.")
            dod.implementation_plan = str(implementation_plan)
            dod.verification_commands = [f"ls -la {guide}"]
            dod.touched_files = [str(planned) for planned in planned_outputs]

            edit_call = ToolCall(
                id="edit-root",
                name="edit",
                arguments={"file_path": str(index)},
            )
            blocked_message = (
                "[Blocked - Edited HTML links point to files that do not exist] "
                "Suggestion: Use only existing local targets for href values and avoid introducing missing links, "
                "for example fix: chapters/08-advanced-configuration.html"
            )

            runner._queue_blocked_html_missing_target_nudge(
                edit_call,
                blocked_message,
                dod=dod,
            )

            assert queued
            nudge = queued[0]
            assert "All explicitly planned artifacts already exist on disk." in nudge
            assert "Do not introduce new local-link targets beyond the current output set." in nudge
            assert "Repair the existing generated files instead of expanding the guide." in nudge
      
        6750
        
        6751
        
        6752
        @pytest.mark.asyncio
      
        6753
        async def test_tool_batch_runner_blocked_empty_file_path_nudges_concrete_next_artifact(
      
        6754
            temp_dir: Path,
      
        6755
        ) -> None:
      
        6756
            async def assess_confidence(
      
        6757
                tool_name: str,
      
        6758
                tool_args: dict,
      
        6759
                context: str,
      
        6760
            ) -> ConfidenceAssessment:
      
        6761
                raise AssertionError("Confidence scoring should be disabled in this scenario")
      
        6762
        
        6763
            async def verify_action(
      
        6764
                tool_name: str,
      
        6765
                tool_args: dict,
      
        6766
                result: str,
      
        6767
                expected: str = "",
      
        6768
            ) -> ActionVerification:
      
        6769
                raise AssertionError("Verification should not run in this scenario")
      
        6770
        
        6771
            guide_root = temp_dir / "guides" / "nginx"
      
        6772
            chapters = guide_root / "chapters"
      
        6773
            chapters.mkdir(parents=True)
      
        6774
            index_path = guide_root / "index.html"
      
        6775
            chapter_one = chapters / "01-introduction.html"
      
        6776
            chapter_two = chapters / "02-installation.html"
      
        6777
            index_path.write_text("<html></html>\n")
      
        6778
            chapter_one.write_text("<h1>Intro</h1>\n")
      
        6779
        
        6780
            implementation_plan = temp_dir / "implementation.md"
      
        6781
            implementation_plan.write_text(
      
        6782
                "\n".join(
      
        6783
                    [
      
        6784
                        "# Implementation Plan",
      
        6785
                        "",
      
        6786
                        "## File Changes",
      
        6787
                        f"- `{index_path}`",
      
        6788
                        f"- `{chapter_one}`",
      
        6789
                        f"- `{chapter_two}`",
      
        6790
                        "",
      
        6791
                    ]
      
        6792
                )
      
        6793
            )
      
        6794
        
        6795
            context = build_context(
      
        6796
                temp_dir=temp_dir,
      
        6797
                messages=[],
      
        6798
                safeguards=FakeSafeguards(),
      
        6799
                assess_confidence=assess_confidence,
      
        6800
                verify_action=verify_action,
      
        6801
                auto_recover=False,
      
        6802
            )
      
        6803
            queued: list[str] = []
      
        6804
            context.queue_steering_message_callback = queued.append
      
        6805
            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
      
        6806
            tool_call = ToolCall(
      
        6807
                id="write-2",
      
        6808
                name="write",
      
        6809
                arguments={"file_path": "", "content": "<html></html>\n"},
      
        6810
            )
      
        6811
            blocked_message = "[Blocked - Empty file path] Suggestion: Provide a valid file path"
      
        6812
            executor = FakeExecutor(
      
        6813
                [
      
        6814
                    ToolExecutionOutcome(
      
        6815
                        tool_call=tool_call,
      
        6816
                        state=ToolExecutionState.BLOCKED,
      
        6817
                        message=Message.tool_result_message(
      
        6818
                            tool_call_id=tool_call.id,
      
        6819
                            display_content=blocked_message,
      
        6820
                            result_content=blocked_message,
      
        6821
                            is_error=True,
      
        6822
                        ),
      
        6823
                        event_content=blocked_message,
      
        6824
                        is_error=True,
      
        6825
                        result_output=blocked_message,
      
        6826
                    )
      
        6827
                ]
      
        6828
            )
      
        6829
            dod = create_definition_of_done("Create a multi-file nginx guide.")
      
        6830
            dod.implementation_plan = str(implementation_plan)
      
        6831
            dod.touched_files.extend([str(index_path), str(chapter_one)])
      
        6832
            dod.pending_items.append("Creating Chapter 2: Installation and Setup")
      
        6833
        
        6834
            await runner.execute_batch(
      
        6835
                tool_calls=[tool_call],
      
        6836
                tool_source="assistant",
      
        6837
                pending_tool_calls_seen=set(),
      
        6838
                emit=_noop_emit,
      
        6839
                summary=TurnSummary(final_response=""),
      
        6840
                dod=dod,
      
        6841
                executor=executor,  # type: ignore[arg-type]
      
        6842
                on_confirmation=None,
      
        6843
                on_user_question=None,
      
        6844
                emit_confirmation=None,
      
        6845
                consecutive_errors=0,
      
        6846
            )
      
        6847
        
        6848
            assert queued
      
        6849
            assert "did not provide a valid `file_path`" in queued[0]
      
        6850
            assert "Resume by creating `02-installation.html` now." in queued[0]
      
        6851
            assert (
      
        6852
                f"Prefer one `write` call for `{display_runtime_path(chapter_two)}` instead of more rereads."
      
        6853
                in queued[0]
      
        6854
            )
      
        6855
            assert context.recovery_context is not None
      
        6856
            assert context.recovery_context.attempts[-1].error == blocked_message