loader Public

Watch 0 Fork 0 Star 0
Python · 257580 bytes Raw Blame History
  
        1
        """Tests for tool-batch execution on RuntimeContext."""
      
        2
        
        3
        from __future__ import annotations
      
        4
        
        5
        from pathlib import Path
      
        6
        from types import SimpleNamespace
      
        7
        
        8
        import pytest
      
        9
        
        10
        from loader.llm.base import Message, Role, ToolCall
      
        11
        from loader.runtime.context import RuntimeContext
      
        12
        from loader.runtime.dod import (
      
        13
            DefinitionOfDoneStore,
      
        14
            VerificationEvidence,
      
        15
            create_definition_of_done,
      
        16
        )
      
        17
        from loader.runtime.events import AgentEvent, TurnSummary
      
        18
        from loader.runtime.executor import ToolExecutionOutcome, ToolExecutionState
      
        19
        from loader.runtime.path_display import display_runtime_path
      
        20
        from loader.runtime.permissions import (
      
        21
            PermissionMode,
      
        22
            build_permission_policy,
      
        23
            load_permission_rules,
      
        24
        )
      
        25
        from loader.runtime.reasoning_types import (
      
        26
            ActionVerification,
      
        27
            ConfidenceAssessment,
      
        28
            ConfidenceLevel,
      
        29
        )
      
        30
        from loader.runtime.recovery import RecoveryContext
      
        31
        from loader.runtime.tool_batches import (
      
        32
            ToolBatchRunner,
      
        33
        )
      
        34
        from loader.runtime.tool_batches import (
      
        35
            _should_prioritize_missing_artifact as tool_batches_should_prioritize_missing_artifact,
      
        36
        )
      
        37
        from loader.runtime.workflow import sync_todos_to_definition_of_done
      
        38
        from loader.tools.base import ToolResult as RegistryToolResult
      
        39
        from loader.tools.base import create_default_registry
      
        40
        from tests.helpers.runtime_harness import ScriptedBackend
      
        41
        
        42
        
        43
        class FakeSession:
            """In-memory session double that records messages and workflow timeline entries."""

            def __init__(self, messages: list[Message]) -> None:
                # Take a shallow copy so later appends never touch the caller's list.
                self.messages = [*messages]
                self.workflow_timeline = list()

            def append(self, message: Message) -> None:
                """Add ``message`` to the end of the recorded message history."""
                self.messages.append(message)

            def append_workflow_timeline_entry(self, entry) -> None:
                """Add ``entry`` to the end of the recorded workflow timeline."""
                self.workflow_timeline.append(entry)
      
        53
        
        54
        
        55
        class FakeCodeFilter:
            """Code-filter double whose ``reset`` does nothing."""

            def reset(self) -> None:
                """No-op: this fake holds no state to clear."""
      
        58
        
        59
        
        60
        class FakeSafeguards:
            """Safeguards double that passes content through and never steers.

            The only configurable behaviour is the value returned by ``detect_loop``,
            supplied through the ``detect_loop_result`` keyword argument.
            """

            def __init__(self, *, detect_loop_result: tuple[bool, str] = (False, "")) -> None:
                self._detect_loop_result = detect_loop_result
                # Opaque placeholder objects for the tracker and validator slots.
                self.action_tracker = object()
                self.validator = object()
                self.code_filter = FakeCodeFilter()

            def filter_stream_chunk(self, content: str) -> str:
                """Return the streamed chunk unchanged."""
                return content

            def filter_complete_content(self, content: str) -> str:
                """Return the complete content unchanged."""
                return content

            def should_steer(self) -> bool:
                """Always report that no steering is needed."""
                return False

            def get_steering_message(self) -> str | None:
                """Always return ``None``; this fake has no steering message."""
                return None

            def record_response(self, content: str) -> None:
                """Accept a response and discard it."""

            def detect_text_loop(self, content: str) -> tuple[bool, str]:
                """Always report that no text loop was found."""
                return (False, "")

            def detect_loop(self) -> tuple[bool, str]:
                """Return the loop-detection result configured at construction."""
                return self._detect_loop_result
      
        87
        
        88
        
        89
        class FakeExecutor:
            """Executor double that replays queued outcomes in FIFO order.

            Every tool call it receives is recorded in ``calls``, including a call
            that arrives after the queue has run dry.
            """

            def __init__(self, outcomes: list[ToolExecutionOutcome]) -> None:
                self.calls: list[ToolCall] = []
                # Private copy: consuming outcomes must not mutate the caller's list.
                self._outcomes = [*outcomes]

            async def execute_tool_call(self, tool_call: ToolCall, **_: object) -> ToolExecutionOutcome:
                """Record ``tool_call`` and hand back the next queued outcome.

                Extra keyword arguments are accepted and ignored.

                Raises:
                    AssertionError: if no outcome is left in the queue.
                """
                self.calls.append(tool_call)
                if self._outcomes:
                    return self._outcomes.pop(0)
                raise AssertionError("No fake tool outcome queued")
      
        99
        
        100
        
        101
        def build_context(
            *,
            temp_dir: Path,
            messages: list[Message],
            safeguards: FakeSafeguards,
            assess_confidence,
            verify_action,
            recovery_context: RecoveryContext | None = None,
            confidence_scoring: bool = False,
            verification: bool = False,
            auto_recover: bool = True,
            min_confidence_for_action: int = 3,
        ) -> RuntimeContext:
            """Assemble a ``RuntimeContext`` rooted at ``temp_dir`` for tool-batch tests.

            The context uses the default tool registry, a WORKSPACE_WRITE permission
            policy built from the rules loaded for ``temp_dir``, a ``ScriptedBackend``
            and a ``FakeSession`` seeded with ``messages``.

            Args:
                temp_dir: Used as project root, workspace root and rule location.
                messages: Initial session messages.
                safeguards: Safeguards double installed on the context.
                assess_confidence: Callable exposed as ``reasoning.assess_confidence``.
                verify_action: Callable exposed as ``reasoning.verify_action``.
                recovery_context: Optional pre-existing recovery context.
                confidence_scoring: Value for ``config.reasoning.confidence_scoring``.
                verification: Value for ``config.reasoning.verification``.
                auto_recover: Value for ``config.auto_recover``.
                min_confidence_for_action: Value for
                    ``config.reasoning.min_confidence_for_action``.

            Returns:
                The constructed ``RuntimeContext``.
            """
            registry = create_default_registry(temp_dir)
            registry.configure_workspace_root(temp_dir)

            permission_rules = load_permission_rules(temp_dir)
            permission_policy = build_permission_policy(
                active_mode=PermissionMode.WORKSPACE_WRITE,
                workspace_root=temp_dir,
                tool_requirements=registry.get_tool_requirements(),
                rules=permission_rules.rules,
            )

            # Plain namespaces stand in for the real config objects.
            reasoning_settings = SimpleNamespace(
                rollback=False,
                show_rollback_plan=False,
                completion_check=True,
                max_continuation_prompts=5,
                self_critique=False,
                confidence_scoring=confidence_scoring,
                min_confidence_for_action=min_confidence_for_action,
                verification=verification,
            )
            runtime_config = SimpleNamespace(
                force_react=False,
                max_recovery_attempts=2,
                auto_recover=auto_recover,
                reasoning=reasoning_settings,
            )
            reasoning_services = SimpleNamespace(
                assess_confidence=assess_confidence,
                verify_action=verify_action,
            )

            return RuntimeContext(
                project_root=temp_dir,
                backend=ScriptedBackend(),
                registry=registry,
                session=FakeSession(messages),  # type: ignore[arg-type]
                config=runtime_config,
                capability_profile=SimpleNamespace(supports_native_tools=True),  # type: ignore[arg-type]
                project_context=None,
                permission_policy=permission_policy,
                permission_config_status=permission_rules,
                workflow_mode="execute",
                safeguards=safeguards,
                reasoning=reasoning_services,
                recovery_context=recovery_context,
            )
      
        156
        
        157
        
        158
        def tool_outcome(
            *,
            tool_call: ToolCall,
            output: str,
            is_error: bool,
            state: ToolExecutionState = ToolExecutionState.EXECUTED,
            metadata: dict[str, object] | None = None,
        ) -> ToolExecutionOutcome:
            """Build a ``ToolExecutionOutcome`` in which every text field equals ``output``.

            Args:
                tool_call: The call this outcome answers; its ``id`` links the message.
                output: Text used for the display, result and event content.
                is_error: Error flag set on the outcome, its message and registry result.
                state: Execution state recorded on the outcome.
                metadata: Registry-result metadata; a falsy value becomes an empty dict.

            Returns:
                The assembled ``ToolExecutionOutcome``.
            """
            result_message = Message.tool_result_message(
                tool_call_id=tool_call.id,
                display_content=output,
                result_content=output,
                is_error=is_error,
            )
            registry_result = RegistryToolResult(
                output=output,
                is_error=is_error,
                metadata=metadata if metadata else {},
            )
            return ToolExecutionOutcome(
                tool_call=tool_call,
                state=state,
                message=result_message,
                event_content=output,
                is_error=is_error,
                result_output=output,
                registry_result=registry_result,
            )
      
        184
        
        185
        
        186
        @pytest.mark.asyncio
        async def test_tool_batch_runner_uses_context_for_confidence_gate(temp_dir: Path) -> None:
            """A LOW confidence assessment keeps the executor idle and appends a user warning."""
            seen: dict[str, str] = {}

            async def assess_confidence(tool_name: str, tool_args: dict, context: str) -> ConfidenceAssessment:
                # Remember the context string handed to the scorer for the assertion below.
                seen["context"] = context
                return ConfidenceAssessment(
                    action=f"{tool_name} with {tool_args}",
                    tool_name=tool_name,
                    tool_args=tool_args,
                    level=ConfidenceLevel.LOW,
                    reasoning="Need to inspect the target first.",
                    risks=["Unknown target file"],
                )

            async def verify_action(tool_name: str, tool_args: dict, result: str, expected: str = "") -> ActionVerification:
                raise AssertionError("Verification should not run for skipped actions")

            history = [
                Message(role=Role.USER, content="Please inspect the project."),
                Message(role=Role.ASSISTANT, content="I will read the file next."),
            ]
            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=history,
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                confidence_scoring=True,
                min_confidence_for_action=3,
            )
            batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))
            read_call = ToolCall(id="read-1", name="read", arguments={"file_path": "README.md"})
            emitted: list[AgentEvent] = []

            async def emit(event: AgentEvent) -> None:
                emitted.append(event)

            # The queued outcome must stay unused: the gate should stop the call first.
            fake_executor = FakeExecutor([tool_outcome(tool_call=read_call, output="unused", is_error=False)])
            batch_result = await batch_runner.execute_batch(
                tool_calls=[read_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=emit,
                summary=TurnSummary(final_response=""),
                dod=create_definition_of_done("Read the docs"),
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert batch_result.actions_taken == []
            assert fake_executor.calls == []
            assert "Please inspect the project." in seen["context"]
            last_message = runtime_context.session.messages[-1]
            assert last_message.role == Role.USER
            assert "[LOW CONFIDENCE WARNING]" in last_message.content
            assert any(event.type == "confidence" for event in emitted)
      
        245
        
        246
        
        247
        @pytest.mark.asyncio
        async def test_tool_batch_runner_tracks_recovery_with_legacy_context(temp_dir: Path) -> None:
            """A failing tool call creates a recovery context and emits a recovery event."""

            async def assess_confidence(tool_name: str, tool_args: dict, context: str) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(tool_name: str, tool_args: dict, result: str, expected: str = "") -> ActionVerification:
                raise AssertionError("Verification should not run for failed actions")

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=True,
            )
            batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))
            failing_call = ToolCall(id="bash-1", name="bash", arguments={"command": "pytest"})
            fake_executor = FakeExecutor(
                [tool_outcome(tool_call=failing_call, output="command failed", is_error=True)]
            )
            turn_summary = TurnSummary(final_response="")
            emitted: list[AgentEvent] = []

            async def emit(event: AgentEvent) -> None:
                emitted.append(event)

            await batch_runner.execute_batch(
                tool_calls=[failing_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=emit,
                summary=turn_summary,
                dod=create_definition_of_done("Run tests"),
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert runtime_context.recovery_context is not None
            assert turn_summary.tool_result_messages
            # The last session message must be the last tool result recorded on the summary.
            assert runtime_context.session.messages[-1] == turn_summary.tool_result_messages[-1]
            assert any(event.type == "recovery" for event in emitted)
      
        290
        
        291
        
        292
        @pytest.mark.asyncio
        async def test_tool_batch_runner_emits_tool_metadata(temp_dir: Path) -> None:
            """The ``tool_result`` event carries the registry result's metadata."""

            async def assess_confidence(tool_name: str, tool_args: dict, context: str) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(tool_name: str, tool_args: dict, result: str, expected: str = "") -> ActionVerification:
                raise AssertionError("Verification should not run for this scenario")

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))
            server_call = ToolCall(
                id="bash-1",
                name="bash",
                arguments={"command": "python -m http.server 8000", "background": True},
            )
            expected_metadata = {
                "job_id": "bash-1",
                "status": "running",
                "background": True,
            }
            queued_outcome = tool_outcome(
                tool_call=server_call,
                output="Started bash job bash-1",
                is_error=False,
                metadata=expected_metadata,
            )
            fake_executor = FakeExecutor([queued_outcome])
            emitted: list[AgentEvent] = []

            async def emit(event: AgentEvent) -> None:
                emitted.append(event)

            await batch_runner.execute_batch(
                tool_calls=[server_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=emit,
                summary=TurnSummary(final_response=""),
                dod=create_definition_of_done("Launch a preview server"),
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            # Take the first tool_result event that was emitted.
            result_event = next(event for event in emitted if event.type == "tool_result")
            assert result_event.tool_metadata == expected_metadata
      
        350
        
        351
        
        352
        @pytest.mark.asyncio
        async def test_tool_batch_runner_verifies_with_context_services(temp_dir: Path) -> None:
            """With verification on, the context's ``verify_action`` sees the tool output.

            The pre-existing recovery context must be kept and must record the
            successful read step.
            """
            verified_results: list[str] = []

            async def assess_confidence(tool_name: str, tool_args: dict, context: str) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(tool_name: str, tool_args: dict, result: str, expected: str = "") -> ActionVerification:
                # Record what the runner passed in, then report a failed verification.
                verified_results.append(result)
                return ActionVerification(
                    tool_name=tool_name,
                    tool_args=tool_args,
                    expected_outcome="Success",
                    actual_result=result,
                    verified=False,
                    discrepancies=["File contents did not match"],
                    needs_correction=True,
                    correction_suggestion="Read the file before editing again.",
                )

            prior_recovery = RecoveryContext(
                original_tool="edit",
                original_args={"file_path": "README.md"},
            )
            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                recovery_context=prior_recovery,
                verification=True,
            )
            batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))
            read_call = ToolCall(id="read-1", name="read", arguments={"file_path": "README.md"})
            fake_executor = FakeExecutor(
                [tool_outcome(tool_call=read_call, output="file contents", is_error=False)]
            )
            emitted: list[AgentEvent] = []

            async def emit(event: AgentEvent) -> None:
                emitted.append(event)

            await batch_runner.execute_batch(
                tool_calls=[read_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=emit,
                summary=TurnSummary(final_response=""),
                dod=create_definition_of_done("Read the docs"),
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert verified_results == ["file contents"]
            assert runtime_context.recovery_context is prior_recovery
            assert prior_recovery.successful_steps == [
                ("read", {"file_path": "README.md"})
            ]
            last_message = runtime_context.session.messages[-1]
            assert last_message.role == Role.TOOL
            assert last_message.content == "file contents"
            assert any(event.type == "verification" for event in emitted)
      
        415
        
        416
        
        417
        @pytest.mark.asyncio
        async def test_tool_batch_runner_preserves_recovery_context_across_diagnostic_success(
            temp_dir: Path,
        ) -> None:
            """A successful ``ls`` via bash keeps the recovery context and logs the step."""

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run for this scenario")

            # Recovery already in flight: one failed read of a missing chapter file.
            prior_recovery = RecoveryContext(
                original_tool="read",
                original_args={"file_path": "chapters/04-data-types.html"},
            )
            prior_recovery.add_attempt(
                "read",
                {"file_path": "chapters/04-data-types.html"},
                "File not found",
            )
            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                recovery_context=prior_recovery,
                auto_recover=False,
            )
            batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))
            listing_call = ToolCall(
                id="bash-1",
                name="bash",
                arguments={"command": "ls chapters"},
            )
            fake_executor = FakeExecutor(
                [tool_outcome(tool_call=listing_call, output="01-introduction.html", is_error=False)]
            )

            turn_summary = TurnSummary(final_response="")
            await batch_runner.execute_batch(
                tool_calls=[listing_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=create_definition_of_done("Fix the chapter links"),
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert runtime_context.recovery_context is prior_recovery
            assert prior_recovery.successful_steps == [
                ("bash", {"command": "ls chapters"})
            ]
      
        483
        
        484
        
        485
        @pytest.mark.asyncio
        async def test_tool_batch_runner_clears_recovery_context_after_successful_mutation(
            temp_dir: Path,
        ) -> None:
            """A successful ``patch`` call resets the context's recovery state to ``None``."""

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run for this scenario")

            # Recovery already in flight: one failed read of a missing chapter file.
            prior_recovery = RecoveryContext(
                original_tool="read",
                original_args={"file_path": "chapters/04-data-types.html"},
            )
            prior_recovery.add_attempt(
                "read",
                {"file_path": "chapters/04-data-types.html"},
                "File not found",
            )
            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                recovery_context=prior_recovery,
                auto_recover=False,
            )
            batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))
            single_hunk = {
                "old_start": 1,
                "old_lines": 1,
                "new_start": 1,
                "new_lines": 1,
                "lines": ["-a", "+b"],
            }
            patch_call = ToolCall(
                id="patch-1",
                name="patch",
                arguments={
                    "file_path": "index.html",
                    "hunks": [single_hunk],
                },
            )
            fake_executor = FakeExecutor(
                [tool_outcome(tool_call=patch_call, output="Patched index.html", is_error=False)]
            )

            turn_summary = TurnSummary(final_response="")
            await batch_runner.execute_batch(
                tool_calls=[patch_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=create_definition_of_done("Fix the chapter links"),
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert runtime_context.recovery_context is None
      
        551
        
        552
        
        553
        @pytest.mark.asyncio
        async def test_tool_batch_runner_queues_duplicate_observation_nudge(
            temp_dir: Path,
        ) -> None:
            """Expect one persistent nudge when a duplicate read hides a missing artifact.

            The fake executor reports a ``DUPLICATE`` outcome for a reread of
            ``index.html`` while the implementation plan lists
            ``chapters/04-variables.html``, a file this test never creates. The
            assertions require exactly one persistent steering message that points
            at that file, and an empty ephemeral queue.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run for this scenario")

            index_path = temp_dir / "index.html"
            chapters_dir = temp_dir / "chapters"
            missing_chapter = chapters_dir / "04-variables.html"
            # Chapter files that get written to disk, mapped to their contents.
            chapter_bodies = {
                "01-introduction.html": "<h1>Intro</h1>\n",
                "02-setup.html": "<h1>Setup</h1>\n",
                "03-basics.html": "<h1>Basics</h1>\n",
            }

            # Prior conversation: a glob listing, one chapter read, and one index read.
            glob_listing = "\n".join(
                f"{temp_dir}/chapters/{name}" for name in chapter_bodies
            )
            first_chapter_html = "<h1>Chapter 1: Introduction to Fortran</h1>\n"
            history = [
                Message(
                    role=Role.TOOL,
                    content="Observation [glob]: Result: " + glob_listing,
                    tool_results=[],
                ),
                Message(
                    role=Role.ASSISTANT,
                    content="I already inspected the first chapter title.",
                    tool_calls=[
                        ToolCall(
                            id="read-ch1",
                            name="read",
                            arguments={
                                "file_path": str(chapters_dir / "01-introduction.html"),
                            },
                        )
                    ],
                ),
                Message.tool_result_message(
                    tool_call_id="read-ch1",
                    display_content=first_chapter_html,
                    result_content=first_chapter_html,
                ),
                Message(
                    role=Role.ASSISTANT,
                    content="I should update the index now.",
                    tool_calls=[
                        ToolCall(
                            id="read-index",
                            name="read",
                            arguments={"file_path": str(index_path)},
                        )
                    ],
                ),
            ]
            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=history,
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )

            # The workspace is populated only after the context has been built.
            chapters_dir.mkdir()
            index_path.write_text("<ul></ul>\n")
            for chapter_name, chapter_body in chapter_bodies.items():
                (chapters_dir / chapter_name).write_text(chapter_body)

            # The plan declares every on-disk file plus the chapter that is missing.
            planned_paths = [
                index_path,
                *(chapters_dir / name for name in chapter_bodies),
                missing_chapter,
            ]
            plan_path = temp_dir / "implementation.md"
            plan_path.write_text(
                "\n".join(
                    [
                        "# Implementation Plan",
                        "",
                        "## File Changes",
                        *(f"- `{planned}`" for planned in planned_paths),
                    ]
                )
            )

            runtime_context.session.current_task = (
                f"Update {index_path} with the right chapter links."
            )
            persistent_messages: list[str] = []
            ephemeral_messages: list[str] = []
            runtime_context.queue_steering_message_callback = persistent_messages.append
            runtime_context.queue_ephemeral_steering_message_callback = (
                ephemeral_messages.append
            )
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            reread_call = ToolCall(
                id="read-dup",
                name="read",
                arguments={"file_path": str(index_path)},
            )
            skipped_text = (
                f"[Skipped - duplicate action: Already read {index_path} recently "
                "without any intervening changes; "
                "reuse the earlier read result instead of rereading]"
            )
            duplicate_outcome = ToolExecutionOutcome(
                tool_call=reread_call,
                state=ToolExecutionState.DUPLICATE,
                message=Message.tool_result_message(
                    tool_call_id=reread_call.id,
                    display_content=skipped_text,
                    result_content=skipped_text,
                ),
                event_content=skipped_text,
                is_error=False,
                result_output=skipped_text,
            )
            executor = FakeExecutor([duplicate_outcome])

            summary = TurnSummary(final_response="")
            dod = create_definition_of_done("Fix the chapter links")
            dod.implementation_plan = str(plan_path)
            dod.pending_items.append("Create the remaining chapter files")
            await runner.execute_batch(
                tool_calls=[reread_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert len(persistent_messages) == 1
            nudge = persistent_messages[0]
            assert "Reuse the earlier observation instead of repeating it." in nudge
            assert "A declared output artifact is still missing." in nudge
            assert "Resume by creating `04-variables.html` now." in nudge
            shown_path = display_runtime_path(missing_chapter)
            assert (
                f"Prefer one `write` call for `{shown_path}` instead of more rereads."
                in nudge
            )
            assert ephemeral_messages == []
      
        702
        
        703
        
        704
        @pytest.mark.asyncio
        async def test_tool_batch_runner_duplicate_read_keeps_root_declared_missing_html_output_active(
            temp_dir: Path,
        ) -> None:
            """Expect a duplicate index read to keep the unfinished chapter work in focus.

            ``guide/index.html`` links to ``chapters/02-installation.html``, but only
            ``01-introduction.html`` is written to disk, and the plan names the
            chapters directory rather than individual chapter files. After a
            ``DUPLICATE`` read of the index, the single persistent steering message
            must mention the pending item and the missing chapter, must not claim
            that all planned artifacts exist, and nothing may be queued as ephemeral.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should not run for this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run for this scenario")

            # On-disk layout: the index links two chapters, only the first exists.
            guide_root = temp_dir / "guide"
            chapters_dir = guide_root / "chapters"
            chapters_dir.mkdir(parents=True)
            index_path = guide_root / "index.html"
            first_chapter = chapters_dir / "01-introduction.html"
            index_links = [
                '<a href="chapters/01-introduction.html">Intro</a>\n',
                '<a href="chapters/02-installation.html">Install</a>\n',
            ]
            index_path.write_text("".join(index_links))
            first_chapter.write_text("<h1>Intro</h1>\n")

            plan_path = temp_dir / "implementation.md"
            plan_text = (
                "# Implementation Plan\n"
                "\n"
                "## File Changes\n"
                f"- `{index_path}`\n"
                f"- `{chapters_dir}/` (directory for chapter files)"
            )
            plan_path.write_text(plan_text)

            earlier_read = ToolCall(
                id="read-index",
                name="read",
                arguments={"file_path": str(index_path)},
            )
            history = [
                Message(
                    role=Role.ASSISTANT,
                    content="I should keep building the guide.",
                    tool_calls=[earlier_read],
                ),
            ]
            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=history,
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            runtime_context.session.current_task = (
                f"Build the guide rooted at {index_path}."
            )
            persistent_messages: list[str] = []
            ephemeral_messages: list[str] = []
            runtime_context.queue_steering_message_callback = persistent_messages.append
            runtime_context.queue_ephemeral_steering_message_callback = (
                ephemeral_messages.append
            )
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            reread_call = ToolCall(
                id="read-dup-rooted",
                name="read",
                arguments={"file_path": str(index_path)},
            )
            skipped_text = (
                f"[Skipped - duplicate action: Already read {index_path} recently "
                "without any intervening changes; "
                "reuse the earlier read result instead of rereading]"
            )
            duplicate_outcome = ToolExecutionOutcome(
                tool_call=reread_call,
                state=ToolExecutionState.DUPLICATE,
                message=Message.tool_result_message(
                    tool_call_id=reread_call.id,
                    display_content=skipped_text,
                    result_content=skipped_text,
                ),
                event_content=skipped_text,
                is_error=False,
                result_output=skipped_text,
            )
            executor = FakeExecutor([duplicate_outcome])

            summary = TurnSummary(final_response="")
            dod = create_definition_of_done("Create a multi-file HTML guide with chapters.")
            dod.implementation_plan = str(plan_path)
            dod.touched_files = [str(path) for path in (index_path, first_chapter)]
            dod.completed_items = ["Create chapter files with appropriate content"]
            dod.pending_items.append("Create the remaining chapter files")

            await runner.execute_batch(
                tool_calls=[reread_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert len(persistent_messages) == 1
            nudge = persistent_messages[0]
            assert "Create the remaining chapter files" in nudge
            assert "Resume by creating `02-installation.html` now." in nudge
            assert "All explicitly planned artifacts already exist on disk." not in nudge
            assert ephemeral_messages == []
      
        827
        
        828
        
        829
        @pytest.mark.asyncio
        async def test_tool_batch_runner_todo_write_does_not_regress_completed_file_todo(
            temp_dir: Path,
        ) -> None:
            """Expect a stale ``TodoWrite`` not to reopen a todo finished in the same batch.

            The batch holds a successful ``write`` of ``03-first-website.html``
            followed by a ``TodoWrite`` that still reports both chapter todos as
            pending. Afterwards the chapter-3 todo must be in ``completed_items``
            and absent from ``pending_items``, while the chapter-4 todo stays pending.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should not run for this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run for this scenario")

            def pending_todos() -> list[dict[str, str]]:
                # Builds a new list of new dicts on every call so the three
                # consumers below never share mutable state.
                chapter_files = ("03-first-website.html", "04-configuration-basics.html")
                return [
                    {
                        "content": f"Create {filename}",
                        "active_form": f"Creating {filename}",
                        "status": "pending",
                    }
                    for filename in chapter_files
                ]

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))
            dod = create_definition_of_done("Create a multi-file nginx guide.")
            sync_todos_to_definition_of_done(dod, pending_todos())

            # Only the parent directory is created; the write itself is faked.
            chapter_path = (
                temp_dir / "guides" / "nginx" / "chapters" / "03-first-website.html"
            )
            chapter_path.parent.mkdir(parents=True)

            write_call = ToolCall(
                id="write-ch3",
                name="write",
                arguments={"file_path": str(chapter_path), "content": "<html></html>\n"},
            )
            stale_todo_call = ToolCall(
                id="todo-stale",
                name="TodoWrite",
                arguments={"todos": pending_todos()},
            )
            write_outcome = tool_outcome(
                tool_call=write_call,
                output=f"Successfully wrote {chapter_path}",
                is_error=False,
            )
            stale_todo_outcome = tool_outcome(
                tool_call=stale_todo_call,
                output="Todos updated",
                is_error=False,
                metadata={"new_todos": pending_todos()},
            )
            executor = FakeExecutor([write_outcome, stale_todo_outcome])

            summary = TurnSummary(final_response="")
            await runner.execute_batch(
                tool_calls=[write_call, stale_todo_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            finished_todo = "Create 03-first-website.html"
            assert finished_todo in dod.completed_items
            assert finished_todo not in dod.pending_items
            assert "Create 04-configuration-basics.html" in dod.pending_items
      
        946
        
        947
        
        948
        @pytest.mark.asyncio
        async def test_tool_batch_runner_proactively_queues_verified_html_inventory(
            temp_dir: Path,
        ) -> None:
            """Check what a successful chapter ``glob`` leaves in the queues and summary.

            After a ``glob`` over two existing chapter files, both steering queues
            must be empty and the single recorded tool-result message must not
            contain ``"Verified chapter inventory:"``.

            NOTE(review): the test name says "proactively queues", yet the
            assertions require that nothing is queued -- confirm whether the name
            still reflects the intended behavior.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run for this scenario")

            chapters_dir = temp_dir / "chapters"
            chapters_dir.mkdir()
            intro_path = chapters_dir / "01-introduction.html"
            setup_path = chapters_dir / "02-setup.html"
            intro_path.write_text("<h1>Chapter 1: Introduction to Fortran</h1>\n")
            setup_path.write_text("<h1>Chapter 2: Setting Up Your Environment</h1>\n")
            index_path = temp_dir / "index.html"
            index_path.write_text("<ul></ul>\n")

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            runtime_context.session.current_task = (
                f"Update {index_path} so the chapter links match the sibling files."
            )
            persistent_messages: list[str] = []
            ephemeral_messages: list[str] = []
            runtime_context.queue_steering_message_callback = persistent_messages.append
            runtime_context.queue_ephemeral_steering_message_callback = (
                ephemeral_messages.append
            )
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            glob_call = ToolCall(
                id="glob-1",
                name="glob",
                arguments={"path": str(chapters_dir), "pattern": "*.html"},
            )
            # The fake glob output lists both chapter files, one per line.
            glob_output = "\n".join(str(found) for found in (intro_path, setup_path))
            glob_outcome = tool_outcome(
                tool_call=glob_call,
                output=glob_output,
                is_error=False,
            )
            executor = FakeExecutor([glob_outcome])

            summary = TurnSummary(final_response="")
            await runner.execute_batch(
                tool_calls=[glob_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=summary,
                dod=create_definition_of_done("Fix the chapter links"),
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert persistent_messages == []
            assert ephemeral_messages == []
            recorded_results = summary.tool_result_messages
            assert len(recorded_results) == 1
            assert "Verified chapter inventory:" not in recorded_results[0].content
      
        1032
        
        1033
        
        1034
        @pytest.mark.asyncio
        async def test_tool_batch_runner_marks_validated_html_toc_completion_after_successful_edit(
            temp_dir: Path,
        ) -> None:
            """A successful ``edit`` of a matching ``index.html`` queues no steering.

            Two chapter files are written under ``chapters/`` and ``index.html`` is
            pre-populated with the post-edit chapter list, so the faked ``edit``
            outcome reports success against a file whose links and titles already
            line up with the chapter files. The test checks that no tool result
            message contains "Semantic verification preview:" and that neither the
            persistent nor the ephemeral steering queue received anything.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Wired into the context only to prove it is never invoked.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Wired into the context only to prove it is never invoked.
                raise AssertionError("Verification should not run for this scenario")

            # The same task text drives both the session and the definition of done.
            task = (
                "Update index.html so every chapter link and title matches the real HTML files in chapters/."
            )

            chapters_dir = temp_dir / "chapters"
            chapters_dir.mkdir()
            chapter_headings = {
                "01-introduction.html": "<h1>Chapter 1: Introduction to Fortran</h1>\n",
                "02-setup.html": "<h1>Chapter 2: Setting Up Your Environment</h1>\n",
            }
            for filename, heading in chapter_headings.items():
                (chapters_dir / filename).write_text(heading)

            index_path = temp_dir / "index.html"
            stale_toc = (
                '<ul class="chapter-list">\n'
                '    <li><a href="chapters/01-old.html">Chapter 1: Old</a></li>\n'
                '    <li><a href="chapters/02-old.html">Chapter 2: Old</a></li>\n'
                "</ul>\n"
            )
            fresh_toc = (
                '<ul class="chapter-list">\n'
                '    <li><a href="chapters/01-introduction.html">Chapter 1: Introduction to Fortran</a></li>\n'
                '    <li><a href="chapters/02-setup.html">Chapter 2: Setting Up Your Environment</a></li>\n'
                "</ul>\n"
            )
            # The file on disk already holds the edited content; the executor is faked.
            index_path.write_text(fresh_toc)

            ctx = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            ctx.session.current_task = task

            queued_persistent: list[str] = []
            queued_ephemeral: list[str] = []
            ctx.queue_steering_message_callback = queued_persistent.append
            ctx.queue_ephemeral_steering_message_callback = queued_ephemeral.append

            batch_runner = ToolBatchRunner(ctx, DefinitionOfDoneStore(temp_dir))
            edit_call = ToolCall(
                id="edit-1",
                name="edit",
                arguments={
                    "file_path": str(index_path),
                    "old_string": stale_toc,
                    "new_string": fresh_toc,
                },
            )
            edit_outcome = tool_outcome(
                tool_call=edit_call,
                output=f"Successfully edited {index_path}",
                is_error=False,
            )
            fake_executor = FakeExecutor([edit_outcome])

            turn_summary = TurnSummary(final_response="")
            dod = create_definition_of_done(task)
            await batch_runner.execute_batch(
                tool_calls=[edit_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=dod,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert not any(
                "Semantic verification preview:" in result_message.content
                for result_message in turn_summary.tool_result_messages
            )
            assert queued_persistent == []
            assert queued_ephemeral == []
      
        1134
        
        1135
        
        1136
        @pytest.mark.asyncio
        async def test_tool_batch_runner_does_not_apply_html_toc_handoff_to_reference_read(
            temp_dir: Path,
        ) -> None:
            """Reading a reference ``index.html`` produces no steering or preview text.

            The task prompt asks to study an existing guide before writing a new
            one, and the only tool call is a ``read`` of an ``index.html`` whose
            chapter list matches the files under ``chapters/``. After the batch
            runs, both steering queues must be empty and no tool result message may
            contain "Semantic verification preview:".
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Wired into the context only to prove it is never invoked.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Wired into the context only to prove it is never invoked.
                raise AssertionError("Verification should not run for this scenario")

            chapters_dir = temp_dir / "chapters"
            chapters_dir.mkdir()
            chapter_headings = {
                "01-introduction.html": "<h1>Chapter 1: Introduction to Fortran</h1>\n",
                "02-setup.html": "<h1>Chapter 2: Setting Up Your Environment</h1>\n",
            }
            for filename, heading in chapter_headings.items():
                (chapters_dir / filename).write_text(heading)

            index_path = temp_dir / "index.html"
            index_markup = (
                "<h2>Table of Contents</h2>\n"
                '<ul class="chapter-list">\n'
                '    <li><a href="chapters/01-introduction.html">Chapter 1: Introduction to Fortran</a></li>\n'
                '    <li><a href="chapters/02-setup.html">Chapter 2: Setting Up Your Environment</a></li>\n'
                "</ul>\n"
            )
            index_path.write_text(index_markup)

            prompt = (
                "Have a look at ~/Loader/guides/fortran and chapters/ within. Get a feel "
                "for the structure and cadence of the guide. We are going to make an all "
                "new equally thorough guide on how to use the nginx tool."
            )

            ctx = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            ctx.session.current_task = prompt  # type: ignore[attr-defined]

            queued_persistent: list[str] = []
            queued_ephemeral: list[str] = []
            ctx.queue_steering_message_callback = queued_persistent.append
            ctx.queue_ephemeral_steering_message_callback = queued_ephemeral.append

            batch_runner = ToolBatchRunner(ctx, DefinitionOfDoneStore(temp_dir))
            read_call = ToolCall(
                id="read-index",
                name="read",
                arguments={"file_path": str(index_path)},
            )
            # The faked read returns exactly what is on disk.
            read_outcome = tool_outcome(
                tool_call=read_call,
                output=index_path.read_text(),
                is_error=False,
            )
            fake_executor = FakeExecutor([read_outcome])

            turn_summary = TurnSummary(final_response="")
            dod = create_definition_of_done(prompt)
            await batch_runner.execute_batch(
                tool_calls=[read_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=dod,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert queued_persistent == []
            assert queued_ephemeral == []
            assert not any(
                "Semantic verification preview:" in result_message.content
                for result_message in turn_summary.tool_result_messages
            )
      
        1228
        
        1229
        
        1230
        @pytest.mark.asyncio
        async def test_tool_batch_runner_queues_next_pending_todo_after_discovery_progress(
            temp_dir: Path,
        ) -> None:
            """A reference read completes the discovery todo and steers to the next one.

            The definition of done carries three pending todos and points at an
            implementation plan listing ``chapters/`` followed by ``index.html``
            under a not-yet-created nginx guide root. After a successful ``read``
            of a Fortran reference chapter the test expects:

            * the "Examine ..." todo to appear in ``dod.completed_items``;
            * a persistent steering message naming the next pending item;
            * a persistent steering message saying to create ``chapters/`` now;
            * no persistent message mentioning ``01-introduction.html``;
            * an empty ephemeral steering queue.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Wired into the context only to prove it is never invoked.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Wired into the context only to prove it is never invoked.
                raise AssertionError("Verification should not run for this scenario")

            reference = temp_dir / "fortran" / "chapters" / "01-introduction.html"
            reference.parent.mkdir(parents=True)
            reference.write_text("<h1>Introduction</h1>\n<p>Guide cadence.</p>\n")

            # Target locations are only named in the plan; nothing is created on disk.
            nginx_root = temp_dir / "Loader" / "guides" / "nginx"
            chapters_dir = nginx_root / "chapters"
            implementation_plan = temp_dir / "implementation.md"
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{chapters_dir}/`",
                f"- `{nginx_root / 'index.html'}`",
                "",
            ]
            implementation_plan.write_text("\n".join(plan_lines))

            ctx = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            queued_persistent: list[str] = []
            queued_ephemeral: list[str] = []
            ctx.queue_steering_message_callback = queued_persistent.append
            ctx.queue_ephemeral_steering_message_callback = queued_ephemeral.append

            batch_runner = ToolBatchRunner(ctx, DefinitionOfDoneStore(temp_dir))
            dod = create_definition_of_done("Create an equally thorough nginx guide.")
            dod.implementation_plan = str(implementation_plan)
            todos = [
                {
                    "content": "Examine the existing Fortran guide structure to understand the cadence and format",
                    "active_form": "Working on: Examine the existing Fortran guide structure to understand the cadence and format",
                    "status": "pending",
                },
                {
                    "content": "Create the nginx directory structure",
                    "active_form": "Working on: Create the nginx directory structure",
                    "status": "pending",
                },
                {
                    "content": "Create the nginx index.html file",
                    "active_form": "Working on: Create the nginx index.html file",
                    "status": "pending",
                },
            ]
            sync_todos_to_definition_of_done(dod, todos)

            read_call = ToolCall(
                id="read-reference",
                name="read",
                arguments={"file_path": str(reference)},
            )
            read_outcome = tool_outcome(
                tool_call=read_call,
                output="<h1>Introduction</h1>\n<p>Guide cadence.</p>\n",
                is_error=False,
            )
            fake_executor = FakeExecutor([read_outcome])

            turn_summary = TurnSummary(final_response="")
            await batch_runner.execute_batch(
                tool_calls=[read_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=dod,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert (
                "Examine the existing Fortran guide structure to understand the cadence and format"
                in dod.completed_items
            )
            assert any(
                "Continue with the next pending item: `Create the nginx directory structure`"
                in steering
                for steering in queued_persistent
            )
            assert any(
                "Resume by creating `chapters/` now." in steering
                for steering in queued_persistent
            )
            # The reference file that was just read must not be echoed back as a target.
            assert not any("01-introduction.html" in steering for steering in queued_persistent)
            assert queued_ephemeral == []
      
        1348
        
        1349
        
        1350
        @pytest.mark.asyncio
        async def test_tool_batch_runner_queues_setup_directory_before_file_when_plan_lists_index_first(
            temp_dir: Path,
        ) -> None:
            """Steering names ``chapters/`` even when the plan lists ``index.html`` first.

            The implementation plan lists ``index.html`` ahead of ``chapters/``,
            and the todos are synced with ``project_root=temp_dir``. After a
            successful ``read`` of a Fortran reference chapter the test expects at
            least one persistent steering message, one naming the next pending
            item, one saying to create ``chapters/`` now, none containing
            "Next step: create `index.html`.", and an empty ephemeral queue.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Wired into the context only to prove it is never invoked.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Wired into the context only to prove it is never invoked.
                raise AssertionError("Verification should not run for this scenario")

            reference = temp_dir / "fortran" / "chapters" / "01-introduction.html"
            reference.parent.mkdir(parents=True)
            reference.write_text("<h1>Introduction</h1>\n<p>Guide cadence.</p>\n")

            # Target locations are only named in the plan; nothing is created on disk.
            nginx_root = temp_dir / "Loader" / "guides" / "nginx"
            chapters_dir = nginx_root / "chapters"
            implementation_plan = temp_dir / "implementation.md"
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                # index.html is deliberately listed before the chapters directory.
                f"- `{nginx_root / 'index.html'}`",
                f"- `{chapters_dir}/`",
                "",
            ]
            implementation_plan.write_text("\n".join(plan_lines))

            ctx = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            queued_persistent: list[str] = []
            queued_ephemeral: list[str] = []
            ctx.queue_steering_message_callback = queued_persistent.append
            ctx.queue_ephemeral_steering_message_callback = queued_ephemeral.append

            batch_runner = ToolBatchRunner(ctx, DefinitionOfDoneStore(temp_dir))
            dod = create_definition_of_done("Create an equally thorough nginx guide.")
            dod.implementation_plan = str(implementation_plan)
            todos = [
                {
                    "content": "Examine the existing Fortran guide structure to understand the cadence and format",
                    "active_form": "Working on: Examine the existing Fortran guide structure to understand the cadence and format",
                    "status": "pending",
                },
                {
                    "content": "Create the nginx directory structure",
                    "active_form": "Working on: Create the nginx directory structure",
                    "status": "pending",
                },
                {
                    "content": "Create the nginx index.html file",
                    "active_form": "Working on: Create the nginx index.html file",
                    "status": "pending",
                },
            ]
            sync_todos_to_definition_of_done(
                dod,
                todos,
                project_root=temp_dir,
            )

            read_call = ToolCall(
                id="read-reference-index-first",
                name="read",
                arguments={"file_path": str(reference)},
            )
            read_outcome = tool_outcome(
                tool_call=read_call,
                output="<h1>Introduction</h1>\n<p>Guide cadence.</p>\n",
                is_error=False,
            )
            fake_executor = FakeExecutor([read_outcome])

            turn_summary = TurnSummary(final_response="")
            await batch_runner.execute_batch(
                tool_calls=[read_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=dod,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert queued_persistent
            assert any(
                "Continue with the next pending item: `Create the nginx directory structure`"
                in steering
                for steering in queued_persistent
            )
            assert any(
                "Resume by creating `chapters/` now." in steering
                for steering in queued_persistent
            )
            assert not any(
                "Next step: create `index.html`." in steering
                for steering in queued_persistent
            )
            assert queued_ephemeral == []
      
        1469
        
        1470
        
        1471
        @pytest.mark.asyncio
        async def test_tool_batch_runner_duplicate_reference_read_prefers_next_pending_todo(
            temp_dir: Path,
        ) -> None:
            """A duplicate reference read yields one nudge toward the next pending todo.

            The conversation already holds a tool observation of the reference
            ``index.html``, the first todo is marked completed, and the executor
            returns a ``ToolExecutionState.DUPLICATE`` outcome for a repeated
            ``read``. The test expects exactly one persistent steering message
            that tells the agent to reuse the earlier observation, names the next
            pending item, and does not contain "Update `". The ephemeral queue
            must stay empty.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Wired into the context only to prove it is never invoked.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Wired into the context only to prove it is never invoked.
                raise AssertionError("Verification should not run for this scenario")

            reference = temp_dir / "fortran" / "index.html"
            reference.parent.mkdir(parents=True)
            reference.write_text("<h1>Fortran Beginner's Guide</h1>\n")

            # History already contains the result of an earlier read of the reference.
            earlier_observation = Message(
                role=Role.TOOL,
                content=(
                    "Observation [read]: Result: "
                    "<h1>Fortran Beginner's Guide</h1>\n"
                ),
            )
            ctx = build_context(
                temp_dir=temp_dir,
                messages=[earlier_observation],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            prompt = (
                "Have a look at ~/Loader/guides/fortran and chapters/ within. Get a feel "
                "for the structure and cadence of the guide. We are going to make an all "
                "new equally thorough guide on how to use the nginx tool."
            )
            ctx.session.current_task = prompt

            queued_persistent: list[str] = []
            queued_ephemeral: list[str] = []
            ctx.queue_steering_message_callback = queued_persistent.append
            ctx.queue_ephemeral_steering_message_callback = queued_ephemeral.append

            batch_runner = ToolBatchRunner(ctx, DefinitionOfDoneStore(temp_dir))
            dod = create_definition_of_done(prompt)
            todos = [
                {
                    "content": "Examine the existing Fortran guide structure to understand the cadence and format",
                    "active_form": "Working on: Examine the existing Fortran guide structure to understand the cadence and format",
                    "status": "completed",
                },
                {
                    "content": "Create the nginx directory structure",
                    "active_form": "Working on: Create the nginx directory structure",
                    "status": "pending",
                },
                {
                    "content": "Create the nginx index.html file",
                    "active_form": "Working on: Create the nginx index.html file",
                    "status": "pending",
                },
            ]
            sync_todos_to_definition_of_done(dod, todos)

            read_call = ToolCall(
                id="read-dup",
                name="read",
                arguments={"file_path": str(reference)},
            )
            duplicate_message = (
                "[Skipped - duplicate action: Already read "
                f"{reference} recently without any intervening changes; "
                "reuse the earlier read result instead of rereading]"
            )
            # Built by hand because the outcome must carry the DUPLICATE state.
            skipped_result = Message.tool_result_message(
                tool_call_id=read_call.id,
                display_content=duplicate_message,
                result_content=duplicate_message,
            )
            duplicate_outcome = ToolExecutionOutcome(
                tool_call=read_call,
                state=ToolExecutionState.DUPLICATE,
                message=skipped_result,
                event_content=duplicate_message,
                is_error=False,
                result_output=duplicate_message,
            )
            fake_executor = FakeExecutor([duplicate_outcome])

            turn_summary = TurnSummary(final_response="")
            await batch_runner.execute_batch(
                tool_calls=[read_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=dod,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert len(queued_persistent) == 1
            steering = queued_persistent[0]
            assert "Reuse the earlier observation instead of repeating it." in steering
            assert (
                "Continue with the next pending item: `Create the nginx directory structure`"
                in steering
            )
            assert "Update `" not in steering
            assert queued_ephemeral == []
      
        1593
        
        1594
        
        1595
        @pytest.mark.asyncio
        async def test_tool_batch_runner_successful_reference_read_prioritizes_concrete_missing_artifact(
            temp_dir: Path,
        ) -> None:
            """A successful reference read should steer toward the missing planned file.

            The implementation plan lists ``index.html`` for the nginx guide, but this
            test never writes that file to disk. After an executed ``read`` of the
            reference chapter, the persistent steering queue must confirm the first
            todo as progress and say to resume by creating ``index.html``; it must not
            point at the next pending todo. Nothing may land in the ephemeral queue.
            """

            async def fail_if_confidence_scored(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Tripwire: this callback must never be awaited in this scenario.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def fail_if_verified(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Tripwire: this callback must never be awaited in this scenario.
                raise AssertionError("Verification should not run for this scenario")

            # Target guide: the first chapter exists, index.html is planned but absent.
            guide_root = temp_dir / "Loader" / "guides" / "nginx"
            chapters = guide_root / "chapters"
            chapters.mkdir(parents=True)
            chapter_one = chapters / "01-introduction.html"
            chapter_one.write_text("<html></html>\n")
            index_path = guide_root / "index.html"

            # Reference chapter that the tool call reads.
            reference = temp_dir / "Loader" / "guides" / "fortran" / "chapters" / "01-introduction.html"
            reference.parent.mkdir(parents=True, exist_ok=True)
            reference.write_text("<h1>Introduction</h1>\n<p>Guide cadence.</p>\n")

            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}/`",
                f"- `{chapters}/`",
                f"- `{index_path}`",
                f"- `{chapter_one}`",
                f"- `{chapters / '02-installation.html'}`",
                "",
            ]
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines))

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=fail_if_confidence_scored,
                verify_action=fail_if_verified,
                auto_recover=False,
            )
            # Capture both steering channels so they can be inspected afterwards.
            persistent_messages: list[str] = []
            ephemeral_messages: list[str] = []
            runtime_context.queue_steering_message_callback = persistent_messages.append
            runtime_context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
            batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(implementation_plan)
            dod.touched_files.append(str(chapter_one))
            todos = [
                {
                    "content": "Examine the existing Fortran guide structure to understand the format and cadence",
                    "active_form": "Working on: Examine the existing Fortran guide structure to understand the format and cadence",
                    "status": "pending",
                },
                {
                    "content": "Create each chapter file with appropriate content",
                    "active_form": "Working on: Create each chapter file with appropriate content",
                    "status": "pending",
                },
                {
                    "content": "Ensure all files follow the same structure and style as the Fortran guide",
                    "active_form": "Working on: Ensure all files follow the same structure and style as the Fortran guide",
                    "status": "pending",
                },
            ]
            sync_todos_to_definition_of_done(dod, todos)

            read_call = ToolCall(
                id="read-reference-chapter",
                name="read",
                arguments={"file_path": str(reference)},
            )
            read_output = "Observation [read]: Result: <h1>Introduction</h1>\n<p>Guide cadence.</p>\n"
            result_message = Message.tool_result_message(
                tool_call_id=read_call.id,
                display_content=read_output,
                result_content=read_output,
            )
            executed_outcome = ToolExecutionOutcome(
                tool_call=read_call,
                state=ToolExecutionState.EXECUTED,
                message=result_message,
                event_content=read_output,
                is_error=False,
                result_output=read_output,
            )
            scripted_executor = FakeExecutor([executed_outcome])

            turn_summary = TurnSummary(final_response="")
            await batch_runner.execute_batch(
                tool_calls=[read_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=dod,
                executor=scripted_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            def steering_mentions(fragment: str) -> bool:
                # True when any queued persistent message contains the fragment.
                return any(fragment in message for message in persistent_messages)

            assert persistent_messages
            assert steering_mentions(
                "Confirmed progress: `Examine the existing Fortran guide structure to understand the format and cadence`"
            )
            assert steering_mentions("Resume by creating `index.html` now.")
            assert not steering_mentions(
                "Continue with the next pending item: `Create each chapter file with appropriate content`"
            )
            assert ephemeral_messages == []
      
        1729
        
        1730
        
        1731
        @pytest.mark.asyncio
        async def test_tool_batch_runner_duplicate_read_ignores_unplanned_expansion_after_plan_complete(
            temp_dir: Path,
        ) -> None:
            """A duplicate read after the plan is built must not push unplanned files.

            Every file listed in the implementation plan is written to disk up front.
            The pending items include ``Create 07-performance-tuning.html``, which the
            plan does not list. After a ``DUPLICATE`` read outcome, exactly one
            persistent steering message is expected: it must mention the pending
            verification item and must not mention the unplanned creation item. The
            ephemeral queue must stay empty.
            """

            async def fail_if_confidence_scored(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Tripwire: this callback must never be awaited in this scenario.
                raise AssertionError("Confidence scoring should not run for this scenario")

            async def fail_if_verified(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Tripwire: this callback must never be awaited in this scenario.
                raise AssertionError("Verification should not run for this scenario")

            guide_root = temp_dir / "guides" / "nginx"
            chapters = guide_root / "chapters"
            guide_root.mkdir(parents=True)
            chapters.mkdir()
            index_path = guide_root / "index.html"
            chapter_one = chapters / "01-getting-started.html"
            chapter_two = chapters / "02-installation.html"
            # Materialise every planned file so the plan counts as already built.
            for planned_file, html in (
                (index_path, "<html></html>\n"),
                (chapter_one, "<h1>One</h1>\n"),
                (chapter_two, "<h1>Two</h1>\n"),
            ):
                planned_file.write_text(html)

            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}/`",
                f"- `{chapters}/`",
                f"- `{index_path}`",
                f"- `{chapter_one}`",
                f"- `{chapter_two}`",
                "",
            ]
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines))

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=fail_if_confidence_scored,
                verify_action=fail_if_verified,
                auto_recover=False,
            )
            # Capture both steering channels so they can be inspected afterwards.
            persistent_messages: list[str] = []
            ephemeral_messages: list[str] = []
            runtime_context.queue_steering_message_callback = persistent_messages.append
            runtime_context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
            batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(implementation_plan)
            dod.pending_items = [
                "Create 07-performance-tuning.html",
                "Verify all guide files are linked and complete",
                "Complete the requested work",
            ]

            repeat_read = ToolCall(
                id="read-dup",
                name="read",
                arguments={"file_path": str(chapter_one)},
            )
            duplicate_message = (
                "[Skipped - duplicate action: Already read "
                f"{chapter_one} recently without any intervening changes; "
                "reuse the earlier read result instead of rereading]"
            )
            skipped_result = Message.tool_result_message(
                tool_call_id=repeat_read.id,
                display_content=duplicate_message,
                result_content=duplicate_message,
            )
            duplicate_outcome = ToolExecutionOutcome(
                tool_call=repeat_read,
                state=ToolExecutionState.DUPLICATE,
                message=skipped_result,
                event_content=duplicate_message,
                is_error=False,
                result_output=duplicate_message,
            )
            scripted_executor = FakeExecutor([duplicate_outcome])

            turn_summary = TurnSummary(final_response="")
            await batch_runner.execute_batch(
                tool_calls=[repeat_read],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=dod,
                executor=scripted_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert len(persistent_messages) == 1
            steering = persistent_messages[0]
            assert "Verify all guide files are linked and complete" in steering
            assert "Create 07-performance-tuning.html" not in steering
            assert ephemeral_messages == []
      
        1845
        
        1846
        
        1847
        @pytest.mark.asyncio
        async def test_tool_batch_runner_duplicate_read_after_plan_complete_pushes_verification_handoff(
            temp_dir: Path,
        ) -> None:
            """A duplicate read after the plan is built should hand off to verification.

            Every file listed in the implementation plan is written to disk and the
            definition of done carries a verification command. The only creation item
            still pending names a file the plan does not list. After a ``DUPLICATE``
            read outcome, exactly one persistent steering message is expected: it must
            state that the planned artifacts exist and direct the agent to verification
            or final confirmation, without mentioning the unplanned creation item. The
            ephemeral queue must stay empty.
            """

            async def fail_if_confidence_scored(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Tripwire: this callback must never be awaited in this scenario.
                raise AssertionError("Confidence scoring should not run for this scenario")

            async def fail_if_verified(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Tripwire: this callback must never be awaited in this scenario.
                raise AssertionError("Verification should not run for this scenario")

            guide_root = temp_dir / "guides" / "nginx"
            chapters = guide_root / "chapters"
            guide_root.mkdir(parents=True)
            chapters.mkdir()
            index_path = guide_root / "index.html"
            chapter_one = chapters / "01-getting-started.html"
            chapter_two = chapters / "02-installation.html"
            # Materialise every planned file so the plan counts as already built.
            for planned_file, html in (
                (index_path, "<html></html>\n"),
                (chapter_one, "<h1>One</h1>\n"),
                (chapter_two, "<h1>Two</h1>\n"),
            ):
                planned_file.write_text(html)

            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}/`",
                f"- `{chapters}/`",
                f"- `{index_path}`",
                f"- `{chapter_one}`",
                f"- `{chapter_two}`",
                "",
            ]
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines))

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=fail_if_confidence_scored,
                verify_action=fail_if_verified,
                auto_recover=False,
            )
            # Capture both steering channels so they can be inspected afterwards.
            persistent_messages: list[str] = []
            ephemeral_messages: list[str] = []
            runtime_context.queue_steering_message_callback = persistent_messages.append
            runtime_context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
            batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(implementation_plan)
            dod.verification_commands = [f"ls -la {guide_root}"]
            dod.pending_items = [
                "Create 07-performance-tuning.html",
                "Complete the requested work",
            ]

            repeat_read = ToolCall(
                id="read-dup",
                name="read",
                arguments={"file_path": str(chapter_one)},
            )
            duplicate_message = (
                "[Skipped - duplicate action: Already read "
                f"{chapter_one} recently without any intervening changes; "
                "reuse the earlier read result instead of rereading]"
            )
            skipped_result = Message.tool_result_message(
                tool_call_id=repeat_read.id,
                display_content=duplicate_message,
                result_content=duplicate_message,
            )
            duplicate_outcome = ToolExecutionOutcome(
                tool_call=repeat_read,
                state=ToolExecutionState.DUPLICATE,
                message=skipped_result,
                event_content=duplicate_message,
                is_error=False,
                result_output=duplicate_message,
            )
            scripted_executor = FakeExecutor([duplicate_outcome])

            turn_summary = TurnSummary(final_response="")
            await batch_runner.execute_batch(
                tool_calls=[repeat_read],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=dod,
                executor=scripted_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert len(persistent_messages) == 1
            steering = persistent_messages[0]
            assert "All explicitly planned artifacts already exist on disk." in steering
            assert (
                "Move to verification or final confirmation using the files already on disk."
                in steering
            )
            assert "Create 07-performance-tuning.html" not in steering
            assert ephemeral_messages == []
      
        1965
        
        1966
        
        1967
        @pytest.mark.asyncio
        async def test_tool_batch_runner_duplicate_read_after_plan_complete_ignores_stale_creation_todos(
            temp_dir: Path,
        ) -> None:
            """Stale "create" todos for files already on disk must not be re-suggested.

            Every file listed in the implementation plan is written to disk, yet the
            pending items still contain ``Create 01-getting-started.html`` and
            ``Creating 02-installation.html``. After a ``DUPLICATE`` read outcome,
            exactly one persistent steering message is expected: it must state that the
            planned artifacts exist and direct the agent to verification or final
            confirmation, without echoing either stale creation item. The ephemeral
            queue must stay empty.
            """

            async def fail_if_confidence_scored(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Tripwire: this callback must never be awaited in this scenario.
                raise AssertionError("Confidence scoring should not run for this scenario")

            async def fail_if_verified(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Tripwire: this callback must never be awaited in this scenario.
                raise AssertionError("Verification should not run for this scenario")

            guide_root = temp_dir / "guides" / "nginx"
            chapters = guide_root / "chapters"
            guide_root.mkdir(parents=True)
            chapters.mkdir()
            index_path = guide_root / "index.html"
            chapter_one = chapters / "01-getting-started.html"
            chapter_two = chapters / "02-installation.html"
            # Materialise every planned file so the creation todos below are stale.
            for planned_file, html in (
                (index_path, "<html></html>\n"),
                (chapter_one, "<h1>One</h1>\n"),
                (chapter_two, "<h1>Two</h1>\n"),
            ):
                planned_file.write_text(html)

            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}/`",
                f"- `{chapters}/`",
                f"- `{index_path}`",
                f"- `{chapter_one}`",
                f"- `{chapter_two}`",
                "",
            ]
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines))

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=fail_if_confidence_scored,
                verify_action=fail_if_verified,
                auto_recover=False,
            )
            # Capture both steering channels so they can be inspected afterwards.
            persistent_messages: list[str] = []
            ephemeral_messages: list[str] = []
            runtime_context.queue_steering_message_callback = persistent_messages.append
            runtime_context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
            batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(implementation_plan)
            dod.verification_commands = [f"ls -la {guide_root}"]
            dod.pending_items = [
                "Create 01-getting-started.html",
                "Creating 02-installation.html",
                "Complete the requested work",
            ]

            repeat_read = ToolCall(
                id="read-dup-built-stale",
                name="read",
                arguments={"file_path": str(chapter_one)},
            )
            duplicate_message = (
                "[Skipped - duplicate action: Already read "
                f"{chapter_one} recently without any intervening changes; "
                "reuse the earlier read result instead of rereading]"
            )
            skipped_result = Message.tool_result_message(
                tool_call_id=repeat_read.id,
                display_content=duplicate_message,
                result_content=duplicate_message,
            )
            duplicate_outcome = ToolExecutionOutcome(
                tool_call=repeat_read,
                state=ToolExecutionState.DUPLICATE,
                message=skipped_result,
                event_content=duplicate_message,
                is_error=False,
                result_output=duplicate_message,
            )
            scripted_executor = FakeExecutor([duplicate_outcome])

            turn_summary = TurnSummary(final_response="")
            await batch_runner.execute_batch(
                tool_calls=[repeat_read],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=dod,
                executor=scripted_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert len(persistent_messages) == 1
            steering = persistent_messages[0]
            assert "All explicitly planned artifacts already exist on disk." in steering
            assert (
                "Move to verification or final confirmation using the files already on disk."
                in steering
            )
            assert "Create 01-getting-started.html" not in steering
            assert "Creating 02-installation.html" not in steering
            assert ephemeral_messages == []
      
        2087
        
        2088
        
        2089
        @pytest.mark.asyncio
        async def test_tool_batch_runner_successful_read_after_plan_complete_pushes_review_handoff(
            temp_dir: Path,
        ) -> None:
            """A read after the plan is built hands off to the pending review todo.

            Every path listed in the implementation plan already exists on disk and
            the synced todo list still holds a consistency-review item. After one
            successful ``read`` of a planned chapter, the test expects exactly one
            ephemeral steering message that names the review item, and no
            persistent steering message.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Any invocation fails the test outright.
                raise AssertionError("Confidence scoring should not run for this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Any invocation fails the test outright.
                raise AssertionError("Verification should not run for this scenario")

            # Materialize every artifact that the plan below refers to.
            guide_dir = temp_dir / "guides" / "nginx"
            chapters_dir = guide_dir / "chapters"
            guide_dir.mkdir(parents=True)
            chapters_dir.mkdir()
            index_file = guide_dir / "index.html"
            first_chapter = chapters_dir / "01-getting-started.html"
            second_chapter = chapters_dir / "02-installation.html"
            for built_file, markup in (
                (index_file, "<html></html>\n"),
                (first_chapter, "<h1>One</h1>\n"),
                (second_chapter, "<h1>Two</h1>\n"),
            ):
                built_file.write_text(markup)

            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_dir}/`",
                f"- `{chapters_dir}/`",
                f"- `{index_file}`",
                f"- `{first_chapter}`",
                f"- `{second_chapter}`",
                "",
            ]
            plan_file = temp_dir / "implementation.md"
            plan_file.write_text("\n".join(plan_lines))

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            # Capture both steering channels so the assertions can tell them apart.
            persistent_queue: list[str] = []
            ephemeral_queue: list[str] = []
            runtime_context.queue_steering_message_callback = persistent_queue.append
            runtime_context.queue_ephemeral_steering_message_callback = ephemeral_queue.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(plan_file)
            dod.verification_commands = [f"ls -la {guide_dir}"]
            pending_todos = [
                {
                    "content": "Create 01-getting-started.html",
                    "active_form": "Creating 01-getting-started.html",
                    "status": "pending",
                },
                {
                    "content": "Ensure all files are properly linked and formatted consistently",
                    "active_form": "Reviewing guide consistency and linkage",
                    "status": "pending",
                },
            ]
            sync_todos_to_definition_of_done(dod, pending_todos)

            read_call = ToolCall(
                id="read-built-review",
                name="read",
                arguments={"file_path": str(first_chapter)},
            )
            read_outcome = tool_outcome(
                tool_call=read_call,
                output=first_chapter.read_text(),
                is_error=False,
            )
            executor = FakeExecutor([read_outcome])

            turn_summary = TurnSummary(final_response="")
            await runner.execute_batch(
                tool_calls=[read_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert persistent_queue == []
            assert len(ephemeral_queue) == 1
            handoff = ephemeral_queue[0]
            assert "All explicitly planned artifacts already exist." in handoff
            assert "Ensure all files are properly linked and formatted consistently" in handoff
            assert "Create 01-getting-started.html" not in handoff
            assert "do not keep broad-rereading the output set" in handoff
            assert "If no specific mismatch remains, move to verification now." in handoff
      
        2200
        
        2201
        
        2202
        @pytest.mark.asyncio
        async def test_tool_batch_runner_successful_read_after_plan_complete_switches_to_verify(
            temp_dir: Path,
        ) -> None:
            """A read after the plan is built, with no todos synced, moves to verify.

            Every planned path exists on disk, a verification command is set on the
            definition of done, and no todos are synced. After one successful
            ``read`` of a planned chapter, the test expects a single persistent
            steering message announcing verification, no ephemeral message, and
            ``workflow_mode`` on the context equal to ``"verify"``.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Any invocation fails the test outright.
                raise AssertionError("Confidence scoring should not run for this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Any invocation fails the test outright.
                raise AssertionError("Verification should not run for this scenario")

            # Materialize every artifact that the plan below refers to.
            guide_dir = temp_dir / "guides" / "nginx"
            chapters_dir = guide_dir / "chapters"
            guide_dir.mkdir(parents=True)
            chapters_dir.mkdir()
            index_file = guide_dir / "index.html"
            first_chapter = chapters_dir / "01-getting-started.html"
            second_chapter = chapters_dir / "02-installation.html"
            for built_file, markup in (
                (index_file, "<html></html>\n"),
                (first_chapter, "<h1>One</h1>\n"),
                (second_chapter, "<h1>Two</h1>\n"),
            ):
                built_file.write_text(markup)

            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_dir}/`",
                f"- `{chapters_dir}/`",
                f"- `{index_file}`",
                f"- `{first_chapter}`",
                f"- `{second_chapter}`",
                "",
            ]
            plan_file = temp_dir / "implementation.md"
            plan_file.write_text("\n".join(plan_lines))

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            # Capture both steering channels so the assertions can tell them apart.
            persistent_queue: list[str] = []
            ephemeral_queue: list[str] = []
            runtime_context.queue_steering_message_callback = persistent_queue.append
            runtime_context.queue_ephemeral_steering_message_callback = ephemeral_queue.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(plan_file)
            dod.verification_commands = [f"ls -la {guide_dir}"]

            read_call = ToolCall(
                id="read-built-verify",
                name="read",
                arguments={"file_path": str(first_chapter)},
            )
            read_outcome = tool_outcome(
                tool_call=read_call,
                output=first_chapter.read_text(),
                is_error=False,
            )
            executor = FakeExecutor([read_outcome])

            turn_summary = TurnSummary(final_response="")
            await runner.execute_batch(
                tool_calls=[read_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert len(persistent_queue) == 1
            verify_notice = persistent_queue[0]
            assert "All explicitly planned artifacts already exist." in verify_notice
            assert "Verification should run next." in verify_notice
            assert "stop broad rereads" in verify_notice
            assert ephemeral_queue == []
            assert runtime_context.workflow_mode == "verify"
      
        2296
        
        2297
        
        2298
        @pytest.mark.asyncio
        async def test_tool_batch_runner_observation_handoff_pushes_mutation_step(
            temp_dir: Path,
        ) -> None:
            """A reference read with a create todo queued steers toward the mutation.

            The todo list holds an "examine" item followed by a "create index.html"
            item. After one successful ``read`` of the reference chapter, the test
            expects persistent steering that names the create item and tells the
            agent to stop gathering reference material, with nothing queued on the
            ephemeral channel.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Any invocation fails the test outright.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Any invocation fails the test outright.
                raise AssertionError("Verification should not run for this scenario")

            # Existing guide chapter that the agent reads as reference material.
            reference_file = temp_dir / "fortran" / "chapters" / "01-introduction.html"
            reference_file.parent.mkdir(parents=True)
            reference_file.write_text("<h1>Introduction</h1>\n<p>Guide cadence.</p>\n")

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            # Capture both steering channels so the assertions can tell them apart.
            persistent_queue: list[str] = []
            ephemeral_queue: list[str] = []
            runtime_context.queue_steering_message_callback = persistent_queue.append
            runtime_context.queue_ephemeral_steering_message_callback = ephemeral_queue.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            pending_todos = [
                {
                    "content": "Examine the existing Fortran guide structure to understand the cadence and format",
                    "active_form": "Working on: Examine the existing Fortran guide structure to understand the cadence and format",
                    "status": "pending",
                },
                {
                    "content": "Create the nginx index.html file",
                    "active_form": "Working on: Create the nginx index.html file",
                    "status": "pending",
                },
            ]
            sync_todos_to_definition_of_done(dod, pending_todos)

            read_call = ToolCall(
                id="read-reference",
                name="read",
                arguments={"file_path": str(reference_file)},
            )
            read_outcome = tool_outcome(
                tool_call=read_call,
                output="<h1>Introduction</h1>\n<p>Guide cadence.</p>\n",
                is_error=False,
            )
            executor = FakeExecutor([read_outcome])

            turn_summary = TurnSummary(final_response="")
            await runner.execute_batch(
                tool_calls=[read_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert any(
                "Continue with the next pending item: `Create the nginx index.html file`"
                in queued
                for queued in persistent_queue
            )
            assert any(
                "stop gathering more reference material and perform the change now" in queued
                for queued in persistent_queue
            )
            assert ephemeral_queue == []
      
        2390
        
        2391
        
        2392
        @pytest.mark.asyncio
        async def test_tool_batch_runner_discovery_completion_handoff_stays_persistent(
            temp_dir: Path,
        ) -> None:
            """The handoff from a discovery todo is queued on the persistent channel.

            The todo list holds an "examine" item followed by a "create the nginx
            directory structure" item. After one successful ``read`` of the
            reference chapter, the test expects at least one persistent steering
            message naming the directory-structure item, and nothing on the
            ephemeral channel.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Any invocation fails the test outright.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Any invocation fails the test outright.
                raise AssertionError("Verification should not run for this scenario")

            # Existing guide chapter that the agent reads as reference material.
            reference_file = temp_dir / "fortran" / "chapters" / "01-introduction.html"
            reference_file.parent.mkdir(parents=True)
            reference_file.write_text("<h1>Introduction</h1>\n<p>Guide cadence.</p>\n")

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            # Capture both steering channels so the assertions can tell them apart.
            persistent_queue: list[str] = []
            ephemeral_queue: list[str] = []
            runtime_context.queue_steering_message_callback = persistent_queue.append
            runtime_context.queue_ephemeral_steering_message_callback = ephemeral_queue.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            pending_todos = [
                {
                    "content": "First, examine the existing fortran guide structure and content",
                    "active_form": "Working on: First, examine the existing fortran guide structure and content",
                    "status": "pending",
                },
                {
                    "content": "Create the nginx directory structure",
                    "active_form": "Working on: Create the nginx directory structure",
                    "status": "pending",
                },
            ]
            sync_todos_to_definition_of_done(dod, pending_todos)

            read_call = ToolCall(
                id="read-reference",
                name="read",
                arguments={"file_path": str(reference_file)},
            )
            read_outcome = tool_outcome(
                tool_call=read_call,
                output="<h1>Introduction</h1>\n<p>Guide cadence.</p>\n",
                is_error=False,
            )
            executor = FakeExecutor([read_outcome])

            turn_summary = TurnSummary(final_response="")
            await runner.execute_batch(
                tool_calls=[read_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert persistent_queue
            assert any(
                "Continue with the next pending item: `Create the nginx directory structure`"
                in queued
                for queued in persistent_queue
            )
            assert ephemeral_queue == []
      
        2481
        
        2482
        
        2483
        @pytest.mark.asyncio
        async def test_tool_batch_runner_missing_artifact_nudge_names_next_file_after_setup_mkdir(
            temp_dir: Path,
        ) -> None:
            """After a setup ``mkdir``, the nudge names the next planned file.

            The implementation plan lists a chapters directory and ``index.html``;
            the test itself creates neither on disk. After a successful
            ``bash`` call running ``mkdir -p`` on the chapters directory, the test
            expects the last persistent steering message to say directory setup is
            complete and to name ``index.html`` as the next file, with nothing on
            the ephemeral channel.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Any invocation fails the test outright.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Any invocation fails the test outright.
                raise AssertionError("Verification should not run for this scenario")

            nginx_dir = temp_dir / "Loader" / "guides" / "nginx"
            chapters_dir = nginx_dir / "chapters"
            index_file = nginx_dir / "index.html"
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{chapters_dir}/`",
                f"- `{index_file}`",
                "",
            ]
            plan_file = temp_dir / "implementation.md"
            plan_file.write_text("\n".join(plan_lines))

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            # Capture both steering channels so the assertions can tell them apart.
            persistent_queue: list[str] = []
            ephemeral_queue: list[str] = []
            runtime_context.queue_steering_message_callback = persistent_queue.append
            runtime_context.queue_ephemeral_steering_message_callback = ephemeral_queue.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(plan_file)
            pending_todos = [
                {
                    "content": "Create the nginx directory structure",
                    "active_form": "Creating the nginx directory structure",
                    "status": "pending",
                },
                {
                    "content": "Develop the main index.html file with proper structure",
                    "active_form": "Developing the main index.html file with proper structure",
                    "status": "pending",
                },
            ]
            sync_todos_to_definition_of_done(dod, pending_todos)

            mkdir_call = ToolCall(
                id="mkdir-nginx",
                name="bash",
                arguments={"command": f"mkdir -p {chapters_dir}"},
            )
            mkdir_outcome = tool_outcome(
                tool_call=mkdir_call,
                output="",
                is_error=False,
            )
            executor = FakeExecutor([mkdir_outcome])

            turn_summary = TurnSummary(final_response="")
            await runner.execute_batch(
                tool_calls=[mkdir_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert persistent_queue
            latest_nudge = persistent_queue[-1]
            assert "Directory setup is complete." in latest_nudge
            assert "Next step: create `index.html`." in latest_nudge
            assert "Write a compact but real initial version of that file now" in latest_nudge
            assert ephemeral_queue == []
      
        2585
        
        2586
        
        2587
        @pytest.mark.asyncio
      
        2588
        async def test_tool_batch_runner_first_chapter_handoff_stays_persistent_until_substantive_output_exists(
      
        2589
            temp_dir: Path,
      
        2590
        ) -> None:
      
        2591
            async def assess_confidence(
      
        2592
                tool_name: str,
      
        2593
                tool_args: dict,
      
        2594
                context: str,
      
        2595
            ) -> ConfidenceAssessment:
      
        2596
                raise AssertionError("Confidence scoring should be disabled in this scenario")
      
        2597
        
        2598
            async def verify_action(
      
        2599
                tool_name: str,
      
        2600
                tool_args: dict,
      
        2601
                result: str,
      
        2602
                expected: str = "",
      
        2603
            ) -> ActionVerification:
      
        2604
                raise AssertionError("Verification should not run for this scenario")
      
        2605
        
        2606
            nginx_root = temp_dir / "guides" / "nginx"
      
        2607
            chapters = nginx_root / "chapters"
      
        2608
            chapters.mkdir(parents=True)
      
        2609
            index_path = nginx_root / "index.html"
      
        2610
        
        2611
            implementation_plan = temp_dir / "implementation.md"
      
        2612
            implementation_plan.write_text(
      
        2613
                "\n".join(
      
        2614
                    [
      
        2615
                        "# Implementation Plan",
      
        2616
                        "",
      
        2617
                        "## File Changes",
      
        2618
                        f"- `{chapters}/`",
      
        2619
                        f"- `{index_path}`",
      
        2620
                        f"- `{chapters / '01-introduction.html'}`",
      
        2621
                        "",
      
        2622
                    ]
      
        2623
                )
      
        2624
            )
      
        2625
        
        2626
            context = build_context(
      
        2627
                temp_dir=temp_dir,
      
        2628
                messages=[],
      
        2629
                safeguards=FakeSafeguards(),
      
        2630
                assess_confidence=assess_confidence,
      
        2631
                verify_action=verify_action,
      
        2632
                auto_recover=False,
      
        2633
            )
      
        2634
            persistent_messages: list[str] = []
      
        2635
            ephemeral_messages: list[str] = []
      
        2636
            context.queue_steering_message_callback = persistent_messages.append
      
        2637
            context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
      
        2638
            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
      
        2639
            dod = create_definition_of_done("Create a multi-file nginx guide.")
      
        2640
            dod.implementation_plan = str(implementation_plan)
      
        2641
            sync_todos_to_definition_of_done(
      
        2642
                dod,
      
        2643
                [
      
        2644
                    {
      
        2645
                        "content": "Create the main index.html file with proper structure",
      
        2646
                        "active_form": "Creating the main index.html file with proper structure",
      
        2647
                        "status": "pending",
      
        2648
                    },
      
        2649
                    {
      
        2650
                        "content": "Create each chapter file with appropriate content",
      
        2651
                        "active_form": "Creating each chapter file with appropriate content",
      
        2652
                        "status": "pending",
      
        2653
                    },
      
        2654
                ],
      
        2655
            )
      
        2656
        
        2657
            tool_call = ToolCall(
      
        2658
                id="write-index",
      
        2659
                name="write",
      
        2660
                arguments={
      
        2661
                    "file_path": str(index_path),
      
        2662
                    "content": "<html></html>\n",
      
        2663
                },
      
        2664
            )
      
        2665
            executor = FakeExecutor(
      
        2666
                [
      
        2667
                    tool_outcome(
      
        2668
                        tool_call=tool_call,
      
        2669
                        output=f"Successfully wrote 14 bytes to {index_path}",
      
        2670
                        is_error=False,
      
        2671
                    )
      
        2672
                ]
      
        2673
            )
      
        2674
        
        2675
            summary = TurnSummary(final_response="")
      
        2676
            await runner.execute_batch(
      
        2677
                tool_calls=[tool_call],
      
        2678
                tool_source="assistant",
      
        2679
                pending_tool_calls_seen=set(),
      
        2680
                emit=_noop_emit,
      
        2681
                summary=summary,
      
        2682
                dod=dod,
      
        2683
                executor=executor,  # type: ignore[arg-type]
      
        2684
                on_confirmation=None,
      
        2685
                on_user_question=None,
      
        2686
                emit_confirmation=None,
      
        2687
                consecutive_errors=0,
      
        2688
            )
      
        2689
        
        2690
            assert persistent_messages
      
        2691
            assert ephemeral_messages == []
      
        2692
            message = persistent_messages[-1]
      
        2693
            assert "Confirmed progress:" in message
      
        2694
            assert "Next step: create `01-introduction.html`." in message
      
        2695
            assert (
      
        2696
                f"Prefer one `write(file_path=..., content=...)` call for `{(chapters / '01-introduction.html').resolve(strict=False)}` now."
      
        2697
                in message
      
        2698
            )
      
        2699
            assert "Write a compact but real initial version of that file now" not in message
      
        2700
            assert "Do not reread reference material or spend the next turn on bookkeeping." in message
      
        2701
        
        2702
        
        2703
        @pytest.mark.asyncio
        async def test_tool_batch_runner_directory_handoff_uses_home_relative_path(
            temp_dir: Path,
            monkeypatch: pytest.MonkeyPatch,
        ) -> None:
            """Directory-creation handoff should name the next file with a `~` path.

            ``HOME`` is pointed at the resolved temp directory, a plan listing the
            chapters directory and ``index.html`` is written, and a successful
            ``mkdir -p`` bash call is run through the batch runner. The last queued
            steering message must direct the model to create ``index.html`` and show
            its location as ``~/Loader/guides/nginx/index.html``.
            """
            home_dir = temp_dir.resolve(strict=False)
            monkeypatch.setenv("HOME", str(home_dir))

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Tripwire: any call here fails the test.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Tripwire: any call here fails the test.
                raise AssertionError("Verification should not run for this scenario")

            guide_dir = temp_dir / "Loader" / "guides" / "nginx"
            chapters_dir = guide_dir / "chapters"
            index_file = guide_dir / "index.html"

            plan_file = temp_dir / "implementation.md"
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{chapters_dir}/`",
                f"- `{index_file}`",
                "",
            ]
            plan_file.write_text("\n".join(plan_lines))

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            steering_log: list[str] = []
            runtime_context.queue_steering_message_callback = steering_log.append

            dod_store = DefinitionOfDoneStore(temp_dir)
            batch_runner = ToolBatchRunner(runtime_context, dod_store)

            definition = create_definition_of_done("Create a multi-file nginx guide.")
            definition.implementation_plan = str(plan_file)
            pending_todos = [
                {
                    "content": "Create the nginx directory structure",
                    "active_form": "Creating the nginx directory structure",
                    "status": "pending",
                },
                {
                    "content": "Develop the main index.html file with proper structure",
                    "active_form": "Developing the main index.html file with proper structure",
                    "status": "pending",
                },
            ]
            sync_todos_to_definition_of_done(definition, pending_todos)

            mkdir_call = ToolCall(
                id="mkdir-nginx-home",
                name="bash",
                arguments={"command": f"mkdir -p {chapters_dir}"},
            )
            mkdir_outcome = tool_outcome(
                tool_call=mkdir_call,
                output="",
                is_error=False,
            )
            fake_executor = FakeExecutor([mkdir_outcome])

            turn_summary = TurnSummary(final_response="")
            await batch_runner.execute_batch(
                tool_calls=[mkdir_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=definition,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert steering_log
            latest = steering_log[-1]
            required_fragments = (
                "Next step: create `index.html`.",
                "`~/Loader/guides/nginx/index.html`",
                "Write a compact but real initial version of that file now",
            )
            for fragment in required_fragments:
                assert fragment in latest
      
        2807
        
        2808
        
        2809
        @pytest.mark.asyncio
        async def test_tool_batch_runner_redirects_post_write_self_audit_to_next_missing_artifact(
            temp_dir: Path,
        ) -> None:
            """Re-reading a just-written file should redirect to the next missing file.

            ``index.html`` already exists on disk and is recorded as touched in the
            definition of done; the plan also lists ``01-introduction.html``, which
            does not exist. A ``read`` of ``index.html`` is run through the batch
            runner. The last persistent steering message must tell the model to
            resume by creating ``01-introduction.html``, and nothing may be queued
            on the ephemeral channel.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Tripwire: any call here fails the test.
                raise AssertionError("Confidence scoring should not run in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Tripwire: any call here fails the test.
                raise AssertionError("Verification should not run in this scenario")

            guide_dir = temp_dir / "guides" / "nginx"
            chapters_dir = guide_dir / "chapters"
            chapters_dir.mkdir(parents=True)

            index_file = guide_dir / "index.html"
            index_lines = [
                "<html>",
                '<a href="chapters/01-introduction.html">Chapter 1: Introduction to Nginx</a>',
                '<a href="chapters/02-installation.html">Chapter 2: Installation and Setup</a>',
                "</html>",
            ]
            # Each line is newline-terminated, matching a "\n"-joined body plus a trailing newline.
            index_file.write_text("".join(f"{line}\n" for line in index_lines))

            intro_file = chapters_dir / "01-introduction.html"
            plan_file = temp_dir / "implementation.md"
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_dir}/`",
                f"- `{chapters_dir}/`",
                f"- `{index_file}`",
                f"- `{intro_file}`",
                "",
            ]
            plan_file.write_text("\n".join(plan_lines))

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            steering_log: list[str] = []
            ephemeral_log: list[str] = []
            runtime_context.queue_steering_message_callback = steering_log.append
            runtime_context.queue_ephemeral_steering_message_callback = ephemeral_log.append

            dod_store = DefinitionOfDoneStore(temp_dir)
            batch_runner = ToolBatchRunner(runtime_context, dod_store)

            definition = create_definition_of_done("Create a multi-file nginx guide.")
            definition.implementation_plan = str(plan_file)
            definition.touched_files.append(str(index_file))
            definition.completed_items.append("Develop the main index.html file for the nginx guide")
            definition.pending_items.append("Create chapter files for the nginx guide")

            read_call = ToolCall(
                id="read-index-self-audit",
                name="read",
                arguments={"file_path": str(index_file)},
            )
            read_outcome = tool_outcome(
                tool_call=read_call,
                output="1\t<html>\n",
                is_error=False,
            )
            fake_executor = FakeExecutor([read_outcome])

            turn_summary = TurnSummary(final_response="")
            await batch_runner.execute_batch(
                tool_calls=[read_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=definition,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert steering_log
            latest = steering_log[-1]
            required_fragments = (
                "You already have the current contents of `index.html` from the successful write.",
                "Resume by creating `01-introduction.html` now.",
                "Do not spend another turn rereading the file you just wrote or on TodoWrite alone.",
            )
            for fragment in required_fragments:
                assert fragment in latest
            assert not ephemeral_log
      
        2915
        
        2916
        
        2917
        @pytest.mark.asyncio
        async def test_tool_batch_runner_preserves_first_file_handoff_after_recovery_prompt(
            temp_dir: Path,
        ) -> None:
            """First-file handoff should still be emitted after an empty-response prompt.

            The conversation is seeded with a user message carrying the
            ``[EMPTY ASSISTANT RESPONSE]`` recovery prompt. A successful ``write`` of
            ``index.html`` is then run through the batch runner. The last persistent
            steering message must point at ``01-introduction.html`` as the next step,
            must not contain the "compact but real initial version" wording, and
            nothing may be queued on the ephemeral channel.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Tripwire: any call here fails the test.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Tripwire: any call here fails the test.
                raise AssertionError("Verification should not run for this scenario")

            guide_dir = temp_dir / "guides" / "nginx"
            chapters_dir = guide_dir / "chapters"
            chapters_dir.mkdir(parents=True)
            index_file = guide_dir / "index.html"
            intro_file = chapters_dir / "01-introduction.html"

            plan_file = temp_dir / "implementation.md"
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{chapters_dir}/`",
                f"- `{index_file}`",
                f"- `{intro_file}`",
                "",
            ]
            plan_file.write_text("\n".join(plan_lines))

            recovery_prompt = Message(
                role=Role.USER,
                content=(
                    "[EMPTY ASSISTANT RESPONSE]\n"
                    "Respond with that concrete mutation tool call now. Do not return an empty response."
                ),
            )
            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[recovery_prompt],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            steering_log: list[str] = []
            ephemeral_log: list[str] = []
            runtime_context.queue_steering_message_callback = steering_log.append
            runtime_context.queue_ephemeral_steering_message_callback = ephemeral_log.append

            dod_store = DefinitionOfDoneStore(temp_dir)
            batch_runner = ToolBatchRunner(runtime_context, dod_store)

            definition = create_definition_of_done("Create a multi-file nginx guide.")
            definition.implementation_plan = str(plan_file)
            pending_todos = [
                {
                    "content": "Create the main index.html file with proper structure",
                    "active_form": "Creating the main index.html file with proper structure",
                    "status": "pending",
                },
                {
                    "content": "Create each chapter file with appropriate content",
                    "active_form": "Creating each chapter file with appropriate content",
                    "status": "pending",
                },
            ]
            sync_todos_to_definition_of_done(definition, pending_todos)

            write_call = ToolCall(
                id="write-index-recovered",
                name="write",
                arguments={
                    "file_path": str(index_file),
                    "content": "<html></html>\n",
                },
            )
            write_outcome = tool_outcome(
                tool_call=write_call,
                output=f"Successfully wrote 14 bytes to {index_file}",
                is_error=False,
            )
            fake_executor = FakeExecutor([write_outcome])

            turn_summary = TurnSummary(final_response="")
            await batch_runner.execute_batch(
                tool_calls=[write_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=definition,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert steering_log
            assert not ephemeral_log
            latest = steering_log[-1]
            assert "Next step: create `01-introduction.html`." in latest
            assert "Write a compact but real initial version of that file now" not in latest
      
        3033
        
        3034
        
        3035
        @pytest.mark.asyncio
        async def test_tool_batch_runner_todowrite_uses_concrete_output_language_for_aggregate_chapter_step(
            temp_dir: Path,
        ) -> None:
            """A TodoWrite follow-up should name a concrete file, not the aggregate todo.

            ``index.html`` exists and links to two chapter files that are not on
            disk; the remaining todo is the aggregate "Create chapter files ..."
            item. After a successful ``TodoWrite`` call, the last queued steering
            message must acknowledge the todo update and name
            ``01-introduction.html`` as the next step, without echoing the aggregate
            pending item verbatim.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Tripwire: any call here fails the test.
                raise AssertionError("Confidence scoring should not run in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Tripwire: any call here fails the test.
                raise AssertionError("Verification should not run in this scenario")

            def build_todo_entries() -> list[dict[str, str]]:
                # Returns a fresh list each call so the DoD sync and the tool call
                # never share list or dict objects.
                return [
                    {
                        "content": "Develop the main index.html file with proper structure",
                        "active_form": "Developing the main index.html file with proper structure",
                        "status": "completed",
                    },
                    {
                        "content": "Create chapter files with content and structure",
                        "active_form": "Creating chapter files with content and structure",
                        "status": "pending",
                    },
                ]

            guide_dir = temp_dir / "guides" / "nginx"
            chapters_dir = guide_dir / "chapters"
            chapters_dir.mkdir(parents=True)

            index_file = guide_dir / "index.html"
            index_lines = [
                "<html>",
                '<a href="chapters/01-introduction.html">Chapter 1: Introduction to Nginx</a>',
                '<a href="chapters/02-installation.html">Chapter 2: Installation and Setup</a>',
                "</html>",
            ]
            # Each line is newline-terminated, matching a "\n"-joined body plus a trailing newline.
            index_file.write_text("".join(f"{line}\n" for line in index_lines))

            plan_file = temp_dir / "implementation.md"
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_dir}/`",
                f"- `{chapters_dir}/`",
                f"- `{index_file}`",
                "",
            ]
            plan_file.write_text("\n".join(plan_lines))

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
            )
            steering_log: list[str] = []
            runtime_context.queue_steering_message_callback = steering_log.append

            dod_store = DefinitionOfDoneStore(temp_dir)
            batch_runner = ToolBatchRunner(runtime_context, dod_store)

            definition = create_definition_of_done("Create a multi-file nginx guide.")
            definition.implementation_plan = str(plan_file)
            definition.touched_files.append(str(index_file))
            sync_todos_to_definition_of_done(definition, build_todo_entries())

            todos = build_todo_entries()
            todo_call = ToolCall(
                id="todo-aggregate",
                name="TodoWrite",
                arguments={"todos": todos},
            )
            todo_outcome = tool_outcome(
                tool_call=todo_call,
                output="Todos updated",
                is_error=False,
                metadata={"new_todos": todos},
            )
            fake_executor = FakeExecutor([todo_outcome])

            turn_summary = TurnSummary(final_response="")
            await batch_runner.execute_batch(
                tool_calls=[todo_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=definition,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert steering_log
            latest = steering_log[-1]
            assert "Todo tracking is updated." in latest
            assert "Next step: create `01-introduction.html`." in latest
            aggregate_nudge = (
                "Continue with the next pending item: `Create chapter files with content and structure`."
            )
            assert aggregate_nudge not in latest
      
        3165
        
        3166
        
        3167
        @pytest.mark.asyncio
        async def test_duplicate_observation_nudge_prioritizes_missing_artifact_over_review(
            temp_dir: Path,
        ) -> None:
            """Check that a duplicate-read nudge names the planned file that is still absent.

            The implementation plan lists three files; `06-ssl-configuration.html` is
            never written to disk. A review-style todo is listed ahead of the todo for
            that chapter, yet the queued steering message is expected to mention the
            missing chapter instead of telling the agent to continue with the review.
            """

            async def _reject_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def _reject_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run for this scenario")

            # On-disk fixture: the index and chapter one exist, the SSL chapter does not.
            guide_root = temp_dir / "guides" / "nginx"
            chapters = guide_root / "chapters"
            chapters.mkdir(parents=True)
            index_path = guide_root / "index.html"
            chapter_one = chapters / "01-getting-started.html"
            missing_chapter = chapters / "06-ssl-configuration.html"
            chapter_one.write_text("<h1>One</h1>\n")
            index_path.write_text('<a href="chapters/01-getting-started.html">One</a>\n')

            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{index_path}`",
                f"- `{chapter_one}`",
                f"- `{missing_chapter}`",
                "",
            ]
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines))

            context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=_reject_confidence,
                verify_action=_reject_verification,
                auto_recover=False,
            )
            # Capture both steering channels so the assertions can inspect what was queued.
            persistent_messages: list[str] = []
            ephemeral_messages: list[str] = []
            context.queue_steering_message_callback = persistent_messages.append
            context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(implementation_plan)
            review_item = "Ensure all files are properly linked and formatted consistently"
            pending_titles = (
                review_item,
                "Create the final chapter (06-ssl-configuration.html)",
            )
            sync_todos_to_definition_of_done(
                dod,
                [
                    {
                        "content": title,
                        "active_form": f"Working on: {title}",
                        "status": "pending",
                    }
                    for title in pending_titles
                ],
            )

            # The module-level predicate itself must already favour the missing artifact.
            assert tool_batches_should_prioritize_missing_artifact(
                dod=dod,
                next_pending=dod.pending_items[0],
                missing_artifact=(missing_chapter, False),
                project_root=temp_dir,
            )

            duplicate_read = ToolCall(
                id="dup-read",
                name="read",
                arguments={"file_path": str(index_path)},
            )
            runner._queue_duplicate_observation_nudge(duplicate_read, dod=dod)  # type: ignore[attr-defined]

            assert persistent_messages
            message = persistent_messages[-1]
            assert "06-ssl-configuration.html" in message
            assert "Do not switch into review or consistency-check mode" in message
            review_continuation = (
                "Continue with the next pending item: `Ensure all files are properly linked and formatted consistently`"
            )
            assert review_continuation not in message
      
        3261
        
        3262
        
        3263
        @pytest.mark.asyncio
        async def test_tool_batch_runner_hands_off_to_verification_once_planned_artifacts_exist(
            temp_dir: Path,
        ) -> None:
            """Check the steering text queued after a write when every planned file exists.

            The index and both chapters listed in the implementation plan are already
            on disk. After a successful `write` of chapter two runs through
            `execute_batch`, the persistent steering messages must state that all
            planned artifacts exist, name the remaining pending item, and point
            toward verification.
            """

            async def _reject_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def _reject_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run for this scenario")

            # On-disk fixture: everything the plan names is present before the batch runs.
            guide_root = temp_dir / "guides" / "nginx"
            chapters = guide_root / "chapters"
            chapters.mkdir(parents=True)
            index_path = guide_root / "index.html"
            chapter_one = chapters / "01-getting-started.html"
            chapter_two = chapters / "02-installation.html"
            index_path.write_text('<a href="chapters/01-getting-started.html">One</a>\n')
            chapter_one.write_text("<h1>One</h1>\n")
            chapter_two.write_text("<h1>Two</h1>\n")

            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{chapters}/`",
                f"- `{index_path}`",
                f"- `{chapter_one}`",
                f"- `{chapter_two}`",
                "",
            ]
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines))

            context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=_reject_confidence,
                verify_action=_reject_verification,
                auto_recover=False,
            )
            # Capture both steering channels so the assertions can inspect what was queued.
            persistent_messages: list[str] = []
            ephemeral_messages: list[str] = []
            context.queue_steering_message_callback = persistent_messages.append
            context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(implementation_plan)
            todo_states = (
                ("Create the guide files", "completed"),
                ("Ensure all files are properly linked and formatted consistently", "pending"),
            )
            sync_todos_to_definition_of_done(
                dod,
                [
                    {
                        "content": title,
                        "active_form": f"Working on: {title}",
                        "status": status,
                    }
                    for title, status in todo_states
                ],
            )

            final_write = ToolCall(
                id="write-final",
                name="write",
                arguments={
                    "file_path": str(chapter_two),
                    "content": "<h1>Two</h1>\n",
                },
            )
            scripted_outcome = tool_outcome(
                tool_call=final_write,
                output=f"Successfully wrote {chapter_two}",
                is_error=False,
            )
            executor = FakeExecutor([scripted_outcome])

            summary = TurnSummary(final_response="")
            await runner.execute_batch(
                tool_calls=[final_write],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            # Each fragment must appear in at least one persistent steering message.
            expected_fragments = (
                "All explicitly planned artifacts now exist on disk.",
                "Ensure all files are properly linked and formatted consistently",
                "Move to verification once no specific mismatch remains.",
            )
            for fragment in expected_fragments:
                assert any(fragment in queued for queued in persistent_messages)
      
        3383
        
        3384
        
        3385
        @pytest.mark.asyncio
        async def test_tool_batch_runner_mutation_handoff_points_at_next_missing_artifact(
            temp_dir: Path,
        ) -> None:
            """Check that the hand-off after writing the index names the next unwritten file.

            Only `index.html` exists on disk; both chapter files listed in the plan
            are absent. After a successful `write` of the index runs through
            `execute_batch`, the last persistent steering message must point at
            `01-getting-started.html`, and nothing may be queued on the ephemeral
            channel.
            """

            async def _reject_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should not run in this scenario")

            async def _reject_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            # On-disk fixture: directories plus the index; the chapters are only planned.
            guide_root = temp_dir / "guides" / "nginx"
            chapters = guide_root / "chapters"
            guide_root.mkdir(parents=True)
            chapters.mkdir()
            index_path = guide_root / "index.html"
            index_path.write_text("<html></html>\n")
            chapter_one = chapters / "01-getting-started.html"
            chapter_two = chapters / "02-installation.html"

            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}/`",
                f"- `{index_path}`",
                f"- `{chapter_one}`",
                f"- `{chapter_two}`",
                "",
            ]
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines))

            context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=_reject_confidence,
                verify_action=_reject_verification,
                auto_recover=False,
            )
            # Capture both steering channels so the assertions can inspect what was queued.
            persistent_messages: list[str] = []
            ephemeral_messages: list[str] = []
            context.queue_steering_message_callback = persistent_messages.append
            context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(implementation_plan)
            pending_titles = (
                "Create the main index.html file with proper structure",
                "Create each chapter file in sequence, following the established pattern",
                "Ensure all files are properly linked and formatted consistently",
            )
            sync_todos_to_definition_of_done(
                dod,
                [
                    {
                        "content": title,
                        "active_form": f"Working on: {title}",
                        "status": "pending",
                    }
                    for title in pending_titles
                ],
            )

            index_write = ToolCall(
                id="write-index",
                name="write",
                arguments={"file_path": str(index_path), "content": "<html></html>\n"},
            )
            scripted_outcome = tool_outcome(
                tool_call=index_write,
                output=f"Successfully wrote {index_path}",
                is_error=False,
            )
            executor = FakeExecutor([scripted_outcome])

            summary = TurnSummary(final_response="")
            await runner.execute_batch(
                tool_calls=[index_write],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert persistent_messages
            assert ephemeral_messages == []
            handoff = persistent_messages[-1]
            assert "Next step: create `01-getting-started.html`." in handoff
            # Phrases that must be absent from this hand-off message.
            assert "Write a compact but real initial version of that file now" not in handoff
            assert "refresh `TodoWrite`" not in handoff
            assert "Do not reread reference material or spend the next turn on bookkeeping." in handoff
      
        3494
        
        3495
        
        3496
        @pytest.mark.asyncio
        async def test_tool_batch_runner_large_plan_does_not_claim_completion_early(
            temp_dir: Path,
        ) -> None:
            """Check that a partly finished large plan never yields a completion claim.

            The plan lists the index and seven chapters; only the index and the first
            five chapters exist on disk. After a successful `write` of chapter five
            runs through `execute_batch`, an ephemeral steering message must point at
            `06-performance-tuning.html`, and the "all planned artifacts exist" text
            must not be queued on either steering channel.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should not run in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            guide_root = temp_dir / "guides" / "nginx"
            chapters = guide_root / "chapters"
            guide_root.mkdir(parents=True)
            chapters.mkdir()
            index_path = guide_root / "index.html"
            index_path.write_text("<html></html>\n")

            chapter_paths = [
                chapters / "01-getting-started.html",
                chapters / "02-installation.html",
                chapters / "03-first-website.html",
                chapters / "04-configuration-basics.html",
                chapters / "05-advanced-configurations.html",
                chapters / "06-performance-tuning.html",
                chapters / "07-security-best-practices.html",
            ]
            # Chapters one through five exist; six and seven are planned but unwritten.
            for chapter in chapter_paths[:4]:
                chapter.write_text(f"<h1>{chapter.stem}</h1>\n")
            chapter_paths[4].write_text("<h1>Advanced configurations</h1>\n")

            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text(
                "\n".join(
                    [
                        "# Implementation Plan",
                        "",
                        "## File Changes",
                        f"- `{guide_root}/`",
                        f"- `{chapters}/`",
                        f"- `{index_path}`",
                        *[f"- `{path}`" for path in chapter_paths],
                        "",
                    ]
                )
            )

            context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            # Capture both steering channels so the assertions can inspect what was queued.
            persistent_messages: list[str] = []
            ephemeral_messages: list[str] = []
            context.queue_steering_message_callback = persistent_messages.append
            context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
            dod = create_definition_of_done("Create a thorough nginx guide.")
            dod.implementation_plan = str(implementation_plan)
            sync_todos_to_definition_of_done(
                dod,
                [
                    {
                        "content": "Create the nginx guide artifacts",
                        "active_form": "Creating nginx guide artifacts",
                        "status": "pending",
                    },
                    {
                        "content": "Verify all guide files are linked and complete",
                        "active_form": "Verifying guide linkage and completeness",
                        "status": "pending",
                    },
                ],
            )
            tool_call = ToolCall(
                id="write-chapter-05",
                name="write",
                arguments={
                    "file_path": str(chapter_paths[4]),
                    "content": "<h1>Advanced configurations</h1>\n",
                },
            )
            executor = FakeExecutor(
                [
                    tool_outcome(
                        tool_call=tool_call,
                        output=f"Successfully wrote {chapter_paths[4]}",
                        is_error=False,
                    )
                ]
            )

            summary = TurnSummary(final_response="")
            await runner.execute_batch(
                tool_calls=[tool_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert any(
                "Next step: create `06-performance-tuning.html`." in message
                for message in ephemeral_messages
            )
            # A premature completion claim is a failure on whichever channel carries it,
            # so inspect the persistent queue as well as the ephemeral one.
            assert not any(
                "All explicitly planned artifacts now exist on disk." in message
                for message in (*persistent_messages, *ephemeral_messages)
            )
      
        3622
        
        3623
        
        3624
        @pytest.mark.asyncio
      
        3625
        async def test_tool_batch_runner_uses_compact_missing_artifact_nudge_after_substantial_progress(
      
        3626
            temp_dir: Path,
      
        3627
        ) -> None:
      
        3628
            async def assess_confidence(
      
        3629
                tool_name: str,
      
        3630
                tool_args: dict,
      
        3631
                context: str,
      
        3632
            ) -> ConfidenceAssessment:
      
        3633
                raise AssertionError("Confidence scoring should not run in this scenario")
      
        3634
        
        3635
            async def verify_action(
      
        3636
                tool_name: str,
      
        3637
                tool_args: dict,
      
        3638
                result: str,
      
        3639
                expected: str = "",
      
        3640
            ) -> ActionVerification:
      
        3641
                raise AssertionError("Verification should not run in this scenario")
      
        3642
        
        3643
            guide_root = temp_dir / "guides" / "nginx"
      
        3644
            chapters = guide_root / "chapters"
      
        3645
            guide_root.mkdir(parents=True)
      
        3646
            chapters.mkdir()
      
        3647
            index_path = guide_root / "index.html"
      
        3648
            chapter_paths = [
      
        3649
                chapters / "01-introduction.html",
      
        3650
                chapters / "02-installation.html",
      
        3651
                chapters / "03-configuration.html",
      
        3652
                chapters / "04-basic-usage.html",
      
        3653
                chapters / "05-advanced-features.html",
      
        3654
            ]
      
        3655
            for path in (index_path, *chapter_paths[:4]):
      
        3656
                path.write_text("<html></html>\n")
      
        3657
        
        3658
            implementation_plan = temp_dir / "implementation.md"
      
        3659
            implementation_plan.write_text(
      
        3660
                "\n".join(
      
        3661
                    [
      
        3662
                        "# Implementation Plan",
      
        3663
                        "",
      
        3664
                        "## File Changes",
      
        3665
                        f"- `{guide_root}/`",
      
        3666
                        f"- `{chapters}/`",
      
        3667
                        f"- `{index_path}`",
      
        3668
                        *[f"- `{path}`" for path in chapter_paths],
      
        3669
                        "",
      
        3670
                    ]
      
        3671
                )
      
        3672
            )
      
        3673
        
        3674
            context = build_context(
      
        3675
                temp_dir=temp_dir,
      
        3676
                messages=[],
      
        3677
                safeguards=FakeSafeguards(),
      
        3678
                assess_confidence=assess_confidence,
      
        3679
                verify_action=verify_action,
      
        3680
                auto_recover=False,
      
        3681
            )
      
        3682
            persistent_messages: list[str] = []
      
        3683
            ephemeral_messages: list[str] = []
      
        3684
            context.queue_steering_message_callback = persistent_messages.append
      
        3685
            context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
      
        3686
            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
      
        3687
            dod = create_definition_of_done("Create a thorough nginx guide.")
      
        3688
            dod.implementation_plan = str(implementation_plan)
      
        3689
            dod.touched_files.extend(str(path) for path in (index_path, *chapter_paths[:4]))
      
        3690
            dod.completed_items.extend(
      
        3691
                [
      
        3692
                    "Create the nginx directory structure",
      
        3693
                    "Create the main index.html file with proper structure",
      
        3694
                ]
      
        3695
            )
      
        3696
            sync_todos_to_definition_of_done(
      
        3697
                dod,
      
        3698
                [
      
        3699
                    {
      
        3700
                        "content": "Create each chapter file with appropriate content",
      
        3701
                        "active_form": "Creating each chapter file with appropriate content",
      
        3702
                        "status": "pending",
      
        3703
                    }
      
        3704
                ],
      
        3705
            )
      
        3706
            tool_call = ToolCall(
      
        3707
                id="write-chapter-04",
      
        3708
                name="write",
      
        3709
                arguments={
      
        3710
                    "file_path": str(chapter_paths[3]),
      
        3711
                    "content": "<html>updated</html>\n",
      
        3712
                },
      
        3713
            )
      
        3714
            executor = FakeExecutor(
      
        3715
                [
      
        3716
                    tool_outcome(
      
        3717
                        tool_call=tool_call,
      
        3718
                        output=f"Successfully wrote {chapter_paths[3]}",
      
        3719
                        is_error=False,
      
        3720
                    )
      
        3721
                ]
      
        3722
            )
      
        3723
        
        3724
            summary = TurnSummary(final_response="")
      
        3725
            await runner.execute_batch(
      
        3726
                tool_calls=[tool_call],
      
        3727
                tool_source="assistant",
      
        3728
                pending_tool_calls_seen=set(),
      
        3729
                emit=_noop_emit,
      
        3730
                summary=summary,
      
        3731
                dod=dod,
      
        3732
                executor=executor,  # type: ignore[arg-type]
      
        3733
                on_confirmation=None,
      
        3734
                on_user_question=None,
      
        3735
                emit_confirmation=None,
      
        3736
                consecutive_errors=0,
      
        3737
            )
      
        3738
        
        3739
            assert ephemeral_messages
      
        3740
            message = ephemeral_messages[-1]
      
        3741
            assert "Next step: create `05-advanced-features.html`." in message
      
        3742
            assert "Do not reread reference material or spend the next turn on bookkeeping." in message
      
        3743
            assert "refresh `TodoWrite`" not in message
      
        3744
        
        3745
        
        3746
        @pytest.mark.asyncio
        async def test_tool_batch_runner_todowrite_with_missing_artifact_requeues_exact_resume_step(
            temp_dir: Path,
        ) -> None:
            """A TodoWrite-only batch with a planned file still missing re-queues the resume step.

            The implementation plan lists ``02-installation.html`` but that file is
            never written to disk. After the batch runs, the test expects a
            persistent steering message naming that file as the next step and
            nothing queued on the ephemeral channel.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Tripwire: reaching this hook fails the test.
                raise AssertionError("Confidence scoring should not run in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Tripwire: reaching this hook fails the test.
                raise AssertionError("Verification should not run in this scenario")

            def todo_entries() -> list[dict[str, str]]:
                # Build fresh dicts on every call so the DoD sync, the tool-call
                # arguments and the outcome metadata never share mutable objects.
                return [
                    {
                        "content": f"Create {file_name}",
                        "active_form": f"Creating {file_name}",
                        "status": status,
                    }
                    for file_name, status in (
                        ("01-getting-started.html", "completed"),
                        ("02-installation.html", "pending"),
                    )
                ]

            # On-disk layout: the index and chapter one exist; chapter two does not.
            nginx_root = temp_dir / "guides" / "nginx"
            chapters_dir = nginx_root / "chapters"
            nginx_root.mkdir(parents=True)
            chapters_dir.mkdir()
            index_html = nginx_root / "index.html"
            index_html.write_text("<html></html>\n")
            first_chapter = chapters_dir / "01-getting-started.html"
            second_chapter = chapters_dir / "02-installation.html"
            first_chapter.write_text("<h1>One</h1>\n")

            # The plan names both directories and all three files.
            plan_lines = ["# Implementation Plan", "", "## File Changes"]
            plan_lines.extend(f"- `{directory}/`" for directory in (nginx_root, chapters_dir))
            plan_lines.extend(
                f"- `{planned}`" for planned in (index_html, first_chapter, second_chapter)
            )
            plan_file = temp_dir / "implementation.md"
            plan_file.write_text("".join(f"{line}\n" for line in plan_lines))

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            persistent_messages: list[str] = []
            ephemeral_messages: list[str] = []
            runtime_context.queue_steering_message_callback = persistent_messages.append
            runtime_context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
            batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(plan_file)
            sync_todos_to_definition_of_done(dod, todo_entries())
            dod.touched_files.extend(str(written) for written in (index_html, first_chapter))

            todo_call = ToolCall(
                id="todo-only",
                name="TodoWrite",
                arguments={"todos": todo_entries()},
            )
            todo_outcome = tool_outcome(
                tool_call=todo_call,
                output="Todos updated",
                is_error=False,
                metadata={"new_todos": todo_entries()},
            )
            fake_executor = FakeExecutor([todo_outcome])

            turn_summary = TurnSummary(final_response="")
            await batch_runner.execute_batch(
                tool_calls=[todo_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=dod,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert persistent_messages
            steering = persistent_messages[-1]
            for expected_fragment in (
                "Todo tracking is updated. Next step: create `02-installation.html`.",
                "Prefer one `write(file_path=..., content=...)` call",
                "Make your next response the concrete mutation tool call itself.",
            ):
                assert expected_fragment in steering
            assert ephemeral_messages == []
      
        3887
        
        3888
        
        3889
        @pytest.mark.asyncio
        async def test_tool_batch_runner_todowrite_after_artifacts_exist_pushes_verification_handoff(
            temp_dir: Path,
        ) -> None:
            """A TodoWrite-only batch after every planned file exists queues a verification handoff.

            The index and both chapters are on disk, and the todo list still holds a
            pending "examine the Fortran guide" item alongside a pending verify item.
            The test expects the last queued steering message to mention the verify
            item and the move to verification, to omit the Fortran item, and the
            context's ``workflow_mode`` to equal ``"execute"``.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Tripwire: reaching this hook fails the test.
                raise AssertionError("Confidence scoring should not run in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Tripwire: reaching this hook fails the test.
                raise AssertionError("Verification should not run in this scenario")

            def todo_entries() -> list[dict[str, str]]:
                # Build fresh dicts on every call so the DoD sync, the tool-call
                # arguments and the outcome metadata never share mutable objects.
                pending_items = (
                    "First, examine the existing Fortran guide structure to understand the format and content organization",
                    "Verify all guide files are linked and complete",
                )
                return [
                    {
                        "content": item,
                        "active_form": f"Working on: {item}",
                        "status": "pending",
                    }
                    for item in pending_items
                ]

            # On-disk layout: every file named in the plan is present.
            nginx_root = temp_dir / "guides" / "nginx"
            chapters_dir = nginx_root / "chapters"
            nginx_root.mkdir(parents=True)
            chapters_dir.mkdir()
            index_html = nginx_root / "index.html"
            first_chapter = chapters_dir / "01-getting-started.html"
            second_chapter = chapters_dir / "02-installation.html"
            index_html.write_text("<html></html>\n")
            first_chapter.write_text("<h1>One</h1>\n")
            second_chapter.write_text("<h1>Two</h1>\n")

            plan_lines = ["# Implementation Plan", "", "## File Changes"]
            plan_lines.extend(f"- `{directory}/`" for directory in (nginx_root, chapters_dir))
            plan_lines.extend(
                f"- `{planned}`" for planned in (index_html, first_chapter, second_chapter)
            )
            plan_file = temp_dir / "implementation.md"
            plan_file.write_text("".join(f"{line}\n" for line in plan_lines))

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            queued_messages: list[str] = []
            runtime_context.queue_steering_message_callback = queued_messages.append
            batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(plan_file)
            dod.verification_commands = [f"ls -la {nginx_root}"]
            sync_todos_to_definition_of_done(
                dod,
                todo_entries(),
                project_root=temp_dir,
            )

            todo_call = ToolCall(
                id="todo-only",
                name="TodoWrite",
                arguments={"todos": todo_entries()},
            )
            todo_outcome = tool_outcome(
                tool_call=todo_call,
                output="Todos updated",
                is_error=False,
                metadata={"new_todos": todo_entries()},
            )
            fake_executor = FakeExecutor([todo_outcome])

            turn_summary = TurnSummary(final_response="")
            await batch_runner.execute_batch(
                tool_calls=[todo_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=dod,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert queued_messages
            steering = queued_messages[-1]
            for expected_fragment in (
                "Todo tracking is updated. All explicitly planned artifacts now exist on disk.",
                "Verify all guide files are linked and complete",
                "Move to verification once no specific mismatch remains.",
                "reopen reference materials",
            ):
                assert expected_fragment in steering
            assert "Fortran guide structure" not in steering
            assert runtime_context.workflow_mode == "execute"
      
        4032
        
        4033
        
        4034
        @pytest.mark.asyncio
        async def test_tool_batch_runner_todowrite_after_outputs_exist_but_links_missing_still_handoffs_to_verify(
            temp_dir: Path,
        ) -> None:
            """A TodoWrite-only batch after all planned outputs exist hands off to verification.

            The index file links to both chapter files and to ``../index.html``; all
            planned files exist on disk and the single todo is still ``in_progress``.
            The test expects the last queued steering message to announce that
            verification should run next, to omit the "repair or verify" wording,
            and the context's ``workflow_mode`` to equal ``"verify"``.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Tripwire: reaching this hook fails the test.
                raise AssertionError("Confidence scoring should not run for this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Tripwire: reaching this hook fails the test.
                raise AssertionError("Verification should not run for this scenario")

            def todo_entries() -> list[dict[str, str]]:
                # Build a fresh dict on every call so the DoD sync, the tool-call
                # arguments and the outcome metadata never share mutable objects.
                return [
                    {
                        "content": "Create chapter files following the established pattern",
                        "active_form": "Creating chapter files",
                        "status": "in_progress",
                    }
                ]

            # On-disk layout: index with three anchors plus both chapter files.
            nginx_root = temp_dir / "guides" / "nginx"
            chapters_dir = nginx_root / "chapters"
            nginx_root.mkdir(parents=True)
            chapters_dir.mkdir()
            index_html = nginx_root / "index.html"
            first_chapter = chapters_dir / "01-introduction.html"
            second_chapter = chapters_dir / "02-installation.html"
            index_anchors = (
                '<a href="chapters/01-introduction.html">Intro</a>',
                '<a href="chapters/02-installation.html">Install</a>',
                '<a href="../index.html">Back</a>',
            )
            index_html.write_text("".join(f"{anchor}\n" for anchor in index_anchors))
            first_chapter.write_text("<html></html>\n")
            second_chapter.write_text("<html></html>\n")

            plan_lines = ["# Implementation Plan", "", "## File Changes"]
            plan_lines.extend(f"- `{directory}/`" for directory in (nginx_root, chapters_dir))
            plan_lines.extend(
                f"- `{planned}`" for planned in (index_html, first_chapter, second_chapter)
            )
            plan_file = temp_dir / "implementation.md"
            plan_file.write_text("".join(f"{line}\n" for line in plan_lines))

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            queued_messages: list[str] = []
            runtime_context.queue_steering_message_callback = queued_messages.append
            batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(plan_file)
            dod.verification_commands = [f"ls -la {nginx_root}"]
            sync_todos_to_definition_of_done(
                dod,
                todo_entries(),
                project_root=temp_dir,
            )

            todo_call = ToolCall(
                id="todo-post-build",
                name="TodoWrite",
                arguments={"todos": todo_entries()},
            )
            todo_outcome = tool_outcome(
                tool_call=todo_call,
                output="Todos updated",
                is_error=False,
                metadata={"new_todos": todo_entries()},
            )
            fake_executor = FakeExecutor([todo_outcome])

            turn_summary = TurnSummary(final_response="")
            await batch_runner.execute_batch(
                tool_calls=[todo_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=dod,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert queued_messages
            steering = queued_messages[-1]
            for expected_fragment in (
                "Todo tracking is updated. All explicitly planned artifacts now exist on disk.",
                "Verification should run next.",
            ):
                assert expected_fragment in steering
            assert "Repair or verify the current files instead of expanding the artifact set." not in steering
            assert runtime_context.workflow_mode == "verify"
      
        4169
        
        4170
        
        4171
        @pytest.mark.asyncio
        async def test_tool_batch_runner_preempts_post_build_audit_after_todowrite_verify_handoff(
            temp_dir: Path,
        ) -> None:
            """TodoWrite followed by a read: only the TodoWrite may reach the executor.

            The guide directory, its index and both chapter files are created up front,
            and the implementation plan lists exactly those paths.  The batch contains a
            ``TodoWrite`` (whose outcome reports an empty todo list) and a trailing
            ``read`` of the index.  The assertions require that the batch continues
            without halting, that the ``read`` is never executed, that a single tool
            result message is recorded, that the context ends up in ``verify`` workflow
            mode, and that the last queued steering message says verification runs next.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Calling this stub at all is a test failure.
                raise AssertionError("Confidence scoring should not run for this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Calling this stub at all is a test failure.
                raise AssertionError("Verification should not run for this scenario")

            # --- on-disk guide layout -------------------------------------------------
            guide_root = temp_dir / "guides" / "nginx"
            chapters = guide_root / "chapters"
            guide_root.mkdir(parents=True)
            chapters.mkdir()

            index_path = guide_root / "index.html"
            chapter_one = chapters / "01-introduction.html"
            chapter_two = chapters / "02-installation.html"

            index_lines = [
                '<li><a href="chapters/01-introduction.html">Chapter 1: Introduction</a></li>',
                '<li><a href="chapters/02-installation.html">Chapter 2: Installation</a></li>',
                "",
            ]
            index_path.write_text("\n".join(index_lines))
            for chapter_file in (chapter_one, chapter_two):
                chapter_file.write_text("<html></html>\n")

            # --- implementation plan naming every artifact created above --------------
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}/`",
                f"- `{chapters}/`",
                f"- `{index_path}`",
                f"- `{chapter_one}`",
                f"- `{chapter_two}`",
                "",
            ]
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines))

            # --- runtime wiring -------------------------------------------------------
            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            steering_log: list[str] = []
            runtime_context.queue_steering_message_callback = steering_log.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(implementation_plan)
            dod.verification_commands = [f"ls -la {guide_root}"]

            # --- the batch under test -------------------------------------------------
            todo_call = ToolCall(
                id="todo-post-build-preempt",
                name="TodoWrite",
                arguments={"todos": []},
            )
            audit_read = ToolCall(
                id="read-after-todo",
                name="read",
                arguments={"file_path": str(index_path)},
            )
            scripted_outcomes = [
                tool_outcome(
                    tool_call=todo_call,
                    output="Todos updated",
                    is_error=False,
                    metadata={"new_todos": []},
                ),
                tool_outcome(
                    tool_call=audit_read,
                    output=index_path.read_text(),
                    is_error=False,
                ),
            ]
            fake_executor = FakeExecutor(scripted_outcomes)

            turn_summary = TurnSummary(final_response="")
            batch_result = await runner.execute_batch(
                tool_calls=[todo_call, audit_read],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=dod,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            # --- expectations ---------------------------------------------------------
            assert batch_result.continue_after_batch is True
            assert batch_result.halted is False
            executed_ids = [call.id for call in fake_executor.calls]
            assert executed_ids == ["todo-post-build-preempt"]
            assert len(turn_summary.tool_result_messages) == 1
            assert runtime_context.workflow_mode == "verify"
            assert steering_log
            latest_steering = steering_log[-1]
            assert "Verification should run next." in latest_steering
      
        4289
        
        4290
        
        4291
        @pytest.mark.asyncio
        async def test_tool_batch_runner_todowrite_complete_directory_plan_does_not_reinfer_first_child(
            temp_dir: Path,
        ) -> None:
            """TodoWrite with a directory-level plan hands off to verification.

            The plan names only the index file and the ``chapters/`` directory, and the
            message history contains an earlier ``read`` of a reference chapter in a
            separate ``fortran`` tree.  After running a lone ``TodoWrite`` the test
            expects the steering message to announce verification without mentioning
            ``01-introduction.html`` or "chapter files", the workflow mode to be
            ``verify``, and the last tool result message to mention the verification
            review while not mentioning the "fortran guide structure".
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Calling this stub at all is a test failure.
                raise AssertionError("Confidence scoring should not run for this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Calling this stub at all is a test failure.
                raise AssertionError("Verification should not run for this scenario")

            # --- reference chapter that the earlier assistant turn read ---------------
            reference = temp_dir / "fortran" / "chapters" / "01-introduction.html"
            reference.parent.mkdir(parents=True)
            reference.write_text("<h1>Introduction</h1>\n")

            # --- generated guide layout -----------------------------------------------
            guide_root = temp_dir / "Loader" / "guides" / "nginx"
            chapters = guide_root / "chapters"
            guide_root.mkdir(parents=True)
            chapters.mkdir()

            index_path = guide_root / "index.html"
            chapter_one = chapters / "01-introduction.html"
            chapter_two = chapters / "02-installation.html"
            chapter_three = chapters / "03-basic-configuration.html"

            index_lines = [
                '<a href="chapters/01-introduction.html">Introduction</a>',
                '<a href="chapters/02-installation.html">Installation</a>',
                '<a href="chapters/03-basic-configuration.html">Configuration</a>',
                "",
            ]
            index_path.write_text("\n".join(index_lines))
            for chapter_file in (chapter_one, chapter_two, chapter_three):
                chapter_file.write_text("<html></html>\n")

            # --- implementation plan: index file plus the chapters directory ----------
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{index_path}`",
                f"- `{chapters}/`",
                "",
            ]
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines))

            # --- prior conversation and runtime wiring --------------------------------
            reference_read = ToolCall(
                id="read-reference-child",
                name="read",
                arguments={"file_path": str(reference)},
            )
            messages = [
                Message(
                    role=Role.ASSISTANT,
                    content="I examined the reference guide structure.",
                    tool_calls=[reference_read],
                )
            ]
            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=messages,
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            steering_log: list[str] = []
            runtime_context.queue_steering_message_callback = steering_log.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create an equally thorough nginx guide.")
            dod.implementation_plan = str(implementation_plan)
            dod.verification_commands = [f"ls -la {guide_root}"]

            # --- the batch under test -------------------------------------------------
            todo_call = ToolCall(
                id="todo-complete-directory-plan",
                name="TodoWrite",
                arguments={"todos": []},
            )
            scripted_outcomes = [
                tool_outcome(
                    tool_call=todo_call,
                    output="Todos updated",
                    is_error=False,
                    metadata={"new_todos": []},
                )
            ]
            fake_executor = FakeExecutor(scripted_outcomes)

            turn_summary = TurnSummary(final_response="")
            batch_result = await runner.execute_batch(
                tool_calls=[todo_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=dod,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            # --- expectations ---------------------------------------------------------
            assert batch_result.continue_after_batch is True
            assert steering_log
            latest_steering = steering_log[-1]
            assert "Verification should run next." in latest_steering
            assert "01-introduction.html" not in latest_steering
            assert "chapter files" not in latest_steering.lower()
            assert runtime_context.workflow_mode == "verify"
            assert turn_summary.tool_result_messages
            assert (
                "verification should be reviewed next"
                in turn_summary.tool_result_messages[-1].content
            )
            assert (
                "fortran guide structure"
                not in turn_summary.tool_result_messages[-1].content.lower()
            )
      
        4419
        
        4420
        
        4421
        @pytest.mark.asyncio
        async def test_tool_batch_runner_preempts_post_build_observation_batch_for_verify_handoff(
            temp_dir: Path,
        ) -> None:
            """A bash + read observation batch is cut short after the first call.

            The guide tree (index plus three chapter files) exists on disk and the plan
            lists the guide root, the chapters directory and the index.  The batch is a
            ``bash`` listing followed by a ``read`` of the index.  The test expects only
            the ``bash`` call to reach the executor, the workflow mode to be ``verify``,
            and the last queued steering message to say verification runs next.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Calling this stub at all is a test failure.
                raise AssertionError("Confidence scoring should not run for this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Calling this stub at all is a test failure.
                raise AssertionError("Verification should not run for this scenario")

            # --- on-disk guide layout -------------------------------------------------
            guide_root = temp_dir / "guides" / "nginx"
            chapters = guide_root / "chapters"
            guide_root.mkdir(parents=True)
            chapters.mkdir()

            index_path = guide_root / "index.html"
            chapter_one = chapters / "01-introduction.html"
            chapter_two = chapters / "02-installation.html"
            chapter_three = chapters / "03-configuration.html"

            index_lines = [
                '<li><a href="chapters/01-introduction.html">Chapter 1: Introduction</a></li>',
                '<li><a href="chapters/02-installation.html">Chapter 2: Installation</a></li>',
                "",
            ]
            index_path.write_text("\n".join(index_lines))
            for chapter_file in (chapter_one, chapter_two, chapter_three):
                chapter_file.write_text("<html></html>\n")

            # --- implementation plan --------------------------------------------------
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}/`",
                f"- `{chapters}/`",
                f"- `{index_path}`",
                "",
            ]
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines))

            # --- runtime wiring -------------------------------------------------------
            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            steering_log: list[str] = []
            runtime_context.queue_steering_message_callback = steering_log.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create an equally thorough nginx guide.")
            dod.implementation_plan = str(implementation_plan)
            dod.verification_commands = [f"ls -la {guide_root}"]

            # --- the batch under test -------------------------------------------------
            audit_bash = ToolCall(
                id="bash-post-build-audit",
                name="bash",
                arguments={"command": f"ls -la {guide_root}"},
            )
            audit_read = ToolCall(
                id="read-index-after-audit",
                name="read",
                arguments={"file_path": str(index_path)},
            )
            scripted_outcomes = [
                tool_outcome(
                    tool_call=audit_bash,
                    output="total 8\n",
                    is_error=False,
                ),
                tool_outcome(
                    tool_call=audit_read,
                    output=index_path.read_text(),
                    is_error=False,
                ),
            ]
            fake_executor = FakeExecutor(scripted_outcomes)

            turn_summary = TurnSummary(final_response="")
            batch_result = await runner.execute_batch(
                tool_calls=[audit_bash, audit_read],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=dod,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            # --- expectations ---------------------------------------------------------
            assert batch_result.continue_after_batch is True
            executed_ids = [call.id for call in fake_executor.calls]
            assert executed_ids == ["bash-post-build-audit"]
            assert runtime_context.workflow_mode == "verify"
            assert steering_log
            latest_steering = steering_log[-1]
            assert "Verification should run next." in latest_steering
      
        4536
        
        4537
        
        4538
        @pytest.mark.asyncio
        async def test_tool_batch_runner_preempts_post_build_observation_batch_during_consistency_review(
            temp_dir: Path,
        ) -> None:
            """Two reads during a pending consistency review: only the first executes.

            The index and three chapter files exist as placeholder HTML, and the
            definition of done is synced with a single pending "review for consistency
            and completeness" todo.  The batch holds two ``read`` calls.  The test
            expects only the first to reach the executor and the last steering message
            (ephemeral queue if it received anything, otherwise the regular queue) to
            state that all planned artifacts exist and to mention "generated files".
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Calling this stub at all is a test failure.
                raise AssertionError("Confidence scoring should not run for this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Calling this stub at all is a test failure.
                raise AssertionError("Verification should not run for this scenario")

            # --- on-disk guide layout (all files are the same placeholder) ------------
            guide_root = temp_dir / "guides" / "nginx"
            chapters = guide_root / "chapters"
            guide_root.mkdir(parents=True)
            chapters.mkdir()

            index_path = guide_root / "index.html"
            chapter_one = chapters / "01-introduction.html"
            chapter_two = chapters / "02-installation.html"
            chapter_three = chapters / "03-basic-configuration.html"
            for html_file in (index_path, chapter_one, chapter_two, chapter_three):
                html_file.write_text("<html></html>\n")

            # --- implementation plan --------------------------------------------------
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}/`",
                f"- `{chapters}/`",
                f"- `{index_path}`",
                "",
            ]
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines))

            # --- runtime wiring: capture both steering queues -------------------------
            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            queued_messages: list[str] = []
            queued_ephemeral: list[str] = []
            runtime_context.queue_steering_message_callback = queued_messages.append
            runtime_context.queue_ephemeral_steering_message_callback = queued_ephemeral.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create an equally thorough nginx guide.")
            dod.implementation_plan = str(implementation_plan)
            dod.verification_commands = [f"ls -la {guide_root}"]
            review_todo = {
                "content": "Review the generated guide for consistency and completeness",
                "active_form": "Reviewing the generated guide for consistency and completeness",
                "status": "pending",
            }
            sync_todos_to_definition_of_done(
                dod,
                [review_todo],
                project_root=temp_dir,
            )

            # --- the batch under test -------------------------------------------------
            audit_read = ToolCall(
                id="read-index-during-review",
                name="read",
                arguments={"file_path": str(index_path)},
            )
            second_read = ToolCall(
                id="read-chapter-after-review",
                name="read",
                arguments={"file_path": str(chapter_one)},
            )
            scripted_outcomes = [
                tool_outcome(
                    tool_call=audit_read,
                    output=index_path.read_text(),
                    is_error=False,
                ),
                tool_outcome(
                    tool_call=second_read,
                    output=chapter_one.read_text(),
                    is_error=False,
                ),
            ]
            fake_executor = FakeExecutor(scripted_outcomes)

            turn_summary = TurnSummary(final_response="")
            batch_result = await runner.execute_batch(
                tool_calls=[audit_read, second_read],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=dod,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            # --- expectations ---------------------------------------------------------
            assert batch_result.continue_after_batch is True
            executed_ids = [call.id for call in fake_executor.calls]
            assert executed_ids == ["read-index-during-review"]
            # Prefer the ephemeral queue when it captured anything.
            queued = queued_ephemeral or queued_messages
            assert queued
            latest_steering = queued[-1]
            assert "All explicitly planned artifacts already exist." in latest_steering
            assert "generated files" in latest_steering
      
        4659
        
        4660
        
        4661
        @pytest.mark.asyncio
        async def test_tool_batch_runner_skips_post_build_user_question_during_consistency_review(
            temp_dir: Path,
        ) -> None:
            """An AskUserQuestion issued after the build is skipped, not executed.

            Every planned path exists on disk and the definition of done carries one
            pending item about linking and formatting.  The batch is a single
            ``AskUserQuestion`` call and the executor has no scripted outcomes.  The test
            expects no executor calls, a steering message that redirects to
            review/verification and forbids further clarification questions, ``verify``
            workflow mode, and a tool result message marking the question as skipped.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Calling this stub at all is a test failure.
                raise AssertionError("Confidence scoring should not run for this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Calling this stub at all is a test failure.
                raise AssertionError("Verification should not run for this scenario")

            # --- on-disk guide layout -------------------------------------------------
            guide_root = temp_dir / "guides" / "nginx"
            chapters = guide_root / "chapters"
            guide_root.mkdir(parents=True)
            chapters.mkdir()

            index_path = guide_root / "index.html"
            chapter_one = chapters / "01-introduction.html"
            chapter_two = chapters / "02-installation.html"

            index_lines = [
                '<li><a href="chapters/01-introduction.html">Chapter 1: Introduction</a></li>',
                '<li><a href="chapters/02-installation.html">Chapter 2: Installation</a></li>',
                "",
            ]
            index_path.write_text("\n".join(index_lines))
            for chapter_file in (chapter_one, chapter_two):
                chapter_file.write_text("<html></html>\n")

            # --- implementation plan naming every artifact created above --------------
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}/`",
                f"- `{chapters}/`",
                f"- `{index_path}`",
                f"- `{chapter_one}`",
                f"- `{chapter_two}`",
                "",
            ]
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines))

            # --- runtime wiring -------------------------------------------------------
            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            steering_log: list[str] = []
            runtime_context.queue_steering_message_callback = steering_log.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create an equally thorough nginx guide.")
            dod.implementation_plan = str(implementation_plan)
            dod.verification_commands = [f"ls -la {guide_root}"]
            dod.pending_items = ["Ensure all files are properly linked and formatted"]

            # --- the batch under test -------------------------------------------------
            question_arguments = {
                "question": "Which specific aspects of the reference guide should I copy?",
                "context": "I already created the output files and want to ensure they match.",
            }
            question_call = ToolCall(
                id="ask-post-build-review",
                name="AskUserQuestion",
                arguments=question_arguments,
            )
            # No scripted outcomes: the question is expected never to be executed.
            fake_executor = FakeExecutor([])

            turn_summary = TurnSummary(final_response="")
            batch_result = await runner.execute_batch(
                tool_calls=[question_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=dod,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            # --- expectations ---------------------------------------------------------
            assert batch_result.continue_after_batch is True
            assert fake_executor.calls == []
            assert steering_log
            latest_steering = steering_log[-1]
            assert (
                "The remaining work is review/verification of the generated files."
                in latest_steering
            )
            assert (
                "Do not ask the user for more clarification about the reference pattern now."
                in latest_steering
            )
            assert "Verification should run next." in latest_steering
            assert runtime_context.workflow_mode == "verify"
            assert turn_summary.tool_result_messages
            last_tool_result = turn_summary.tool_result_messages[-1]
            assert "Skipped - stale post-build user question" in last_tool_result.content
      
        4766
        
        4767
        
        4768
        @pytest.mark.asyncio
        async def test_tool_batch_runner_rewrites_stale_todowrite_summary_from_reconciled_dod(
            temp_dir: Path,
        ) -> None:
            """Check the TodoWrite result message when the submitted todo is stale.

            Scenario: the guide directory, its chapters directory and ``index.html``
            listed in the implementation plan are already on disk, yet the tool call
            submits a single pending todo about examining the reference guide.  The
            last recorded tool-result message must mention the todo update and the
            upcoming verification review, and must not repeat the stale todo text.
            """

            async def assess_confidence(
                tool_name: str, tool_args: dict, context: str
            ) -> ConfidenceAssessment:
                # Wired into the context only so the test fails loudly if it is called.
                raise AssertionError("Confidence scoring should not run for this scenario")

            async def verify_action(
                tool_name: str, tool_args: dict, result: str, expected: str = ""
            ) -> ActionVerification:
                # Same idea as above: any call is treated as a test failure.
                raise AssertionError("Verification should not run for this scenario")

            def stale_todo() -> dict[str, str]:
                # A new dict per call, so the tool arguments and the executor
                # metadata never share one mutable object.
                return {
                    "content": "First, examine the existing fortran guide structure and content to understand the format",
                    "active_form": "Working on: First, examine the existing fortran guide structure and content to understand the format",
                    "status": "pending",
                }

            # --- on-disk fixture: guide root, chapters, index -------------------
            nginx_root = temp_dir / "guides" / "nginx"
            chapters_dir = nginx_root / "chapters"
            index_file = nginx_root / "index.html"
            nginx_root.mkdir(parents=True)
            chapters_dir.mkdir()

            placeholder_page = "<html></html>\n"
            chapter_names = (
                "01-introduction.html",
                "02-installation.html",
                "03-basic-configuration.html",
                "04-advanced-usage.html",
                "05-troubleshooting.html",
            )
            for chapter_name in chapter_names:
                (chapters_dir / chapter_name).write_text(placeholder_page)
            index_file.write_text(placeholder_page)

            # --- implementation plan naming the paths created above -------------
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{nginx_root}/`",
                f"- `{chapters_dir}/`",
                f"- `{index_file}`",
                "",
            ]
            plan_file = temp_dir / "implementation.md"
            plan_file.write_text("\n".join(plan_lines))

            # --- runtime wiring ---------------------------------------------------
            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))
            dod = create_definition_of_done("Create an equally thorough nginx guide.")
            dod.implementation_plan = str(plan_file)
            dod.verification_commands = [f"ls -la {nginx_root}"]

            todo_call = ToolCall(
                id="todo-stale-summary",
                name="TodoWrite",
                arguments={"todos": [stale_todo()]},
            )
            scripted_outcome = tool_outcome(
                tool_call=todo_call,
                output="Todos updated",
                is_error=False,
                metadata={"new_todos": [stale_todo()]},
            )
            fake_executor = FakeExecutor([scripted_outcome])

            turn_summary = TurnSummary(final_response="")
            batch_result = await batch_runner.execute_batch(
                tool_calls=[todo_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=dod,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert batch_result.continue_after_batch is True
            assert turn_summary.tool_result_messages
            tool_message = turn_summary.tool_result_messages[-1].content
            for expected_fragment in (
                "updated todo list",
                "verification should be reviewed next",
            ):
                assert expected_fragment in tool_message
            assert "next pending:" not in tool_message
            assert "fortran guide structure" not in tool_message.lower()
      
        4884
        
        4885
        
        4886
        @pytest.mark.asyncio
        async def test_tool_batch_runner_todowrite_drops_unplanned_expansion_after_outputs_exist(
            temp_dir: Path,
        ) -> None:
            """Check the steering message when a TodoWrite adds an unplanned chapter.

            Scenario: the index and the two chapters listed in the implementation
            plan already exist on disk.  The submitted todo list additionally
            contains a pending ``08-troubleshooting.html`` item that the plan does
            not list.  The last queued steering message must state that the
            planned artifacts exist and that verification runs next, must not
            mention the extra chapter, and the context must end in ``"verify"``
            workflow mode.
            """

            async def assess_confidence(
                tool_name: str, tool_args: dict, context: str
            ) -> ConfidenceAssessment:
                # Wired into the context only so the test fails loudly if it is called.
                raise AssertionError("Confidence scoring should not run for this scenario")

            async def verify_action(
                tool_name: str, tool_args: dict, result: str, expected: str = ""
            ) -> ActionVerification:
                # Same idea as above: any call is treated as a test failure.
                raise AssertionError("Verification should not run for this scenario")

            def todo_entries(form_key: str) -> list[dict[str, str]]:
                # The tool-call arguments spell the key "activeForm" while the
                # executor metadata spells it "active_form"; the caller picks the
                # spelling.  New dicts are built on every call.
                rows = (
                    ("Create index.html for nginx guide", "Creating index.html", "in_progress"),
                    ("Create chapter 01-introduction.html", "Creating chapter 01-introduction.html", "completed"),
                    ("Create chapter 02-installation.html", "Creating chapter 02-installation.html", "completed"),
                    ("Create chapter 08-troubleshooting.html", "Creating chapter 08-troubleshooting.html", "pending"),
                )
                return [
                    {"content": content, form_key: active_form, "status": status}
                    for content, active_form, status in rows
                ]

            # --- on-disk fixture: every planned artifact already exists ----------
            nginx_root = temp_dir / "guides" / "nginx"
            chapters_dir = nginx_root / "chapters"
            index_file = nginx_root / "index.html"
            intro_chapter = chapters_dir / "01-introduction.html"
            install_chapter = chapters_dir / "02-installation.html"
            nginx_root.mkdir(parents=True)
            chapters_dir.mkdir()

            index_lines = [
                '<a href="chapters/01-introduction.html">Intro</a>',
                '<a href="chapters/02-installation.html">Install</a>',
                '<a href="../index.html">Back</a>',
                "",
            ]
            index_file.write_text("\n".join(index_lines))
            for chapter_file in (intro_chapter, install_chapter):
                chapter_file.write_text("<html></html>\n")

            # --- implementation plan naming exactly those artifacts --------------
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{nginx_root}/`",
                f"- `{chapters_dir}/`",
                f"- `{index_file}`",
                f"- `{intro_chapter}`",
                f"- `{install_chapter}`",
                "",
            ]
            plan_file = temp_dir / "implementation.md"
            plan_file.write_text("\n".join(plan_lines))

            # --- runtime wiring ---------------------------------------------------
            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            steering_log: list[str] = []
            runtime_context.queue_steering_message_callback = steering_log.append
            batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))
            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(plan_file)
            dod.verification_commands = [f"ls -la {nginx_root}"]

            todo_call = ToolCall(
                id="todo-post-build-expansion",
                name="TodoWrite",
                arguments={"todos": todo_entries("activeForm")},
            )
            scripted_outcome = tool_outcome(
                tool_call=todo_call,
                output="Todos updated",
                is_error=False,
                metadata={"new_todos": todo_entries("active_form")},
            )
            fake_executor = FakeExecutor([scripted_outcome])

            turn_summary = TurnSummary(final_response="")
            await batch_runner.execute_batch(
                tool_calls=[todo_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=dod,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert steering_log
            steering_message = steering_log[-1]
            for expected_fragment in (
                "Todo tracking is updated. All explicitly planned artifacts now exist on disk.",
                "Verification should run next.",
            ):
                assert expected_fragment in steering_message
            for forbidden_fragment in (
                "Repair or verify the current files instead of expanding the artifact set.",
                "08-troubleshooting.html",
            ):
                assert forbidden_fragment not in steering_message
            assert runtime_context.workflow_mode == "verify"
      
        5041
        
        5042
        
        5043
        @pytest.mark.asyncio
        async def test_tool_batch_runner_todowrite_with_existing_output_roots_requeues_next_mutation(
            temp_dir: Path,
        ) -> None:
            """Check the steering message when a TodoWrite repeats an unfinished plan.

            Scenario: the guide root, an empty chapters directory and an
            ``index.html`` linking to ``chapters/01-introduction.html`` exist on
            disk.  The definition of done has already been synced with a todo list
            whose last item ("Write the introduction chapter") is pending, and the
            TodoWrite call submits that same list again.  The last queued steering
            message must name ``01-introduction.html`` as the next file to create
            and ask for a concrete mutation tool call.
            """

            async def assess_confidence(
                tool_name: str, tool_args: dict, context: str
            ) -> ConfidenceAssessment:
                # Wired into the context only so the test fails loudly if it is called.
                raise AssertionError("Confidence scoring should not run in this scenario")

            async def verify_action(
                tool_name: str, tool_args: dict, result: str, expected: str = ""
            ) -> ActionVerification:
                # Same idea as above: any call is treated as a test failure.
                raise AssertionError("Verification should not run in this scenario")

            def planned_todos() -> list[dict[str, str]]:
                # The same three todos feed the DoD sync, the tool-call arguments
                # and the executor metadata; each use gets its own new objects.
                rows = (
                    ("Examine the existing Fortran guide structure", "Examining the existing Fortran guide structure", "completed"),
                    ("Create the nginx directory structure", "Creating the nginx directory structure", "completed"),
                    ("Write the introduction chapter", "Writing the introduction chapter", "pending"),
                )
                return [
                    {"content": content, "active_form": active_form, "status": status}
                    for content, active_form, status in rows
                ]

            # --- on-disk fixture: roots plus an index that links a missing chapter
            nginx_root = temp_dir / "guides" / "nginx"
            chapters_dir = nginx_root / "chapters"
            index_file = nginx_root / "index.html"
            nginx_root.mkdir(parents=True)
            chapters_dir.mkdir()

            index_lines = [
                "<!DOCTYPE html>",
                "<html>",
                "<body>",
                '<a href="chapters/01-introduction.html">Introduction</a>',
                "</body>",
                "</html>",
                "",
            ]
            index_file.write_text("\n".join(index_lines))

            # --- implementation plan ----------------------------------------------
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{nginx_root}/`",
                f"- `{chapters_dir}/`",
                f"- `{index_file}`",
                "",
            ]
            plan_file = temp_dir / "implementation.md"
            plan_file.write_text("\n".join(plan_lines))

            # --- runtime wiring ---------------------------------------------------
            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            steering_log: list[str] = []
            runtime_context.queue_steering_message_callback = steering_log.append
            batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(plan_file)
            dod.touched_files.append(str(index_file))
            sync_todos_to_definition_of_done(dod, planned_todos(), project_root=temp_dir)

            todo_call = ToolCall(
                id="todo-next-mutation",
                name="TodoWrite",
                arguments={"todos": planned_todos()},
            )
            scripted_outcome = tool_outcome(
                tool_call=todo_call,
                output="Todos updated",
                is_error=False,
                metadata={"new_todos": planned_todos()},
            )
            fake_executor = FakeExecutor([scripted_outcome])

            turn_summary = TurnSummary(final_response="")
            await batch_runner.execute_batch(
                tool_calls=[todo_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=dod,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert steering_log
            steering_message = steering_log[-1]
            for expected_fragment in (
                "Todo tracking is updated. Next step: create `01-introduction.html`.",
                "Prefer one `write(file_path=..., content=...)` call",
                "Make your next response the concrete mutation tool call itself.",
            ):
                assert expected_fragment in steering_message
      
        5204
        
        5205
        
        5206
        @pytest.mark.asyncio
      
        5207
        async def test_tool_batch_runner_todowrite_prefers_pending_index_over_empty_output_directory(
      
        5208
            temp_dir: Path,
      
        5209
        ) -> None:
      
        5210
            async def assess_confidence(
      
        5211
                tool_name: str,
      
        5212
                tool_args: dict,
      
        5213
                context: str,
      
        5214
            ) -> ConfidenceAssessment:
      
        5215
                raise AssertionError("Confidence scoring should not run in this scenario")
      
        5216
        
        5217
            async def verify_action(
      
        5218
                tool_name: str,
      
        5219
                tool_args: dict,
      
        5220
                result: str,
      
        5221
                expected: str = "",
      
        5222
            ) -> ActionVerification:
      
        5223
                raise AssertionError("Verification should not run in this scenario")
      
        5224
        
        5225
            guide_root = temp_dir / "Loader" / "guides" / "nginx"
      
        5226
            chapters = guide_root / "chapters"
      
        5227
            chapters.mkdir(parents=True)
      
        5228
            index_path = guide_root / "index.html"
      
        5229
            implementation_plan = temp_dir / "implementation.md"
      
        5230
            implementation_plan.write_text(
      
        5231
                "\n".join(
      
        5232
                    [
      
        5233
                        "# Implementation Plan",
      
        5234
                        "",
      
        5235
                        "## File Changes",
      
        5236
                        f"- `{chapters}/`",
      
        5237
                        f"- `{index_path}`",
      
        5238
                        "",
      
        5239
                    ]
      
        5240
                )
      
        5241
            )
      
        5242
        
        5243
            dod = create_definition_of_done("Create a multi-file nginx guide.")
      
        5244
            dod.implementation_plan = str(implementation_plan)
      
        5245
            sync_todos_to_definition_of_done(
      
        5246
                dod,
      
        5247
                [
      
        5248
                    {
      
        5249
                        "content": "Examine the existing Fortran guide structure to understand the format and depth",
      
        5250
                        "active_form": "Examining the existing Fortran guide structure",
      
        5251
                        "status": "completed",
      
        5252
                    },
      
        5253
                    {
      
        5254
                        "content": "Create the new nginx guide directory structure",
      
        5255
                        "active_form": "Creating the new nginx guide directory structure",
      
        5256
                        "status": "completed",
      
        5257
                    },
      
        5258
                    {
      
        5259
                        "content": "Create a new index.html for the nginx guide",
      
        5260
                        "active_form": "Creating a new index.html for the nginx guide",
      
        5261
                        "status": "pending",
      
        5262
                    },
      
        5263
                    {
      
        5264
                        "content": "Create the first chapter for the nginx guide",
      
        5265
                        "active_form": "Creating the first chapter for the nginx guide",
      
        5266
                        "status": "pending",
      
        5267
                    },
      
        5268
                ],
      
        5269
                project_root=temp_dir,
      
        5270
            )
      
        5271
        
        5272
            queued_messages: list[str] = []
      
        5273
            context = build_context(
      
        5274
                temp_dir=temp_dir,
      
        5275
                messages=[],
      
        5276
                safeguards=FakeSafeguards(),
      
        5277
                assess_confidence=assess_confidence,
      
        5278
                verify_action=verify_action,
      
        5279
                auto_recover=False,
      
        5280
            )
      
        5281
            context.queue_steering_message_callback = queued_messages.append
      
        5282
            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
      
        5283
        
        5284
            todos = [
      
        5285
                {
      
        5286
                    "content": "Examine the existing Fortran guide structure to understand the format and depth",
      
        5287
                    "active_form": "Examining the existing Fortran guide structure",
      
        5288
                    "status": "completed",
      
        5289
                },
      
        5290
                {
      
        5291
                    "content": "Create the new nginx guide directory structure",
      
        5292
                    "active_form": "Creating the new nginx guide directory structure",
      
        5293
                    "status": "completed",
      
        5294
                },
      
        5295
                {
      
        5296
                    "content": "Create a new index.html for the nginx guide",
      
        5297
                    "active_form": "Creating a new index.html for the nginx guide",
      
        5298
                    "status": "pending",
      
        5299
                },
      
        5300
                {
      
        5301
                    "content": "Create the first chapter for the nginx guide",
      
        5302
                    "active_form": "Creating the first chapter for the nginx guide",
      
        5303
                    "status": "pending",
      
        5304
                },
      
        5305
            ]
      
        5306
            tool_call = ToolCall(
      
        5307
                id="todo-index-before-chapter",
      
        5308
                name="TodoWrite",
      
        5309
                arguments={"todos": todos},
      
        5310
            )
      
        5311
            executor = FakeExecutor(
      
        5312
                [
      
        5313
                    tool_outcome(
      
        5314
                        tool_call=tool_call,
      
        5315
                        output="Todos updated",
      
        5316
                        is_error=False,
      
        5317
                        metadata={"new_todos": todos},
      
        5318
                    )
      
        5319
                ]
      
        5320
            )
      
        5321
        
        5322
            summary = TurnSummary(final_response="")
      
        5323
            await runner.execute_batch(
      
        5324
                tool_calls=[tool_call],
      
        5325
                tool_source="assistant",
      
        5326
                pending_tool_calls_seen=set(),
      
        5327
                emit=_noop_emit,
      
        5328
                summary=summary,
      
        5329
                dod=dod,
      
        5330
                executor=executor,  # type: ignore[arg-type]
      
        5331
                on_confirmation=None,
      
        5332
                on_user_question=None,
      
        5333
                emit_confirmation=None,
      
        5334
                consecutive_errors=0,
      
        5335
            )
      
        5336
        
        5337
            assert queued_messages
      
        5338
            message = queued_messages[-1]
      
        5339
            assert "Todo tracking is updated. Next step: create `index.html`." in message
      
        5340
            assert f"Prefer one `write(file_path=..., content=...)` call for `{index_path.resolve(strict=False)}`" in message
      
        5341
            assert "01-introduction.html" not in message
      
        5342
        
        5343
        
        5344
        @pytest.mark.asyncio
        async def test_tool_batch_runner_todowrite_with_declared_child_targets_names_next_missing_file(
            temp_dir: Path,
        ) -> None:
            """TodoWrite follow-up should name the first linked chapter that is still missing.

            Scenario built here:

            * ``guides/nginx/index.html`` exists and links to
              ``chapters/introduction.html`` and ``chapters/installation.html``;
              the ``chapters`` directory exists but is empty.
            * The implementation plan lists the guide root, the chapters directory
              and the index file under "File Changes".
            * The definition of done has "Write the introduction chapter" pending
              and records the index file as touched.

            After a successful ``TodoWrite`` batch, the most recent queued steering
            message must point at ``introduction.html``, suggest a single ``write``
            call, and ask for the mutation tool call as the next response.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Guard hook: this scenario must never reach confidence scoring.
                raise AssertionError("Confidence scoring should not run in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Guard hook: this scenario must never reach action verification.
                raise AssertionError("Verification should not run in this scenario")

            todo_text = "Write the introduction chapter"

            # --- filesystem fixture: guide skeleton with an index but no chapters ---
            nginx_dir = temp_dir / "guides" / "nginx"
            chapters_dir = nginx_dir / "chapters"
            nginx_dir.mkdir(parents=True)
            chapters_dir.mkdir()

            index_file = nginx_dir / "index.html"
            index_lines = [
                "<html>",
                '<a href="chapters/introduction.html">Introduction</a>',
                '<a href="chapters/installation.html">Installation</a>',
                "</html>",
            ]
            # Every line, including the last, is newline-terminated.
            index_file.write_text("".join(f"{line}\n" for line in index_lines))

            plan_file = temp_dir / "implementation.md"
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{nginx_dir}/`",
                f"- `{chapters_dir}/`",
                f"- `{index_file}`",
                "",
            ]
            plan_file.write_text("\n".join(plan_lines))

            # --- definition of done: one concrete pending item, index already touched ---
            definition = create_definition_of_done("Create a multi-file nginx guide.")
            definition.implementation_plan = str(plan_file)
            definition.pending_items = [todo_text, "Complete the requested work"]
            definition.touched_files.append(str(index_file))

            # --- runtime wiring: capture steering messages in a plain list ---
            steering_log: list[str] = []
            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            runtime_context.queue_steering_message_callback = steering_log.append
            batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            # The tool arguments use ``activeForm`` while the outcome metadata uses
            # ``active_form``; both spellings are kept exactly as in the scenario.
            todo_call = ToolCall(
                id="todo-1",
                name="TodoWrite",
                arguments={
                    "todos": [
                        {
                            "content": todo_text,
                            "activeForm": "Writing the introduction chapter",
                            "status": "pending",
                        }
                    ]
                },
            )
            recorded_outcome = tool_outcome(
                tool_call=todo_call,
                output="Todos updated",
                is_error=False,
                metadata={
                    "new_todos": [
                        {
                            "content": todo_text,
                            "active_form": "Writing the introduction chapter",
                            "status": "pending",
                        }
                    ]
                },
            )
            fake_executor = FakeExecutor([recorded_outcome])

            turn_summary = TurnSummary(final_response="")
            await batch_runner.execute_batch(
                tool_calls=[todo_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=definition,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert steering_log
            latest = steering_log[-1]
            for expected_fragment in (
                "Todo tracking is updated. Next step: create `introduction.html`.",
                "Prefer one `write(file_path=..., content=...)` call",
                "Make your next response the concrete mutation tool call itself.",
            ):
                assert expected_fragment in latest
      
        5467
        
        5468
        
        5469
        @pytest.mark.asyncio
        async def test_tool_batch_runner_todowrite_names_concrete_pending_file_after_artifacts_exist(
            temp_dir: Path,
        ) -> None:
            """TodoWrite follow-up should name the concrete file for the pending chapter.

            Scenario built here:

            * ``guides/nginx/index.html`` links to ``chapters/01-introduction.html``
              and ``chapters/02-installation.html``; only the first chapter file has
              been written.
            * The implementation plan lists the guide root, the chapters directory
              and the index file under "File Changes".
            * The definition of done has "Creating Chapter 2: Installation and Setup"
              pending and records the index and first chapter as touched.

            After a successful ``TodoWrite`` batch, the most recent queued steering
            message must point at ``02-installation.html``, suggest a single
            ``write`` call, and ask for the mutation tool call as the next response.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Guard hook: this scenario must never reach confidence scoring.
                raise AssertionError("Confidence scoring should not run in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Guard hook: this scenario must never reach action verification.
                raise AssertionError("Verification should not run in this scenario")

            todo_text = "Creating Chapter 2: Installation and Setup"

            # --- filesystem fixture: index plus the first chapter already on disk ---
            nginx_dir = temp_dir / "guides" / "nginx"
            chapters_dir = nginx_dir / "chapters"
            nginx_dir.mkdir(parents=True)
            chapters_dir.mkdir()

            index_file = nginx_dir / "index.html"
            first_chapter = chapters_dir / "01-introduction.html"
            index_lines = [
                "<html>",
                '<a href="chapters/01-introduction.html">Chapter 1: Introduction to NGINX Tool</a>',
                '<a href="chapters/02-installation.html">Chapter 2: Installation and Setup</a>',
                "</html>",
            ]
            # Every line, including the last, is newline-terminated.
            index_file.write_text("".join(f"{line}\n" for line in index_lines))
            first_chapter.write_text("<html></html>\n")

            plan_file = temp_dir / "implementation.md"
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{nginx_dir}/`",
                f"- `{chapters_dir}/`",
                f"- `{index_file}`",
                "",
            ]
            plan_file.write_text("\n".join(plan_lines))

            # --- definition of done: chapter 2 pending, existing artifacts touched ---
            definition = create_definition_of_done("Create a multi-file nginx guide.")
            definition.implementation_plan = str(plan_file)
            definition.pending_items = [todo_text, "Complete the requested work"]
            definition.touched_files.extend([str(index_file), str(first_chapter)])

            # --- runtime wiring: capture steering messages in a plain list ---
            steering_log: list[str] = []
            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            runtime_context.queue_steering_message_callback = steering_log.append
            batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            # The tool arguments use ``activeForm`` while the outcome metadata uses
            # ``active_form``; both spellings are kept exactly as in the scenario.
            todo_call = ToolCall(
                id="todo-1",
                name="TodoWrite",
                arguments={
                    "todos": [
                        {
                            "content": todo_text,
                            "activeForm": todo_text,
                            "status": "pending",
                        }
                    ]
                },
            )
            recorded_outcome = tool_outcome(
                tool_call=todo_call,
                output="Todos updated",
                is_error=False,
                metadata={
                    "new_todos": [
                        {
                            "content": todo_text,
                            "active_form": todo_text,
                            "status": "pending",
                        }
                    ]
                },
            )
            fake_executor = FakeExecutor([recorded_outcome])

            turn_summary = TurnSummary(final_response="")
            await batch_runner.execute_batch(
                tool_calls=[todo_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=definition,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert steering_log
            latest = steering_log[-1]
            for expected_fragment in (
                "Todo tracking is updated. Next step: create `02-installation.html`.",
                "Prefer one `write(file_path=..., content=...)` call",
                "Make your next response the concrete mutation tool call itself",
            ):
                assert expected_fragment in latest
      
        5594
        
        5595
        
        5596
        @pytest.mark.asyncio
        async def test_tool_batch_runner_todowrite_uses_observed_sibling_pattern_for_next_file(
            temp_dir: Path,
        ) -> None:
            """TodoWrite follow-up should reuse a file name seen in an earlier read.

            Scenario built here:

            * A reference guide has ``fortran/chapters/01-introduction.html`` on
              disk, and the conversation history holds an assistant ``read`` tool
              call for that exact path.
            * ``guides/nginx/index.html`` exists but contains no chapter links; the
              nginx ``chapters`` directory is empty.
            * The implementation plan lists the guide root, the chapters directory
              and the index file under "File Changes".
            * The definition of done has "Write the introduction chapter" pending
              and records the index file as touched.

            After a successful ``TodoWrite`` batch, the most recent queued steering
            message must point at ``01-introduction.html`` (the name used by the
            previously read reference chapter) and suggest a single ``write`` call.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Guard hook: this scenario must never reach confidence scoring.
                raise AssertionError("Confidence scoring should not run in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Guard hook: this scenario must never reach action verification.
                raise AssertionError("Verification should not run in this scenario")

            todo_text = "Write the introduction chapter"

            # --- reference guide whose chapter naming was "observed" via a read ---
            reference_dir = temp_dir / "fortran" / "chapters"
            reference_dir.mkdir(parents=True)
            reference_chapter = reference_dir / "01-introduction.html"
            reference_chapter.write_text("<h1>Introduction</h1>\n")

            # --- target guide: bare index, empty chapters directory ---
            nginx_dir = temp_dir / "guides" / "nginx"
            chapters_dir = nginx_dir / "chapters"
            nginx_dir.mkdir(parents=True)
            chapters_dir.mkdir()
            index_file = nginx_dir / "index.html"
            index_file.write_text("<html></html>\n")

            plan_file = temp_dir / "implementation.md"
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{nginx_dir}/`",
                f"- `{chapters_dir}/`",
                f"- `{index_file}`",
                "",
            ]
            plan_file.write_text("\n".join(plan_lines))

            # --- definition of done: introduction pending, index already touched ---
            definition = create_definition_of_done("Create a multi-file nginx guide.")
            definition.implementation_plan = str(plan_file)
            definition.pending_items = [todo_text, "Complete the requested work"]
            definition.touched_files.append(str(index_file))

            # --- runtime wiring: history contains the earlier read of the reference ---
            steering_log: list[str] = []
            prior_read = ToolCall(
                id="read-ref-1",
                name="read",
                arguments={"file_path": str(reference_chapter)},
            )
            history = [
                Message(
                    role=Role.ASSISTANT,
                    content="",
                    tool_calls=[prior_read],
                )
            ]
            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=history,
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            runtime_context.queue_steering_message_callback = steering_log.append
            batch_runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            # The tool arguments use ``activeForm`` while the outcome metadata uses
            # ``active_form``; both spellings are kept exactly as in the scenario.
            todo_call = ToolCall(
                id="todo-observed-1",
                name="TodoWrite",
                arguments={
                    "todos": [
                        {
                            "content": todo_text,
                            "activeForm": "Writing the introduction chapter",
                            "status": "pending",
                        }
                    ]
                },
            )
            recorded_outcome = tool_outcome(
                tool_call=todo_call,
                output="Todos updated",
                is_error=False,
                metadata={
                    "new_todos": [
                        {
                            "content": todo_text,
                            "active_form": "Writing the introduction chapter",
                            "status": "pending",
                        }
                    ]
                },
            )
            fake_executor = FakeExecutor([recorded_outcome])

            turn_summary = TurnSummary(final_response="")
            await batch_runner.execute_batch(
                tool_calls=[todo_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=turn_summary,
                dod=definition,
                executor=fake_executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert steering_log
            latest = steering_log[-1]
            for expected_fragment in (
                "Todo tracking is updated. Next step: create `01-introduction.html`.",
                "Prefer one `write(file_path=..., content=...)` call",
            ):
                assert expected_fragment in latest
      
        5724
        
        5725
        
        5726
        @pytest.mark.asyncio
      
        5727
        async def test_tool_batch_runner_bookkeeping_note_with_missing_artifact_requeues_resume_step(
      
        5728
            temp_dir: Path,
      
        5729
        ) -> None:
      
        5730
            async def assess_confidence(
      
        5731
                tool_name: str,
      
        5732
                tool_args: dict,
      
        5733
                context: str,
      
        5734
            ) -> ConfidenceAssessment:
      
        5735
                raise AssertionError("Confidence scoring should not run in this scenario")
      
        5736
        
        5737
            async def verify_action(
      
        5738
                tool_name: str,
      
        5739
                tool_args: dict,
      
        5740
                result: str,
      
        5741
                expected: str = "",
      
        5742
            ) -> ActionVerification:
      
        5743
                raise AssertionError("Verification should not run in this scenario")
      
        5744
        
        5745
            guide_root = temp_dir / "guides" / "nginx"
      
        5746
            chapters = guide_root / "chapters"
      
        5747
            guide_root.mkdir(parents=True)
      
        5748
            chapters.mkdir()
      
        5749
            index_path = guide_root / "index.html"
      
        5750
            chapter_one = chapters / "01-getting-started.html"
      
        5751
            chapter_two = chapters / "02-installation.html"
      
        5752
            index_path.write_text("<html></html>\n")
      
        5753
            chapter_one.write_text("<h1>One</h1>\n")
      
        5754
        
        5755
            implementation_plan = temp_dir / "implementation.md"
      
        5756
            implementation_plan.write_text(
      
        5757
                "\n".join(
      
        5758
                    [
      
        5759
                        "# Implementation Plan",
      
        5760
                        "",
      
        5761
                        "## File Changes",
      
        5762
                        f"- `{guide_root}/`",
      
        5763
                        f"- `{chapters}/`",
      
        5764
                        f"- `{index_path}`",
      
        5765
                        f"- `{chapter_one}`",
      
        5766
                        f"- `{chapter_two}`",
      
        5767
                        "",
      
        5768
                    ]
      
        5769
                )
      
        5770
            )
      
        5771
        
        5772
            context = build_context(
      
        5773
                temp_dir=temp_dir,
      
        5774
                messages=[],
      
        5775
                safeguards=FakeSafeguards(),
      
        5776
                assess_confidence=assess_confidence,
      
        5777
                verify_action=verify_action,
      
        5778
                auto_recover=False,
      
        5779
            )
      
        5780
            queued_messages: list[str] = []
      
        5781
            context.queue_steering_message_callback = queued_messages.append
      
        5782
            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
      
        5783
            dod = create_definition_of_done("Create a multi-file nginx guide.")
      
        5784
            dod.implementation_plan = str(implementation_plan)
      
        5785
            sync_todos_to_definition_of_done(
      
        5786
                dod,
      
        5787
                [
      
        5788
                    {
      
        5789
                        "content": "Create 01-getting-started.html",
      
        5790
                        "active_form": "Creating 01-getting-started.html",
      
        5791
                        "status": "completed",
      
        5792
                    },
      
        5793
                    {
      
        5794
                        "content": "Create 02-installation.html",
      
        5795
                        "active_form": "Creating 02-installation.html",
      
        5796
                        "status": "pending",
      
        5797
                    },
      
        5798
                ],
      
        5799
                project_root=temp_dir,
      
        5800
            )
      
        5801
            dod.touched_files.extend([str(index_path), str(chapter_one)])
      
        5802
        
        5803
            tool_call = ToolCall(
      
        5804
                id="working-note",
      
        5805
                name="notepad_write_working",
      
        5806
                arguments={"content": "Creating the second chapter file: Installation"},
      
        5807
            )
      
        5808
            executor = FakeExecutor(
      
        5809
                [
      
        5810
                    tool_outcome(
      
        5811
                        tool_call=tool_call,
      
        5812
                        output="Working note recorded",
      
        5813
                        is_error=False,
      
        5814
                    )
      
        5815
                ]
      
        5816
            )
      
        5817
        
        5818
            summary = TurnSummary(final_response="")
      
        5819
            await runner.execute_batch(
      
        5820
                tool_calls=[tool_call],
      
        5821
                tool_source="assistant",
      
        5822
                pending_tool_calls_seen=set(),
      
        5823
                emit=_noop_emit,
      
        5824
                summary=summary,
      
        5825
                dod=dod,
      
        5826
                executor=executor,  # type: ignore[arg-type]
      
        5827
                on_confirmation=None,
      
        5828
                on_user_question=None,
      
        5829
                emit_confirmation=None,
      
        5830
                consecutive_errors=0,
      
        5831
            )
      
        5832
        
        5833
            assert queued_messages
      
        5834
            message = queued_messages[-1]
      
        5835
            assert "Bookkeeping note is recorded. A declared output artifact is still missing." in message
      
        5836
            assert "Resume by creating `02-installation.html` now." in message
      
        5837
            assert "Make your next response the concrete mutation tool call itself" in message
      
        5838
            assert "refresh `TodoWrite`" in message
      
        5839
            assert "Do not spend the next turn on additional notes, rediscovery, verification, or final confirmation" in message
      
        5840
        
        5841
        
        5842
        @pytest.mark.asyncio
        async def test_tool_batch_runner_working_note_respects_discovery_first_pending_step(
            temp_dir: Path,
        ) -> None:
            """A working note must not leapfrog a discovery step that is first in line.

            The implementation plan lists nginx outputs, yet the first pending item
            is an examination step.  The steering message queued after the note is
            expected to point at that pending item and must not ask for
            ``index.html`` to be created.
            """

            async def _confidence_must_not_run(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Any call into confidence scoring fails the test immediately.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def _verification_must_not_run(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Any call into action verification fails the test immediately.
                raise AssertionError("Verification should not run in this scenario")

            # Write the plan file that declares the expected nginx outputs.
            nginx_root = temp_dir / "guides" / "nginx"
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text(
                "# Implementation Plan\n"
                "\n"
                "## File Changes\n"
                f"- `{nginx_root / 'index.html'}`\n"
                f"- `{nginx_root / 'chapters'}`\n"
            )

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=_confidence_must_not_run,
                verify_action=_verification_must_not_run,
                auto_recover=False,
            )
            # Capture every steering message the runner queues.
            steering_log: list[str] = []
            runtime_context.queue_steering_message_callback = steering_log.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            discovery_step = (
                "First, examine the existing fortran guide structure and content to understand the format"
            )
            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(implementation_plan)
            dod.pending_items.extend(
                [
                    discovery_step,
                    "Create the nginx directory structure",
                    "Develop the main index.html file for the nginx guide",
                ]
            )

            note_call = ToolCall(
                id="working-note",
                name="notepad_write_working",
                arguments={"content": "Analyzing the fortran guide structure before creating nginx guide"},
            )
            recorded_note = tool_outcome(
                tool_call=note_call,
                output="Working note recorded",
                is_error=False,
            )
            executor = FakeExecutor([recorded_note])

            await runner.execute_batch(
                tool_calls=[note_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=TurnSummary(final_response=""),
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert steering_log
            message = steering_log[-1]
            assert f"Continue with the next pending item: `{discovery_step}`." in message
            assert "one concrete evidence-gathering tool call" in message
            assert "Resume by creating `index.html` now." not in message
      
        5934
        
        5935
        
        5936
        @pytest.mark.asyncio
        async def test_tool_batch_runner_working_note_prefers_declared_output_gap_over_stale_discovery(
            temp_dir: Path,
        ) -> None:
            """A missing linked chapter outranks a discovery item still in the pending list.

            The index and the first chapter exist on disk and are recorded as
            touched, while the index links to chapters that have not been written.
            The steering message queued after the working note is expected to ask
            for ``02-installation.html`` and must not fall back to the pending
            examination step.
            """

            async def _confidence_must_not_run(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Any call into confidence scoring fails the test immediately.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def _verification_must_not_run(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Any call into action verification fails the test immediately.
                raise AssertionError("Verification should not run in this scenario")

            # On-disk state: an index linking three chapters, only the first of which exists.
            guide_root = temp_dir / "guides" / "nginx"
            chapters_dir = guide_root / "chapters"
            chapters_dir.mkdir(parents=True)
            index_path = guide_root / "index.html"
            first_chapter = chapters_dir / "01-introduction.html"
            linked_chapters = (
                ("01-introduction.html", "Introduction"),
                ("02-installation.html", "Installation"),
                ("03-configuration.html", "Configuration"),
            )
            index_path.write_text(
                "\n".join(
                    f'<a href="chapters/{file_name}">{title}</a>'
                    for file_name, title in linked_chapters
                )
            )
            first_chapter.write_text("<h1>Introduction</h1>\n")

            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text(
                "# Implementation Plan\n"
                "\n"
                "## File Changes\n"
                f"- `{index_path}`\n"
                f"- `{chapters_dir}/`\n"
            )

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=_confidence_must_not_run,
                verify_action=_verification_must_not_run,
                auto_recover=False,
            )
            # Capture every steering message the runner queues.
            steering_log: list[str] = []
            runtime_context.queue_steering_message_callback = steering_log.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(implementation_plan)
            pending_steps = [
                "First, examine the existing fortran guide structure and content to understand the format",
                "Create chapter files following the established pattern",
            ]
            dod.pending_items.extend(pending_steps)
            already_written = [str(index_path), str(first_chapter)]
            dod.touched_files.extend(already_written)

            note_call = ToolCall(
                id="working-note",
                name="notepad_write_working",
                arguments={"content": "Created index and first chapter; next is chapter 2"},
            )
            recorded_note = tool_outcome(
                tool_call=note_call,
                output="Working note recorded",
                is_error=False,
            )
            executor = FakeExecutor([recorded_note])

            await runner.execute_batch(
                tool_calls=[note_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=TurnSummary(final_response=""),
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert steering_log
            message = steering_log[-1]
            assert "Bookkeeping note is recorded. A declared output artifact is still missing." in message
            assert "Resume by creating `02-installation.html` now." in message
            assert (
                "Continue with the next pending item: `First, examine the existing fortran guide structure"
                not in message
            )
      
        6041
        
        6042
        
        6043
        @pytest.mark.asyncio
        async def test_tool_batch_runner_shallow_glob_does_not_handoff_before_content_read(
            temp_dir: Path,
        ) -> None:
            """A glob that only lists directories must not trigger a steering message.

            The first pending item is an examination step and the glob result
            contains just the fortran guide root and its ``chapters`` directory.
            After running the batch, no steering message is expected to have been
            queued.
            """

            async def _confidence_must_not_run(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Any call into confidence scoring fails the test immediately.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def _verification_must_not_run(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Any call into action verification fails the test immediately.
                raise AssertionError("Verification should not run in this scenario")

            # Only the reference (fortran) guide directories exist on disk.
            guides_root = temp_dir / "Loader" / "guides"
            fortran_root = guides_root / "fortran"
            chapters_dir = fortran_root / "chapters"
            chapters_dir.mkdir(parents=True)

            nginx_root = guides_root / "nginx"
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text(
                "# Implementation Plan\n"
                "\n"
                "## File Changes\n"
                f"- `{nginx_root / 'index.html'}`\n"
                f"- `{nginx_root / 'chapters'}`\n"
            )

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=_confidence_must_not_run,
                verify_action=_verification_must_not_run,
                auto_recover=False,
            )
            # Capture every steering message the runner queues.
            steering_log: list[str] = []
            runtime_context.queue_steering_message_callback = steering_log.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(implementation_plan)
            pending_steps = [
                "First, examine the existing fortran guide structure and content",
                "Create the nginx directory structure",
                "Develop the main index.html file for nginx guide",
            ]
            dod.pending_items.extend(pending_steps)

            glob_call = ToolCall(
                id="glob-1",
                name="glob",
                arguments={"pattern": "**", "path": str(fortran_root)},
            )
            # The fake result lists the two directories, one per line.
            listing = "\n".join(str(entry) for entry in (fortran_root, chapters_dir))
            glob_result = tool_outcome(
                tool_call=glob_call,
                output=listing,
                is_error=False,
            )
            executor = FakeExecutor([glob_result])

            await runner.execute_batch(
                tool_calls=[glob_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=TurnSummary(final_response=""),
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert steering_log == []
      
        6132
        
        6133
        
        6134
        @pytest.mark.asyncio
        async def test_tool_batch_runner_hands_off_noop_toc_edit_when_file_is_already_valid(
            temp_dir: Path,
        ) -> None:
            """A blocked identical-string edit on a matching TOC queues no steering message.

            ``index.html`` is written with links whose hrefs and texts correspond
            to the two chapter files created on disk.  The edit tool call uses the
            same text for ``old_string`` and ``new_string`` and the fake executor
            reports it as blocked; the test expects that nothing is queued through
            the steering callback.
            """

            async def _confidence_must_not_run(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Any call into confidence scoring fails the test immediately.
                raise AssertionError("Confidence scoring should not run in this scenario")

            async def _verification_must_not_run(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Any call into action verification fails the test immediately.
                raise AssertionError("Verification should not run in this scenario")

            prompt = (
                "Have a look at ~/Loader/guides/fortran/index.html, then ~/Loader/guides/fortran/chapters. "
                "The table of contents links in index.html are inaccurate and the href’s are wrong. "
                "Let’s update the links and their link texts to be correct."
            )

            # Chapter files whose headings match the link texts in the TOC below.
            chapters = temp_dir / "chapters"
            chapters.mkdir()
            chapter_pages = {
                "01-introduction.html": "<h1>Chapter 1: Introduction to Fortran</h1>\n",
                "02-setup.html": "<h1>Chapter 2: Setting Up Your Environment</h1>\n",
            }
            for file_name, page_html in chapter_pages.items():
                (chapters / file_name).write_text(page_html)

            current_block = "".join(
                (
                    "<h2>Table of Contents</h2>\n",
                    '        <ul class="chapter-list">\n',
                    '            <li><a href="chapters/01-introduction.html">Chapter 1: Introduction to Fortran</a></li>\n',
                    '            <li><a href="chapters/02-setup.html">Chapter 2: Setting Up Your Environment</a></li>\n',
                    "        </ul>\n",
                )
            )
            index_path = temp_dir / "index.html"
            index_path.write_text(current_block)

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=_confidence_must_not_run,
                verify_action=_verification_must_not_run,
                auto_recover=False,
            )
            runtime_context.session.current_task = prompt  # type: ignore[attr-defined]
            # Capture every steering message the runner queues.
            steering_log: list[str] = []
            runtime_context.queue_steering_message_callback = steering_log.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            # The edit replaces the block with itself, i.e. it changes nothing.
            edit_call = ToolCall(
                id="edit-1",
                name="edit",
                arguments={
                    "file_path": str(index_path),
                    "old_string": current_block,
                    "new_string": current_block,
                },
            )
            blocked_result = tool_outcome(
                tool_call=edit_call,
                output=(
                    "[Blocked - old_string and new_string are identical - no change would occur] "
                    "Suggestion: Provide different old and new strings"
                ),
                is_error=True,
                state=ToolExecutionState.BLOCKED,
            )
            executor = FakeExecutor([blocked_result])

            await runner.execute_batch(
                tool_calls=[edit_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=TurnSummary(final_response=""),
                dod=create_definition_of_done(prompt),
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert steering_log == []
      
        6227
        
        6228
        
        6229
        def test_tool_batch_runner_blocked_noop_edit_nudge_stays_on_active_repair_target(
            temp_dir: Path,
        ) -> None:
            """The nudge for a blocked identical-string edit stays on the repair target.

            The context carries an assistant "Repair focus" message naming the
            target file.  Calling ``_queue_blocked_html_edit_nudge`` directly with
            an edit whose old and new strings are equal is expected to queue a
            message that names that file and steers away from unrelated material.
            """

            async def _confidence_must_not_run(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Any call into confidence scoring fails the test immediately.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def _verification_must_not_run(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Any call into action verification fails the test immediately.
                raise AssertionError("Verification should not run in this scenario")

            chapters_dir = temp_dir / "guide" / "chapters"
            repair_target = chapters_dir / "04-basic-usage.html"
            missing_chapter = chapters_dir / "05-advanced-topics.html"
            repair_focus = Message(
                role=Role.ASSISTANT,
                content=(
                    "Repair focus:\n"
                    f"- Fix the broken local reference `05-advanced-topics.html` in `{repair_target}`.\n"
                    f"- Immediate next step: edit `{repair_target}`.\n"
                    f"- If the broken reference should remain, create `{missing_chapter}`; otherwise remove or replace `05-advanced-topics.html`.\n"
                ),
            )
            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[repair_focus],
                safeguards=FakeSafeguards(),
                assess_confidence=_confidence_must_not_run,
                verify_action=_verification_must_not_run,
            )
            # Capture every steering message the runner queues.
            queued: list[str] = []
            runtime_context.queue_steering_message_callback = queued.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))
            dod = create_definition_of_done("Repair a guide page.")

            noop_edit = ToolCall(
                id="edit-1",
                name="edit",
                arguments={
                    "file_path": str(repair_target),
                    "old_string": "same",
                    "new_string": "same",
                },
            )
            blocked_output = (
                "[Blocked - old_string and new_string are identical - no change would occur] "
                "Suggestion: Provide different old and new strings"
            )
            runner._queue_blocked_html_edit_nudge(noop_edit, blocked_output, dod=dod)

            assert queued
            nudge = queued[0]
            assert str(repair_target) in nudge
            assert "no on-disk change" in nudge
            assert "replace the surrounding block" in nudge
            assert "Do not reopen unrelated reference materials" in nudge
      
        6289
        
        6290
        
        6291
        def test_tool_batch_runner_blocked_noop_edit_after_full_build_prefers_verification(
            temp_dir: Path,
        ) -> None:
            """Blocked no-op edit with every planned file on disk should steer to verification.

            Both files listed in the implementation plan are written before the
            blocked ``edit`` result is passed to ``_queue_blocked_html_edit_nudge``.
            The queued steering message must recommend verification and must not
            suggest replacing the surrounding block.
            """

            async def reject_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Fails the test if the runner ever asks for a confidence score.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def reject_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Fails the test if the runner ever asks for action verification.
                raise AssertionError("Verification should not run in this scenario")

            # Lay out the guide so that every planned artifact already exists.
            guide_root = temp_dir / "guide"
            chapters_dir = guide_root / "chapters"
            chapters_dir.mkdir(parents=True)
            index_path = guide_root / "index.html"
            chapter_one = chapters_dir / "01-introduction.html"
            for artifact in (index_path, chapter_one):
                artifact.write_text("<html></html>\n")

            plan_path = temp_dir / "implementation.md"
            plan_path.write_text(
                "# Implementation Plan\n"
                "\n"
                "## File Changes\n"
                f"- `{index_path}`\n"
                f"- `{chapter_one}`\n"
            )

            repair_focus = (
                "Repair focus:\n"
                f"- Confirm the final guide state in `{index_path}`.\n"
                f"- Immediate next step: verify `{index_path}` if no concrete mismatch remains.\n"
            )
            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
                safeguards=FakeSafeguards(),
                assess_confidence=reject_confidence,
                verify_action=reject_verification,
            )
            queued: list[str] = []
            runtime_context.queue_steering_message_callback = queued.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file guide.")
            dod.implementation_plan = str(plan_path)
            dod.touched_files.extend([str(index_path), str(chapter_one)])
            dod.verification_commands = [f"ls -la {guide_root}"]

            # An edit whose old and new strings match is the blocked no-op case.
            noop_edit = ToolCall(
                id="edit-1",
                name="edit",
                arguments={
                    "file_path": str(index_path),
                    "old_string": "same",
                    "new_string": "same",
                },
            )
            runner._queue_blocked_html_edit_nudge(
                noop_edit,
                "[Blocked - old_string and new_string are identical - no change would occur] Suggestion: Provide different old and new strings",
                dod=dod,
            )

            assert queued
            nudge = queued[0]
            assert "All explicitly planned artifacts already exist." in nudge
            assert "Move to verification or final confirmation using the files already on disk." in nudge
            assert "replace the surrounding block" not in nudge
      
        6374
        
        6375
        
        6376
        async def _noop_emit(event: AgentEvent) -> None:
            """Accept an event and discard it; always resolves to ``None``."""
      
        6378
        
        6379
        
        6380
        @pytest.mark.asyncio
        async def test_tool_batch_runner_marks_verification_planned_after_new_mutation(
            temp_dir: Path,
        ) -> None:
            """A successful ``write`` on a fresh definition of done plans verification.

            After the batch runs, the definition of done should report a "planned"
            verification result with attempt 1 active, and the last timeline entry
            should carry a matching "planned" observation.
            """

            async def reject_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Fails the test if the runner ever asks for a confidence score.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def reject_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Fails the test if the runner ever asks for action verification.
                raise AssertionError("Verification should not run for this scenario")

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=reject_confidence,
                verify_action=reject_verification,
            )
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            readme_path = temp_dir / "README.md"
            write_call = ToolCall(
                id="write-1",
                name="write",
                arguments={"file_path": str(readme_path), "content": "updated\n"},
            )
            # The fake executor replays one successful outcome for the write.
            scripted_outcomes = [
                tool_outcome(tool_call=write_call, output="wrote file", is_error=False)
            ]
            executor = FakeExecutor(scripted_outcomes)
            summary = TurnSummary(final_response="")
            dod = create_definition_of_done("Update README and verify it still works.")
            captured: list[AgentEvent] = []

            async def emit(event: AgentEvent) -> None:
                captured.append(event)

            await runner.execute_batch(
                tool_calls=[write_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=emit,
                summary=summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            # State recorded on the definition of done.
            assert dod.last_verification_result == "planned"
            assert dod.verification_commands
            assert "Collect verification evidence" in dod.pending_items
            assert dod.active_verification_attempt_id == "verification-attempt-1"
            assert dod.active_verification_attempt_number == 1

            # State recorded on the newest workflow timeline entry.
            latest_entry = summary.workflow_timeline[-1]
            assert latest_entry.reason_code == "verification_planned"
            assert latest_entry.policy_outcome == "planned"
            observation = latest_entry.verification_observations[0]
            assert observation.status == "planned"
            assert observation.attempt_id == "verification-attempt-1"
            assert observation.attempt_number == 1
      
        6451
        
        6452
        
        6453
        @pytest.mark.asyncio
        async def test_tool_batch_runner_does_not_mark_verification_planned_after_setup_only_mkdir(
            temp_dir: Path,
        ) -> None:
            """A lone ``mkdir -p`` bash call must not plan verification.

            The implementation plan names a chapters directory and an index file.
            The batch contains only the directory-creating command, so no
            verification result, pending verification item, or
            "verification_planned" timeline entry is expected afterwards.
            """

            async def reject_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Fails the test if the runner ever asks for a confidence score.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def reject_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Fails the test if the runner ever asks for action verification.
                raise AssertionError("Verification should not run in this scenario")

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=reject_confidence,
                verify_action=reject_verification,
            )
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            nginx_root = temp_dir / "Loader" / "guides" / "nginx"
            chapters_dir = nginx_root / "chapters"
            index_path = nginx_root / "index.html"
            plan_path = temp_dir / "implementation.md"
            plan_path.write_text(
                "# Implementation Plan\n"
                "\n"
                "## File Changes\n"
                f"- `{chapters_dir}/`\n"
                f"- `{index_path}`\n"
            )

            mkdir_call = ToolCall(
                id="mkdir-1",
                name="bash",
                arguments={"command": f"mkdir -p {chapters_dir}"},
            )
            # The fake executor replays one successful, output-free outcome.
            scripted_outcomes = [tool_outcome(tool_call=mkdir_call, output="", is_error=False)]
            executor = FakeExecutor(scripted_outcomes)
            summary = TurnSummary(final_response="")
            dod = create_definition_of_done("Create an equally thorough nginx guide with chapters.")
            dod.implementation_plan = str(plan_path)
            captured: list[AgentEvent] = []

            async def emit(event: AgentEvent) -> None:
                captured.append(event)

            await runner.execute_batch(
                tool_calls=[mkdir_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=emit,
                summary=summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert dod.last_verification_result is None
            assert "Collect verification evidence" not in dod.pending_items
            assert all(
                entry.reason_code != "verification_planned" for entry in summary.workflow_timeline
            )
      
        6531
        
        6532
        
        6533
        @pytest.mark.asyncio
        async def test_tool_batch_runner_does_not_mark_verification_planned_while_chapter_build_pending(
            temp_dir: Path,
        ) -> None:
            """Writing the index while a chapter item is still pending must not plan verification.

            The definition of done starts with two pending build items. After a
            successful ``write`` of the index file, the chapter item must still be
            pending and nothing verification-related may have been recorded.
            """

            async def reject_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Fails the test if the runner ever asks for a confidence score.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def reject_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Fails the test if the runner ever asks for action verification.
                raise AssertionError("Verification should not run in this scenario")

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=reject_confidence,
                verify_action=reject_verification,
            )
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            nginx_root = temp_dir / "Loader" / "guides" / "nginx"
            chapters_dir = nginx_root / "chapters"
            chapters_dir.mkdir(parents=True)
            index_path = nginx_root / "index.html"
            plan_path = temp_dir / "implementation.md"
            plan_path.write_text(
                "# Implementation Plan\n"
                "\n"
                "## File Changes\n"
                f"- `{nginx_root}/`\n"
                f"- `{chapters_dir}/`\n"
                f"- `{index_path}`\n"
            )

            write_call = ToolCall(
                id="write-index",
                name="write",
                arguments={"file_path": str(index_path), "content": "<html></html>\n"},
            )
            # The fake executor replays one successful outcome for the write.
            scripted_outcomes = [
                tool_outcome(tool_call=write_call, output="wrote file", is_error=False)
            ]
            executor = FakeExecutor(scripted_outcomes)
            summary = TurnSummary(final_response="")
            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(plan_path)
            outstanding_work = [
                "Develop the main index.html file with proper structure",
                "Create first nginx chapter",
            ]
            dod.pending_items.extend(outstanding_work)
            captured: list[AgentEvent] = []

            async def emit(event: AgentEvent) -> None:
                captured.append(event)

            await runner.execute_batch(
                tool_calls=[write_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=emit,
                summary=summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert dod.last_verification_result is None
            assert "Collect verification evidence" not in dod.pending_items
            assert "Create first nginx chapter" in dod.pending_items
            assert all(
                entry.reason_code != "verification_planned" for entry in summary.workflow_timeline
            )
      
        6621
        
        6622
        
        6623
        @pytest.mark.asyncio
        async def test_tool_batch_runner_marks_passed_verification_stale_after_new_mutation(
            temp_dir: Path,
        ) -> None:
            """A new ``write`` after a passed verification should mark that result stale.

            The definition of done starts with attempt 1 passed and one piece of
            evidence. After the batch, the result is expected to be "stale", the
            evidence cleared, the verification item pending again, attempt 2
            active, and the last timeline entry to describe the stale attempt 1.
            """

            async def reject_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Fails the test if the runner ever asks for a confidence score.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def reject_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Fails the test if the runner ever asks for action verification.
                raise AssertionError("Verification should not run for this scenario")

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=reject_confidence,
                verify_action=reject_verification,
            )
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            readme_path = temp_dir / "README.md"
            write_call = ToolCall(
                id="write-1",
                name="write",
                arguments={"file_path": str(readme_path), "content": "updated\n"},
            )
            # The fake executor replays one successful outcome for the write.
            scripted_outcomes = [
                tool_outcome(tool_call=write_call, output="wrote file", is_error=False)
            ]
            executor = FakeExecutor(scripted_outcomes)
            summary = TurnSummary(final_response="")

            # Seed the definition of done as if attempt 1 had already passed.
            verification_command = "uv run pytest -q"
            dod = create_definition_of_done("Update README and verify it still works.")
            dod.verification_commands = [verification_command]
            dod.last_verification_result = "passed"
            dod.verification_attempt_counter = 1
            dod.active_verification_attempt_id = "verification-attempt-1"
            dod.active_verification_attempt_number = 1
            passing_evidence = VerificationEvidence(
                command=verification_command,
                passed=True,
                stdout="401 passed",
                kind="test",
            )
            dod.evidence = [passing_evidence]
            dod.completed_items.append("Collect verification evidence")
            captured: list[AgentEvent] = []

            async def emit(event: AgentEvent) -> None:
                captured.append(event)

            await runner.execute_batch(
                tool_calls=[write_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=emit,
                summary=summary,
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            # State recorded on the definition of done.
            assert dod.last_verification_result == "stale"
            assert dod.evidence == []
            assert "Collect verification evidence" in dod.pending_items
            assert "Collect verification evidence" not in dod.completed_items
            assert dod.active_verification_attempt_id == "verification-attempt-2"
            assert dod.active_verification_attempt_number == 2

            # State recorded on the newest workflow timeline entry.
            latest_entry = summary.workflow_timeline[-1]
            assert latest_entry.reason_code == "verification_stale"
            assert latest_entry.policy_outcome == "stale"
            observation = latest_entry.verification_observations[0]
            assert observation.status == "stale"
            assert observation.attempt_id == "verification-attempt-1"
            assert observation.attempt_number == 1
            assert observation.supersedes_attempt_id == "verification-attempt-2"
            assert observation.command == "uv run pytest -q"
      
        6717
        
        6718
        
        6719
        def test_tool_batch_runner_blocked_active_repair_nudge_uses_repair_scope(temp_dir: Path) -> None:
            """The active-repair nudge should name the paths from the repair-focus message.

            The only conversation message is an assistant "Repair focus" note that
            mentions the file to edit and a chapter file that may need creating.
            Both paths are expected in the queued steering message.
            """

            async def reject_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Fails the test if the runner ever asks for a confidence score.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def reject_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Fails the test if the runner ever asks for action verification.
                raise AssertionError("Verification should not run in this scenario")

            guide_dir = temp_dir / "guide"
            repair_target = guide_dir / "index.html"
            missing_chapter = guide_dir / "chapters" / "01-getting-started.html"
            repair_focus = (
                "Repair focus:\n"
                f"- Fix the broken local reference `chapters/01-getting-started.html` in `{repair_target}`.\n"
                f"- Immediate next step: edit `{repair_target}`.\n"
                f"- If the broken reference should remain, create `{missing_chapter}`; otherwise remove or replace `chapters/01-getting-started.html`.\n"
            )
            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
                safeguards=FakeSafeguards(),
                assess_confidence=reject_confidence,
                verify_action=reject_verification,
            )
            queued: list[str] = []
            runtime_context.queue_steering_message_callback = queued.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            runner._queue_blocked_active_repair_nudge(
                "[Blocked - active repair scope: verification already identified the repair target.]"
            )

            assert queued
            nudge = queued[0]
            assert str(repair_target) in nudge
            assert str(missing_chapter) in nudge
            assert "Do not reopen unrelated reference materials" in nudge
      
        6765
        
        6766
        
        6767
        def test_tool_batch_runner_blocked_active_repair_mutation_nudge_uses_allowed_paths(
            temp_dir: Path,
        ) -> None:
            """The active-repair mutation nudge should list the paths from the repair focus.

            The assistant "Repair focus" note mentions a chapter file to edit and a
            stylesheet that may need creating. Both are expected in the queued
            steering message, along with the "before widening the change set" hint.
            """

            async def reject_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Fails the test if the runner ever asks for a confidence score.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def reject_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Fails the test if the runner ever asks for action verification.
                raise AssertionError("Verification should not run in this scenario")

            guide_dir = temp_dir / "guide"
            repair_target = guide_dir / "chapters" / "05-advanced-configurations.html"
            stylesheet = guide_dir / "styles.css"
            repair_focus = (
                "Repair focus:\n"
                f"- Fix the broken local reference `../styles.css` in `{repair_target}`.\n"
                f"- Immediate next step: edit `{repair_target}`.\n"
                f"- If the broken reference should remain, create `{stylesheet}`; otherwise remove or replace `../styles.css`.\n"
            )
            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[Message(role=Role.ASSISTANT, content=repair_focus)],
                safeguards=FakeSafeguards(),
                assess_confidence=reject_confidence,
                verify_action=reject_verification,
            )
            queued: list[str] = []
            runtime_context.queue_steering_message_callback = queued.append
            runner = ToolBatchRunner(runtime_context, DefinitionOfDoneStore(temp_dir))

            runner._queue_blocked_active_repair_mutation_nudge(
                "[Blocked - active repair mutation scope: verification already identified the repair target.]"
            )

            assert queued
            nudge = queued[0]
            assert str(repair_target) in nudge
            assert str(stylesheet) in nudge
            assert "before widening the change set" in nudge
      
        6816
        
        6817
        
        6818
        def test_tool_batch_runner_blocked_late_reference_drift_nudge_points_to_missing_artifact(
            temp_dir: Path,
        ) -> None:
            """The late-reference-drift nudge should point at the one planned file not yet on disk.

            The plan lists four guide files and three of them are written before
            the nudge is requested. The queued message is expected to mention the
            remaining ``03-first-website.html`` and "older reference materials".
            """

            async def reject_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                # Fails the test if the runner ever asks for a confidence score.
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def reject_verification(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                # Fails the test if the runner ever asks for action verification.
                raise AssertionError("Verification should not run in this scenario")

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=reject_confidence,
                verify_action=reject_verification,
            )
            queued: list[str] = []
            runtime_context.queue_steering_message_callback = queued.append
            store = DefinitionOfDoneStore(temp_dir)
            dod = create_definition_of_done("Create a multi-file guide from a reference")

            plan_path = temp_dir / "implementation.md"
            plan_lines = [
                "# File Changes\n",
                "- `guide/index.html`\n",
                "- `guide/chapters/01-getting-started.html`\n",
                "- `guide/chapters/02-installation.html`\n",
                "- `guide/chapters/03-first-website.html`\n",
            ]
            plan_path.write_text("".join(plan_lines))
            dod.implementation_plan = str(plan_path)

            # Create every planned artifact except the third chapter.
            guide_dir = temp_dir / "guide"
            chapters_dir = guide_dir / "chapters"
            chapters_dir.mkdir(parents=True, exist_ok=True)
            existing_artifacts = {
                guide_dir / "index.html": "index",
                chapters_dir / "01-getting-started.html": "one",
                chapters_dir / "02-installation.html": "two",
            }
            for artifact_path, artifact_text in existing_artifacts.items():
                artifact_path.write_text(artifact_text)
            runner = ToolBatchRunner(runtime_context, store)

            runner._queue_blocked_late_reference_drift_nudge(
                "[Blocked - late reference drift: several planned artifacts already exist.]",
                dod=dod,
            )

            assert queued
            nudge = queued[0]
            assert "03-first-website.html" in nudge
            assert "older reference materials" in nudge
      
        6870
        
        6871
        
        6872
        def test_tool_batch_runner_blocked_completed_artifact_scope_nudge_prefers_verification(
            temp_dir: Path,
        ) -> None:
            """Check that a completed-artifact-scope block steers the run into verification.

            Every path listed in the plan exists on disk and one verification todo is
            pending. The nudge must mention that todo and the workflow mode must be
            ``"verify"`` afterwards.
            """

            # Both hooks fail loudly if the runner ever calls them.
            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            # Lay out the finished guide on disk.
            guide_dir = temp_dir / "guide"
            chapters_dir = guide_dir / "chapters"
            guide_dir.mkdir(parents=True)
            chapters_dir.mkdir()
            index_page = guide_dir / "index.html"
            first_chapter = chapters_dir / "01-getting-started.html"
            second_chapter = chapters_dir / "02-installation.html"
            index_page.write_text("index")
            first_chapter.write_text("one")
            second_chapter.write_text("two")

            # The plan lists both directories and all three files.
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_dir}`",
                f"- `{chapters_dir}`",
                f"- `{index_page}`",
                f"- `{first_chapter}`",
                f"- `{second_chapter}`",
                "",
            ]
            plan_file = temp_dir / "implementation.md"
            plan_file.write_text("\n".join(plan_lines))

            context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
            )
            steering_messages: list[str] = []
            context.queue_steering_message_callback = steering_messages.append
            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file guide from a reference")
            dod.implementation_plan = str(plan_file)
            dod.verification_commands = [f"ls -la {guide_dir}"]
            pending_todos = [
                {
                    "content": "Verify all guide files are linked and complete",
                    "active_form": "Working on: Verify all guide files are linked and complete",
                    "status": "pending",
                }
            ]
            sync_todos_to_definition_of_done(dod, pending_todos, project_root=temp_dir)

            blocked_message = (
                "[Blocked - completed artifact set scope: all explicitly planned artifacts already exist.]"
            )
            runner._queue_blocked_completed_artifact_scope_nudge(blocked_message, dod=dod)

            assert steering_messages
            assert context.workflow_mode == "verify"
            nudge = steering_messages[0]
            assert "All explicitly planned artifacts already exist." in nudge
            assert "Verify all guide files are linked and complete" in nudge
            assert "Do not reopen earlier reference materials." in nudge
            assert "Verification should run next" in nudge
      
        6954
        
        6955
        
        6956
        def test_tool_batch_runner_blocked_post_build_audit_nudge_switches_to_verify(
            temp_dir: Path,
        ) -> None:
            """Check that a post-build audit block switches the workflow to verification.

            All planned artifacts exist on disk. The post-build audit message is passed
            to the completed-artifact-scope nudge helper, which must queue guidance
            and leave the workflow mode at ``"verify"``.
            """

            # Both hooks fail loudly if the runner ever calls them.
            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            # Lay out the finished guide on disk.
            guide_dir = temp_dir / "guide"
            chapters_dir = guide_dir / "chapters"
            guide_dir.mkdir(parents=True)
            chapters_dir.mkdir()
            index_page = guide_dir / "index.html"
            first_chapter = chapters_dir / "01-getting-started.html"
            second_chapter = chapters_dir / "02-installation.html"
            index_page.write_text("index")
            first_chapter.write_text("one")
            second_chapter.write_text("two")

            # One bullet per planned path, in the original listing order.
            planned_paths = (guide_dir, chapters_dir, index_page, first_chapter, second_chapter)
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                *[f"- `{planned}`" for planned in planned_paths],
                "",
            ]
            plan_file = temp_dir / "implementation.md"
            plan_file.write_text("\n".join(plan_lines))

            context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
            )
            steering_messages: list[str] = []
            context.queue_steering_message_callback = steering_messages.append
            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file guide from a reference")
            dod.implementation_plan = str(plan_file)
            dod.verification_commands = [f"ls -la {guide_dir}"]

            blocked_message = (
                "[Blocked - post-build audit loop: all explicitly planned artifacts already exist.]"
            )
            runner._queue_blocked_completed_artifact_scope_nudge(blocked_message, dod=dod)

            assert steering_messages
            assert context.workflow_mode == "verify"
            nudge = steering_messages[0]
            assert "All explicitly planned artifacts already exist." in nudge
            assert "move to verification or final confirmation" in nudge
      
        7025
        
        7026
        
        7027
        @pytest.mark.asyncio
        async def test_tool_batch_runner_does_not_halt_on_repeated_post_build_audit_blocks(
            temp_dir: Path,
        ) -> None:
            """Check that three blocked post-build audit calls in a row do not halt the batch.

            The fake executor returns the same BLOCKED outcome for each of three
            ``bash`` audit calls. The batch must finish without halting, report zero
            consecutive errors, switch to ``"verify"`` mode and queue the
            verification nudge.
            """

            # Both hooks fail loudly if the runner ever calls them.
            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            # Lay out the finished guide on disk.
            guide_dir = temp_dir / "guide"
            chapters_dir = guide_dir / "chapters"
            guide_dir.mkdir(parents=True)
            chapters_dir.mkdir()
            index_page = guide_dir / "index.html"
            first_chapter = chapters_dir / "01-getting-started.html"
            second_chapter = chapters_dir / "02-installation.html"
            index_page.write_text("index")
            first_chapter.write_text("one")
            second_chapter.write_text("two")

            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_dir}`",
                f"- `{chapters_dir}`",
                f"- `{index_page}`",
                f"- `{first_chapter}`",
                f"- `{second_chapter}`",
                "",
            ]
            plan_file = temp_dir / "implementation.md"
            plan_file.write_text("\n".join(plan_lines))

            context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
            )
            steering_messages: list[str] = []
            context.queue_steering_message_callback = steering_messages.append
            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a multi-file guide from a reference")
            dod.implementation_plan = str(plan_file)
            dod.verification_commands = [f"ls -la {guide_dir}"]

            blocked_message = (
                "[Blocked - post-build audit loop: all explicitly planned artifacts already exist.]"
            )

            # Three identical audit commands, ids audit-1 through audit-3.
            audit_calls = []
            for attempt in range(1, 4):
                audit_calls.append(
                    ToolCall(
                        id=f"audit-{attempt}",
                        name="bash",
                        arguments={"command": f"cd {temp_dir} && ls -la guide/chapters/"},
                    )
                )

            # Every call comes back blocked with the same message.
            blocked_outcomes = [
                tool_outcome(
                    tool_call=audit_call,
                    output=blocked_message,
                    is_error=True,
                    state=ToolExecutionState.BLOCKED,
                )
                for audit_call in audit_calls
            ]
            executor = FakeExecutor(blocked_outcomes)

            events: list[AgentEvent] = []

            async def emit(event: AgentEvent) -> None:
                events.append(event)

            result = await runner.execute_batch(
                tool_calls=audit_calls,
                tool_source="native",
                pending_tool_calls_seen=set(),
                emit=emit,
                summary=TurnSummary(final_response=""),
                dod=dod,
                executor=executor,
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert result.halted is False
            assert result.consecutive_errors == 0
            assert context.workflow_mode == "verify"
            assert steering_messages
            assert any(
                "move to verification or final confirmation" in nudge
                for nudge in steering_messages
            )
      
        7134
        
        7135
        
        7136
        def test_tool_batch_runner_blocked_html_declared_target_nudge_uses_closest_declared_target(
            temp_dir: Path,
        ) -> None:
            """Check that the declared-target nudge surfaces the closest declared target.

            The blocked message carries a "Closest declared local targets" hint. The
            queued nudge must mention the written file, quote that closest target and
            tell the agent to fix the same file now.
            """

            # Both hooks fail loudly if the runner ever calls them.
            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
            )
            steering_messages: list[str] = []
            context.queue_steering_message_callback = steering_messages.append
            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))

            written_page = temp_dir / "guide" / "chapters" / "01-introduction.html"
            blocked_write = ToolCall(
                id="write-ch1",
                name="write",
                arguments={"file_path": str(written_page)},
            )
            blocked_message = (
                "[Blocked - HTML page introduces new local targets outside the current declared artifact set] "
                "Suggestion: Keep non-root HTML pages within the root-declared local-link set and avoid "
                "introducing new sibling targets that the guide root does not declare, for example fix: 02-setup.html. "
                "Already-declared local targets include: chapters/01-introduction.html, chapters/02-installation.html, "
                "chapters/03-configuration.html. Closest declared local targets include: chapters/02-installation.html"
            )

            runner._queue_blocked_html_declared_target_nudge(blocked_write, blocked_message)

            assert steering_messages
            nudge = steering_messages[0]
            assert str(written_page) in nudge
            assert "`chapters/02-installation.html`" in nudge
            assert "same file now" in nudge
      
        7184
        
        7185
        
        7186
        def test_tool_batch_runner_blocked_html_declared_target_nudge_without_close_match(
            temp_dir: Path,
        ) -> None:
            """Check the declared-target nudge when the block offers no closest target.

            The blocked message has no "Closest declared local targets" hint. The nudge
            must tell the agent to remove the invented hrefs, quote a declared target
            and leave out the "closest declared target(s)" wording.
            """

            # Both hooks fail loudly if the runner ever calls them.
            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
            )
            steering_messages: list[str] = []
            context.queue_steering_message_callback = steering_messages.append
            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))

            blocked_write = ToolCall(
                id="write-ch1",
                name="write",
                arguments={"file_path": str(temp_dir / "guide" / "chapters" / "introduction.html")},
            )
            blocked_message = (
                "[Blocked - HTML page introduces new local targets outside the current declared artifact set] "
                "Suggestion: Keep non-root HTML pages within the root-declared local-link set and avoid "
                "introducing new sibling targets that the guide root does not declare; remove or replace "
                "undeclared hrefs like: troubleshooting.html. "
                "Already-declared local targets include: chapters/introduction.html, chapters/installation.html, "
                "chapters/configuration.html."
            )

            runner._queue_blocked_html_declared_target_nudge(blocked_write, blocked_message)

            assert steering_messages
            nudge = steering_messages[0]
            assert "Remove the invented hrefs or keep local links within the declared target set" in nudge
            assert "`chapters/installation.html`" in nudge
            assert "closest declared target(s)" not in nudge
      
        7235
        
        7236
        
        7237
        def test_tool_batch_runner_blocked_html_declared_file_creation_nudge_points_to_root(
            temp_dir: Path,
        ) -> None:
            """Check that a blocked undeclared HTML creation points back at the guide root.

            The nudge must tell the agent to update the root page named in the blocked
            message, quote the undeclared target, and suggest retrying the creation.
            """

            # Both hooks fail loudly if the runner ever calls them.
            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
            )
            steering_messages: list[str] = []
            context.queue_steering_message_callback = steering_messages.append
            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
            dod = create_definition_of_done("Create a guide.")

            target = temp_dir / "guide" / "chapters" / "troubleshooting.html"
            # Resolved once; used in both the blocked message and the assertion.
            guide_root_page = (temp_dir / "guide" / "index.html").resolve(strict=False)

            blocked_write = ToolCall(
                id="write-troubleshooting",
                name="write",
                arguments={"file_path": str(target)},
            )
            blocked_message = (
                "[Blocked - HTML file creation falls outside the current declared artifact set] "
                "Suggestion: Keep new non-root HTML files within the root-declared artifact set and "
                f"update the guide root `{guide_root_page}` "
                "before creating undeclared sibling pages, for example: chapters/troubleshooting.html. "
                "Already-declared local targets include: chapters/advanced-topics.html, "
                "chapters/basic-usage.html, chapters/configuration.html"
            )

            runner._queue_blocked_html_declared_file_creation_nudge(
                blocked_write,
                blocked_message,
                dod=dod,
            )

            assert steering_messages
            nudge = steering_messages[0]
            assert "update" in nudge.lower()
            assert str(guide_root_page) in nudge
            assert "`chapters/troubleshooting.html`" in nudge
            assert "retry the file creation" in nudge
      
        7290
        
        7291
        
        7292
        def test_tool_batch_runner_blocked_html_declared_file_creation_after_outputs_exist_prefers_verify(
            temp_dir: Path,
        ) -> None:
            """Check that an extra HTML file is refused once all planned outputs exist.

            The index and both planned chapters are on disk and recorded as touched.
            A blocked attempt to create an eighth chapter must produce a nudge that
            moves to verification and does not suggest updating the guide root.
            """

            # Both hooks fail loudly if the runner ever calls them.
            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should not run in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            # Write the complete planned guide: an index and two chapters.
            guide = temp_dir / "guide"
            chapters = guide / "chapters"
            guide.mkdir()
            chapters.mkdir()
            index = guide / "index.html"
            index_links = [
                '<a href="chapters/01-introduction.html">Intro</a>',
                '<a href="chapters/02-installation.html">Install</a>',
                '<a href="../index.html">Back</a>',
                "",
            ]
            index.write_text("\n".join(index_links))
            intro_page = chapters / "01-introduction.html"
            install_page = chapters / "02-installation.html"
            intro_page.write_text("<html></html>\n")
            install_page.write_text("<html></html>\n")

            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{index}`",
                f"- `{intro_page}`",
                f"- `{install_page}`",
                "",
            ]
            plan_file = temp_dir / "implementation.md"
            plan_file.write_text("\n".join(plan_lines))

            context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
            )
            steering_messages: list[str] = []
            context.queue_steering_message_callback = steering_messages.append
            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))

            dod = create_definition_of_done("Create a guide.")
            dod.implementation_plan = str(plan_file)
            dod.verification_commands = [f"ls -la {guide}"]
            dod.touched_files = [str(index), str(intro_page), str(install_page)]

            # The extra chapter is not listed in the plan.
            target = guide / "chapters" / "08-advanced-configuration.html"
            blocked_write = ToolCall(
                id="write-extra",
                name="write",
                arguments={"file_path": str(target)},
            )
            blocked_message = (
                "[Blocked - HTML file creation falls outside the current declared artifact set] "
                "Suggestion: Keep new non-root HTML files within the root-declared artifact set and "
                f"update the guide root `{index.resolve(strict=False)}` before creating undeclared sibling pages, "
                "for example: chapters/08-advanced-configuration.html."
            )

            runner._queue_blocked_html_declared_file_creation_nudge(
                blocked_write,
                blocked_message,
                dod=dod,
            )

            assert steering_messages
            nudge = steering_messages[0]
            assert "All explicitly planned artifacts already exist on disk." in nudge
            assert "Do not expand the output set with `chapters/08-advanced-configuration.html`." in nudge
            assert "Move to verification or final confirmation using the files already on disk." in nudge
            assert "update the guide root" not in nudge
      
        7379
        
        7380
        
        7381
        def test_tool_batch_runner_blocked_html_declared_file_creation_prefers_closest_target(
            temp_dir: Path,
        ) -> None:
            """Check the nudge queued for a blocked, undeclared HTML page creation.

            The blocked message already lists the declared local targets and the
            closest one, so the queued steering message is expected to name that
            closest target and must not mention updating the guide root.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should not run in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
            )
            # Capture every steering message the runner queues.
            nudges: list[str] = []
            runtime_context.queue_steering_message_callback = nudges.append
            dod_store = DefinitionOfDoneStore(temp_dir)
            batch_runner = ToolBatchRunner(runtime_context, dod_store)
            dod = create_definition_of_done("Create a guide.")

            undeclared_page = temp_dir.joinpath("guide", "chapters", "02-basics.html")
            blocked_write = ToolCall(
                id="write-basics",
                name="write",
                arguments={"file_path": str(undeclared_page)},
            )
            blocked_message = (
                "[Blocked - HTML file creation falls outside the current declared artifact set] "
                "Suggestion: Keep new non-root HTML files within the root-declared artifact set. "
                "Do not create undeclared sibling page `chapters/02-basics.html`; use the closest declared local target instead. "
                "Already-declared local targets include: chapters/01-introduction.html, "
                "chapters/02-installation.html, chapters/03-basic-configuration.html. "
                "Closest declared local targets include: chapters/02-installation.html"
            )
            batch_runner._queue_blocked_html_declared_file_creation_nudge(
                blocked_write,
                blocked_message,
                dod=dod,
            )

            assert nudges
            first_nudge = nudges[0]
            for expected_fragment in (
                "Do not create `chapters/02-basics.html`.",
                "closest declared target instead: `chapters/02-installation.html`",
                "Already-declared local targets include:",
            ):
                assert expected_fragment in first_nudge
            assert "update the guide root" not in first_nudge
      
        7434
        
        7435
        
        7436
        def test_tool_batch_runner_blocked_html_missing_target_after_outputs_exist_prefers_verify(
            temp_dir: Path,
        ) -> None:
            """Check the nudge queued for a blocked edit that introduces a broken href.

            Every file listed in the implementation plan is written to disk before
            the nudge is requested, so the steering message is expected to keep the
            model on the edited index file and tell it to repair existing files
            rather than add new link targets.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should not run in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            guide = temp_dir / "guide"
            chapters = guide / "chapters"
            guide.mkdir()
            chapters.mkdir()

            index = guide / "index.html"
            chapter_files = [
                chapters / "01-introduction.html",
                chapters / "02-installation.html",
            ]
            index_links = (
                '<a href="chapters/01-introduction.html">Intro</a>',
                '<a href="chapters/02-installation.html">Install</a>',
                '<a href="../index.html">Back</a>',
            )
            # One link per line, terminated by a trailing newline.
            index.write_text("\n".join(index_links) + "\n")
            for chapter_file in chapter_files:
                chapter_file.write_text("<html></html>\n")

            # The plan declares exactly the files that now exist on disk.
            planned_files = [index, *chapter_files]
            plan_lines = ["# Implementation Plan", "", "## File Changes"]
            plan_lines.extend(f"- `{planned}`" for planned in planned_files)
            plan_lines.append("")
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines))

            runtime_context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
            )
            # Capture every steering message the runner queues.
            nudges: list[str] = []
            runtime_context.queue_steering_message_callback = nudges.append
            dod_store = DefinitionOfDoneStore(temp_dir)
            batch_runner = ToolBatchRunner(runtime_context, dod_store)

            dod = create_definition_of_done("Create a guide.")
            dod.implementation_plan = str(implementation_plan)
            dod.verification_commands = [f"ls -la {guide}"]
            dod.touched_files = [str(planned) for planned in planned_files]

            blocked_edit = ToolCall(
                id="edit-root",
                name="edit",
                arguments={"file_path": str(index)},
            )
            blocked_message = (
                "[Blocked - Edited HTML links point to files that do not exist] "
                "Suggestion: Use only existing local targets for href values and avoid introducing missing links. "
                "Broken href(s): chapters/08-advanced-configuration.html. "
                "Replace them with an existing local target or remove the broken link."
            )
            batch_runner._queue_blocked_html_missing_target_nudge(
                blocked_edit,
                blocked_message,
                dod=dod,
            )

            assert nudges
            first_nudge = nudges[0]
            for expected_fragment in (
                "All explicitly planned artifacts already exist on disk.",
                f"Stay on `{index}`.",
                "Do not introduce new local-link targets beyond the current output set.",
                "Repair the existing generated files instead of expanding the guide.",
                "Replace broken hrefs with existing local targets or remove the broken link.",
            ):
                assert expected_fragment in first_nudge
      
        7523
        
        7524
        
        7525
        @pytest.mark.asyncio
        async def test_tool_batch_runner_blocked_empty_file_path_nudges_concrete_next_artifact(
            temp_dir: Path,
        ) -> None:
            """Check the nudge queued when a `write` call is blocked for an empty path.

            The plan lists an index and two chapters, but only the index and the
            first chapter are written to disk. The steering message is expected to
            name the second chapter as the file to create, and the blocked message
            is expected to be recorded as the latest recovery attempt's error.
            """

            async def assess_confidence(
                tool_name: str,
                tool_args: dict,
                context: str,
            ) -> ConfidenceAssessment:
                raise AssertionError("Confidence scoring should be disabled in this scenario")

            async def verify_action(
                tool_name: str,
                tool_args: dict,
                result: str,
                expected: str = "",
            ) -> ActionVerification:
                raise AssertionError("Verification should not run in this scenario")

            guide_root = temp_dir / "guides" / "nginx"
            chapters = guide_root / "chapters"
            chapters.mkdir(parents=True)
            index_path = guide_root / "index.html"
            chapter_one = chapters / "01-introduction.html"
            chapter_two = chapters / "02-installation.html"
            # chapter_two is deliberately left unwritten: it is the missing artifact.
            index_path.write_text("<html></html>\n")
            chapter_one.write_text("<h1>Intro</h1>\n")

            plan_lines = ["# Implementation Plan", "", "## File Changes"]
            plan_lines += [
                f"- `{planned}`" for planned in (index_path, chapter_one, chapter_two)
            ]
            plan_lines.append("")
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines))

            context = build_context(
                temp_dir=temp_dir,
                messages=[],
                safeguards=FakeSafeguards(),
                assess_confidence=assess_confidence,
                verify_action=verify_action,
                auto_recover=False,
            )
            # Capture every steering message the runner queues.
            queued: list[str] = []
            context.queue_steering_message_callback = queued.append
            runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))

            tool_call = ToolCall(
                id="write-2",
                name="write",
                arguments={"file_path": "", "content": "<html></html>\n"},
            )
            blocked_message = "[Blocked - Empty file path] Suggestion: Provide a valid file path"
            blocked_result = Message.tool_result_message(
                tool_call_id=tool_call.id,
                display_content=blocked_message,
                result_content=blocked_message,
                is_error=True,
            )
            blocked_outcome = ToolExecutionOutcome(
                tool_call=tool_call,
                state=ToolExecutionState.BLOCKED,
                message=blocked_result,
                event_content=blocked_message,
                is_error=True,
                result_output=blocked_message,
            )
            executor = FakeExecutor([blocked_outcome])

            dod = create_definition_of_done("Create a multi-file nginx guide.")
            dod.implementation_plan = str(implementation_plan)
            already_written = [str(index_path), str(chapter_one)]
            dod.touched_files.extend(already_written)
            dod.pending_items.append("Creating Chapter 2: Installation and Setup")

            await runner.execute_batch(
                tool_calls=[tool_call],
                tool_source="assistant",
                pending_tool_calls_seen=set(),
                emit=_noop_emit,
                summary=TurnSummary(final_response=""),
                dod=dod,
                executor=executor,  # type: ignore[arg-type]
                on_confirmation=None,
                on_user_question=None,
                emit_confirmation=None,
                consecutive_errors=0,
            )

            assert queued
            first_nudge = queued[0]
            for expected_fragment in (
                "did not provide a valid `file_path`",
                "Resume by creating `02-installation.html` now.",
                f"Prefer one `write` call for `{display_runtime_path(chapter_two)}` instead of more rereads.",
            ):
                assert expected_fragment in first_nudge
            assert context.recovery_context is not None
            assert context.recovery_context.attempts[-1].error == blocked_message