loader Public

Watch 0 Fork 0 Star 0
Python · 42568 bytes Raw Blame History
  
        1
        """Tests for finalization helpers on RuntimeContext."""
      
        2
        
        3
        from __future__ import annotations
      
        4
        
        5
        from pathlib import Path
      
        6
        from types import SimpleNamespace
      
        7
        
        8
        import pytest
      
        9
        
        10
        from loader.llm.base import Message, Role, ToolCall
      
        11
        from loader.runtime.completion_trace import CompletionTraceEntry
      
        12
        from loader.runtime.context import RuntimeContext
      
        13
        from loader.runtime.dod import (
      
        14
            DefinitionOfDoneStore,
      
        15
            VerificationEvidence,
      
        16
            create_definition_of_done,
      
        17
        )
      
        18
        from loader.runtime.events import TurnSummary
      
        19
        from loader.runtime.executor import ToolExecutionOutcome, ToolExecutionState
      
        20
        from loader.runtime.finalization import (
      
        21
            TurnFinalizer,
      
        22
            _build_verification_repair_guidance,
      
        23
        )
      
        24
        from loader.runtime.permissions import (
      
        25
            PermissionMode,
      
        26
            build_permission_policy,
      
        27
            load_permission_rules,
      
        28
        )
      
        29
        from loader.runtime.repair_focus import extract_active_repair_context
      
        30
        from loader.runtime.tracing import RuntimeTracer
      
        31
        from loader.runtime.verification_observations import VerificationObservationStatus
      
        32
        from loader.tools.base import ToolResult as RegistryToolResult
      
        33
        from loader.tools.base import create_default_registry
      
        34
        from tests.helpers.runtime_harness import ScriptedBackend
      
        35
        
        36
        
        37
        class FakeSession:
            """In-memory stand-in for the runtime session used by these tests.

            It stores appended messages, workflow timeline entries, and the
            arguments of every ``record_turn_usage`` call so that tests can
            assert on them afterwards. The completion-decision attributes are
            pre-populated with a "verification_passed" outcome.
            """

            def __init__(self) -> None:
                self.messages: list[Message] = []
                self.session_id = "session-test-123"
                # One payload per record_turn_usage() call, in call order.
                self.recorded_calls: list[dict[str, object]] = []
                self.last_completion_decision_code = "verification_passed"
                self.last_completion_decision_summary = (
                    "accepted the response after verification evidence passed"
                )
                self.completion_trace = [
                    CompletionTraceEntry(
                        stage="definition_of_done",
                        outcome="complete",
                        decision_code="verification_passed",
                        decision_summary="accepted the response after verification evidence passed",
                    )
                ]
                self.last_turn_transition_summary = (
                    "completion -> finalize [terminal] Finalizing completed turn"
                )
                self.workflow_timeline: list[object] = []

            def append(self, message: Message) -> None:
                """Store ``message`` at the end of ``self.messages``."""
                self.messages.append(message)

            def append_workflow_timeline_entry(self, entry: object) -> None:
                """Store ``entry`` at the end of ``self.workflow_timeline``."""
                self.workflow_timeline.append(entry)

            def record_turn_usage(
                self,
                usage: dict[str, int],
                *,
                tool_calls: int,
                iterations: int,
            ) -> dict[str, int]:
                """Record one usage report and return a fixed-shape cumulative summary.

                A shallow copy of ``usage`` is stored together with ``tool_calls``
                and ``iterations`` in ``self.recorded_calls``. The returned dict
                always reports ``"turns": 1`` and echoes the two counters.
                """
                payload = {
                    # Copy so later mutation by the caller does not alter the record.
                    "usage": dict(usage),
                    "tool_calls": tool_calls,
                    "iterations": iterations,
                }
                self.recorded_calls.append(payload)
                return {"turns": 1, "tool_calls": tool_calls, "iterations": iterations}
      
        79
        
        80
        
        81
        class FakeCodeFilter:
            """Code-filter stub whose ``reset`` does nothing."""

            def reset(self) -> None:
                """No-op; there is no state to clear."""
                return None
      
        84
        
        85
        
        86
        class FakeSafeguards:
            """Pass-through safeguards stub.

            Filters return their input unchanged, steering is never requested,
            and loop detection never fires.
            """

            def __init__(self) -> None:
                # Opaque placeholders; this stub never inspects them.
                self.action_tracker = object()
                self.validator = object()
                self.code_filter = FakeCodeFilter()

            def filter_stream_chunk(self, content: str) -> str:
                """Return ``content`` unchanged."""
                return content

            def filter_complete_content(self, content: str) -> str:
                """Return ``content`` unchanged."""
                return content

            def should_steer(self) -> bool:
                """Always report that no steering is needed."""
                return False

            def get_steering_message(self) -> str | None:
                """Always return ``None`` (no steering message)."""
                return None

            def record_response(self, content: str) -> None:
                """Ignore the response; nothing is recorded."""
                return None

            def detect_text_loop(self, content: str) -> tuple[bool, str]:
                """Always return ``(False, "")``."""
                return False, ""

            def detect_loop(self) -> tuple[bool, str]:
                """Always return ``(False, "")``."""
                return False, ""
      
        112
        
        113
        
        114
        class FakeExecutor:
            """Executor stub that replays a pre-queued list of outcomes in order."""

            def __init__(self, outcomes: list[ToolExecutionOutcome]) -> None:
                # Copy so the caller's list is not consumed by pop().
                self._outcomes = list(outcomes)

            async def execute_tool_call(self, tool_call: ToolCall, **_: object) -> ToolExecutionOutcome:
                """Return the next queued outcome, ignoring ``tool_call`` and extra kwargs.

                Raises:
                    AssertionError: if no outcome is left in the queue.
                """
                if not self._outcomes:
                    raise AssertionError("No fake verification outcome queued")
                return self._outcomes.pop(0)
      
        122
        
        123
        
        124
        class RecordingExecutor:
            """Executor stub that records each command and always reports success."""

            def __init__(self) -> None:
                # Commands seen so far, in call order.
                self.commands: list[str] = []

            async def execute_tool_call(self, tool_call: ToolCall, **_: object) -> ToolExecutionOutcome:
                """Record the call's ``command`` argument and return an "ok" outcome.

                A missing ``command`` argument is recorded as an empty string.
                """
                command = str(tool_call.arguments.get("command", ""))
                self.commands.append(command)
                return tool_outcome(
                    tool_call=tool_call,
                    output="ok",
                    is_error=False,
                    exit_code=0,
                    stdout="ok",
                )
      
        138
        
        139
        
        140
        class SelectiveRecordingExecutor:
            """Executor stub that records commands and fails those containing a marker."""

            def __init__(self, failing_match: str) -> None:
                # Commands seen so far, in call order.
                self.commands: list[str] = []
                # Substring that marks a command as failing.
                self.failing_match = failing_match

            async def execute_tool_call(self, tool_call: ToolCall, **_: object) -> ToolExecutionOutcome:
                """Record the command and return a failed or "ok" outcome.

                The outcome is a failure (exit code 1, "failed" on stderr) when
                ``failing_match`` occurs in the command string, otherwise a
                success (exit code 0, "ok" on stdout).
                """
                command = str(tool_call.arguments.get("command", ""))
                self.commands.append(command)
                failed = self.failing_match in command
                return tool_outcome(
                    tool_call=tool_call,
                    output="failed" if failed else "ok",
                    is_error=failed,
                    exit_code=1 if failed else 0,
                    stdout="" if failed else "ok",
                    stderr="failed" if failed else "",
                )
      
        157
        
        158
        
        159
        def build_context(temp_dir: Path, session: FakeSession) -> RuntimeContext:
            """Build a ``RuntimeContext`` rooted at ``temp_dir`` for finalizer tests.

            The context uses the default tool registry, a workspace-write
            permission policy built from the rules loaded from ``temp_dir``, a
            ``ScriptedBackend``, ``FakeSafeguards``, and a ``SimpleNamespace``
            config in place of the real configuration object.
            """
            registry = create_default_registry(temp_dir)
            registry.configure_workspace_root(temp_dir)
            rule_status = load_permission_rules(temp_dir)
            policy = build_permission_policy(
                active_mode=PermissionMode.WORKSPACE_WRITE,
                workspace_root=temp_dir,
                tool_requirements=registry.get_tool_requirements(),
                rules=rule_status.rules,
            )
            return RuntimeContext(
                project_root=temp_dir,
                backend=ScriptedBackend(),
                registry=registry,
                session=session,  # type: ignore[arg-type]
                config=SimpleNamespace(
                    force_react=False,
                    verification_retry_budget=3,
                    reasoning=SimpleNamespace(
                        rollback=False,
                        show_rollback_plan=False,
                        completion_check=True,
                        use_quick_completion=True,
                        max_continuation_prompts=5,
                        self_critique=False,
                        confidence_scoring=False,
                        min_confidence_for_action=3,
                        verification=False,
                    ),
                ),
                capability_profile=SimpleNamespace(supports_native_tools=True),  # type: ignore[arg-type]
                project_context=None,
                permission_policy=policy,
                permission_config_status=rule_status,
                workflow_mode="execute",
                safeguards=FakeSafeguards(),
            )
      
        196
        
        197
        
        198
        def tool_outcome(
            *,
            tool_call: ToolCall,
            output: str,
            is_error: bool,
            exit_code: int,
            stdout: str = "",
            stderr: str = "",
        ) -> ToolExecutionOutcome:
            """Build an ``EXECUTED`` ``ToolExecutionOutcome`` for ``tool_call``.

            ``output`` is used for the tool-result message (display and result
            content), the event content, the result output, and the registry
            result. ``exit_code``, ``stdout`` and ``stderr`` are placed in the
            registry result's metadata.
            """
            return ToolExecutionOutcome(
                tool_call=tool_call,
                state=ToolExecutionState.EXECUTED,
                message=Message.tool_result_message(
                    tool_call_id=tool_call.id,
                    display_content=output,
                    result_content=output,
                    is_error=is_error,
                ),
                event_content=output,
                is_error=is_error,
                result_output=output,
                registry_result=RegistryToolResult(
                    output=output,
                    is_error=is_error,
                    metadata={
                        "exit_code": exit_code,
                        "stdout": stdout,
                        "stderr": stderr,
                    },
                ),
            )
      
        229
        
        230
        
        231
        async def _noop_set_workflow_mode(mode, dod, emit, summary) -> None:
      
        232
            return None
      
        233
        
        234
        
        235
        def test_turn_finalizer_finalize_summary_uses_runtime_context(
            temp_dir: Path,
            monkeypatch: pytest.MonkeyPatch,
        ) -> None:
            """``finalize_summary`` should populate the summary from the runtime context.

            Checks the session id, the usage reported to and returned by the
            session, the memory-store capture, the trace, and the completion
            decision fields copied from the session.
            """
            session = FakeSession()
            context = build_context(temp_dir, session)
            tracer = RuntimeTracer()
            tracer.record("turn.completed", reason="done")
            finalizer = TurnFinalizer(
                context,
                tracer,
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )
            dod = create_definition_of_done("Finish the task")
            dod.status = "done"
            summary = TurnSummary(
                final_response="All set.",
                definition_of_done=dod,
                iterations=2,
                usage={"prompt_tokens": 10},
                tool_result_messages=[Message(role=Role.TOOL, content="tool output")],
            )
            captured: dict[str, str] = {}

            # Replacement for MemoryStore.capture_definition_of_done that only
            # remembers the summary text it was given.
            def capture_definition_of_done(self, summary_text: str) -> None:
                captured["summary"] = summary_text

            monkeypatch.setattr(
                "loader.runtime.finalization.MemoryStore.capture_definition_of_done",
                capture_definition_of_done,
            )

            final_summary = finalizer.finalize_summary(summary)

            assert final_summary.session_id == "session-test-123"
            assert final_summary.cumulative_usage == {"turns": 1, "tool_calls": 1, "iterations": 2}
            assert session.recorded_calls == [
                {
                    "usage": {"prompt_tokens": 10, "tool_calls": 1, "iterations": 2},
                    "tool_calls": 1,
                    "iterations": 2,
                }
            ]
            assert "summary" in captured
            assert final_summary.trace
            assert final_summary.completion_decision_code == "verification_passed"
            assert final_summary.completion_decision_summary == (
                "accepted the response after verification evidence passed"
            )
            assert [entry.decision_code for entry in final_summary.completion_trace] == [
                "verification_passed"
            ]
      
        288
        
        289
        
        290
        def test_verification_repair_guidance_uses_existing_artifacts_as_source_of_truth(
            temp_dir: Path,
        ) -> None:
            """Repair guidance for broken links should point at the files that exist.

            The failing evidence names two chapter links that are not on disk;
            the guidance is expected to mention the existing chapter files.
            """
            guide_root = temp_dir / "guides" / "nginx"
            chapters = guide_root / "chapters"
            chapters.mkdir(parents=True)
            index_path = guide_root / "index.html"
            chapter_one = chapters / "01-getting-started.html"
            chapter_two = chapters / "02-installation.html"
            chapter_three = chapters / "03-first-website.html"
            chapter_four = chapters / "04-configuration-basics.html"

            for path in (index_path, chapter_one, chapter_two, chapter_three, chapter_four):
                path.write_text("<html></html>\n")

            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text(
                "\n".join(
                    [
                        "# Implementation Plan",
                        "",
                        "## File Changes",
                        f"- `{guide_root}/`",
                        f"- `{chapters}/`",
                        f"- `{index_path}`",
                        f"- `{chapter_one}`",
                        f"- `{chapter_two}`",
                        f"- `{chapter_three}`",
                        f"- `{chapter_four}`",
                        "",
                    ]
                )
            )

            dod = create_definition_of_done("Repair the nginx guide index.")
            dod.implementation_plan = str(implementation_plan)
            # The link targets below were never created on disk.
            dod.evidence = [
                VerificationEvidence(
                    command="verify-links",
                    passed=False,
                    output=(
                        "Missing local HTML links:\n"
                        f"{index_path}:chapters/01-introduction.html -> {chapters / '01-introduction.html'}\n"
                        f"{index_path}:chapters/04-server-blocks.html -> {chapters / '04-server-blocks.html'}\n"
                    ),
                )
            ]

            guidance = _build_verification_repair_guidance(
                dod,
                project_root=temp_dir,
            )

            assert "Use the existing artifact files as the source of truth" in guidance
            assert str(chapter_one) in guidance
            assert str(chapter_two) in guidance
            assert str(chapter_four) in guidance
      
        347
        
        348
        
        349
        def test_verification_repair_guidance_does_not_create_out_of_scope_link_target(
            temp_dir: Path,
        ) -> None:
            """Repair guidance must not ask for a file outside the planned artifact set.

            The missing link target is ``guides/index.html``, which is not listed
            in the implementation plan; the guidance should say so and the
            extracted repair context should not allow that path.
            """
            guide_root = temp_dir / "guides" / "nginx"
            chapters = guide_root / "chapters"
            chapters.mkdir(parents=True)
            index_path = guide_root / "index.html"
            chapter_one = chapters / "01-introduction.html"
            index_path.write_text('<a href="../index.html">All guides</a>\n')
            chapter_one.write_text('<a href="../index.html">Back</a>\n')
            # Link target one level above guide_root; never written to disk.
            parent_index = temp_dir / "guides" / "index.html"

            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text(
                "\n".join(
                    [
                        "# Implementation Plan",
                        "",
                        "## File Changes",
                        f"- `{guide_root}/`",
                        f"- `{chapters}/`",
                        f"- `{index_path}`",
                        f"- `{chapter_one}`",
                        "",
                    ]
                )
            )

            dod = create_definition_of_done("Create the nginx guide under guides/nginx.")
            dod.implementation_plan = str(implementation_plan)
            dod.touched_files.extend([str(index_path), str(chapter_one)])
            dod.evidence = [
                VerificationEvidence(
                    command="verify-links",
                    passed=False,
                    output=(
                        "Missing local HTML links:\n"
                        f"{index_path}:../index.html -> {parent_index}\n"
                    ),
                )
            ]

            guidance = _build_verification_repair_guidance(
                dod,
                project_root=temp_dir,
            )
            repair = extract_active_repair_context([Message(role=Role.USER, content=guidance)])

            assert "outside the requested artifact scope" in guidance
            assert "do not create that outside file" in guidance
            assert f"create `{parent_index}`" not in guidance
            assert repair is not None
            assert str(parent_index.resolve(strict=False)) not in repair.allowed_paths
            assert str(index_path.resolve(strict=False)) in repair.allowed_paths
      
        403
        
        404
        
        405
        @pytest.mark.asyncio
        async def test_turn_finalizer_records_skipped_verification_observation(
            temp_dir: Path,
        ) -> None:
            """The gate should record a skipped observation when it accepts the response.

            The definition of done has no mutating actions or verification
            commands set, and the executor has no outcomes queued.
            """
            session = FakeSession()
            context = build_context(temp_dir, session)
            finalizer = TurnFinalizer(
                context,
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )
            dod = create_definition_of_done("Explain Loader's clarify loop.")
            summary = TurnSummary(final_response="")
            events: list[object] = []

            async def capture(event) -> None:
                events.append(event)

            result = await finalizer.run_definition_of_done_gate(
                dod=dod,
                candidate_response="Loader uses a bounded clarify loop before execution.",
                emit=capture,
                summary=summary,
                executor=FakeExecutor([]),  # type: ignore[arg-type]
            )

            assert result.should_continue is False
            assert result.reason_code == "non_mutating_response_accepted"
            assert [item.status for item in result.verification_observations] == [
                VerificationObservationStatus.SKIPPED.value
            ]
            assert [item.summary for item in result.verification_observations] == [
                "verification was skipped because no mutating work required checks"
            ]
            assert summary.verification_status == "skipped"
            assert "Complete the requested work" not in dod.pending_items
            assert "Complete the requested work" in dod.completed_items
            assert session.workflow_timeline[-1].kind == "verify_skip"
            assert [item.status for item in session.workflow_timeline[-1].verification_observations] == [
                VerificationObservationStatus.SKIPPED.value
            ]
            assert any(event.type == "dod_status" and event.dod_status == "done" for event in events)
      
        448
        
        449
        
        450
        @pytest.mark.asyncio
        async def test_turn_finalizer_accepts_noop_completion_with_task_restatement_todo(
            temp_dir: Path,
        ) -> None:
            """A no-edit answer should be accepted even if a pending item restates the task.

            ``pending_items`` holds the task text itself plus the generic
            "Complete the requested work" item; the gate is still expected to
            stop with ``non_mutating_response_accepted``.
            """
            session = FakeSession()
            context = build_context(temp_dir, session)
            finalizer = TurnFinalizer(
                context,
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )
            task = (
                "Have a look at ~/Loader/guides/fortran/index.html, then "
                "~/Loader/guides/fortran/chapters. The table of contents links in "
                "index.html are inaccurate and the href’s are wrong. Let’s update the "
                "links and their link texts to be correct."
            )
            dod = create_definition_of_done(task)
            dod.pending_items = [task, "Complete the requested work"]
            summary = TurnSummary(final_response="")

            # Emitted events are not inspected by this test.
            async def capture(event) -> None:
                return None

            result = await finalizer.run_definition_of_done_gate(
                dod=dod,
                candidate_response="The table of contents is already correct, so no edit is needed.",
                emit=capture,
                summary=summary,
                executor=FakeExecutor([]),  # type: ignore[arg-type]
            )

            assert result.should_continue is False
            assert result.reason_code == "non_mutating_response_accepted"
      
        485
        
        486
        
        487
        @pytest.mark.asyncio
        async def test_turn_finalizer_records_passed_verification_observation(
            temp_dir: Path,
        ) -> None:
            """The gate should record pending then passed observations for a passing command.

            The definition of done has one mutating action and one verification
            command; the fake executor returns a single successful outcome.
            """
            session = FakeSession()
            context = build_context(temp_dir, session)
            finalizer = TurnFinalizer(
                context,
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )
            dod = create_definition_of_done("Update the runtime tests.")
            dod.mutating_actions.append("write")
            dod.verification_commands = ["uv run pytest -q"]
            summary = TurnSummary(final_response="")
            tool_call = ToolCall(
                id="verify-1-1",
                name="bash",
                arguments={"command": "uv run pytest -q", "cwd": str(temp_dir)},
            )

            # Emitted events are not inspected by this test.
            async def capture(event) -> None:
                return None

            result = await finalizer.run_definition_of_done_gate(
                dod=dod,
                candidate_response="Updated the runtime tests.",
                emit=capture,
                summary=summary,
                executor=FakeExecutor(
                    [
                        tool_outcome(
                            tool_call=tool_call,
                            output="219 passed",
                            is_error=False,
                            exit_code=0,
                            stdout="219 passed",
                        )
                    ]
                ),  # type: ignore[arg-type]
            )

            assert result.should_continue is False
            assert result.reason_code == "verification_passed"
            assert [item.status for item in result.verification_observations] == [
                VerificationObservationStatus.PASSED.value
            ]
            assert result.verification_observations[0].attempt_id == "verification-attempt-1"
            assert result.verification_observations[0].attempt_number == 1
            assert result.verification_observations[0].command == "uv run pytest -q"
            assert result.verification_observations[0].detail == "219 passed"
            assert summary.verification_status == "passed"
            # The last two timeline entries: the pending marker, then the result.
            assert [entry.reason_code for entry in session.workflow_timeline[-2:]] == [
                "verification_pending",
                "verification_command_passed",
            ]
            assert [item.status for item in session.workflow_timeline[-2].verification_observations] == [
                VerificationObservationStatus.PENDING.value
            ]
            assert (
                session.workflow_timeline[-2].verification_observations[0].attempt_id
                == "verification-attempt-1"
            )
            assert session.workflow_timeline[-2].verification_observations[0].command == (
                "uv run pytest -q"
            )
            assert session.workflow_timeline[-1].kind == "verify_observation"
            assert session.workflow_timeline[-1].reason_code == "verification_command_passed"
            assert [item.status for item in session.workflow_timeline[-1].verification_observations] == [
                VerificationObservationStatus.PASSED.value
            ]
      
        559
        
        560
        
        561
        @pytest.mark.asyncio
        async def test_turn_finalizer_appends_runtime_semantic_verifier_to_planned_commands(
            temp_dir: Path,
        ) -> None:
            """Planned grep runs together with a generated ``python3`` heredoc command.

            Setup: one chapter page plus an ``index.html`` that links to it. The
            index is recorded as an edited file and the DoD plans a single
            ``grep`` command. The test then checks which commands reached the
            recording executor.
            """
            chapter_dir = temp_dir / "chapters"
            chapter_dir.mkdir()
            chapter_page = chapter_dir / "01-introduction.html"
            chapter_page.write_text(
                "<h1>Chapter 1: Introduction to Fortran</h1>\n"
            )

            index_page = temp_dir / "index.html"
            toc_lines = [
                '<ul class="chapter-list">',
                '  <li><a href="chapters/01-introduction.html">Chapter 1: Introduction to Fortran</a></li>',
                "</ul>",
            ]
            index_page.write_text("\n".join(toc_lines))

            session = FakeSession()
            finalizer = TurnFinalizer(
                build_context(temp_dir, session),
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )

            planned_grep = 'grep -n "href=" index.html'
            dod = create_definition_of_done(
                "Update index.html so the table of contents links and chapter titles are correct."
            )
            dod.mutating_actions.append("edit")
            dod.touched_files.append(str(index_page))
            dod.verification_commands = [planned_grep]
            summary = TurnSummary(final_response="")
            executor = RecordingExecutor()

            async def discard_event(event) -> None:
                """Swallow emitted events; this test does not inspect them."""

            result = await finalizer.run_definition_of_done_gate(
                dod=dod,
                candidate_response="Updated the index.html links.",
                emit=discard_event,
                summary=summary,
                executor=executor,  # type: ignore[arg-type]
            )

            assert result.should_continue is False
            # The planned command must still be executed verbatim ...
            assert planned_grep in executor.commands
            # ... and a heredoc-style python3 command must have been executed too.
            assert any(
                issued.startswith("python3 - <<'PY'") for issued in executor.commands
            )
            latest_entry = session.workflow_timeline[-1]
            first_observation = latest_entry.verification_observations[0]
            assert first_observation.attempt_id == "verification-attempt-1"
      
        616
        
        617
        
        618
        @pytest.mark.asyncio
        async def test_turn_finalizer_does_not_append_repo_defaults_to_external_verification_plan(
            temp_dir: Path,
        ) -> None:
            """Only the planned commands run when the touched file lives outside ``temp_dir``.

            ``temp_dir`` holds ``pyproject.toml`` and ``package.json`` markers,
            while the written file sits in a sibling directory. The executor is
            expected to see exactly the two planned commands, in order.
            """
            repo_markers = (
                ("pyproject.toml", "[project]\nname='loader'\n"),
                ("package.json", "{}\n"),
            )
            for marker_name, marker_body in repo_markers:
                (temp_dir / marker_name).write_text(marker_body)

            # Sibling of temp_dir, i.e. deliberately outside the workspace root.
            outside_dir = temp_dir.parent / "external-nginx-guide"
            outside_dir.mkdir(exist_ok=True)
            outside_index = outside_dir / "index.html"
            outside_index.write_text("<html></html>\n")

            session = FakeSession()
            finalizer = TurnFinalizer(
                build_context(temp_dir, session),
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )

            planned_commands = (
                f"ls -la {outside_dir}",
                f'grep -n "html" {outside_index}',
            )
            dod = create_definition_of_done("Create an external nginx guide.")
            dod.mutating_actions.append("write")
            dod.touched_files.append(str(outside_index))
            # Hand the DoD its own list so the expectation below stays independent.
            dod.verification_commands = list(planned_commands)
            summary = TurnSummary(final_response="")
            executor = RecordingExecutor()

            async def discard_event(event) -> None:
                """Swallow emitted events; this test does not inspect them."""

            result = await finalizer.run_definition_of_done_gate(
                dod=dod,
                candidate_response="Created the external nginx guide.",
                emit=discard_event,
                summary=summary,
                executor=executor,  # type: ignore[arg-type]
            )

            assert result.should_continue is False
            assert executor.commands == list(planned_commands)
      
        663
        
        664
        
        665
        @pytest.mark.asyncio
        async def test_turn_finalizer_filters_reference_side_verification_commands(
            temp_dir: Path,
        ) -> None:
            """Commands naming the reference directory never reach the executor.

            The verification plan lists ``ls -la`` for both the nginx guide
            directory (named in the implementation plan) and a fortran directory
            that appears only in the verification plan. Only the nginx one is
            expected to be executed.
            """
            guides_dir = temp_dir / "Loader" / "guides"
            guide_root = guides_dir / "nginx"
            chapters = guide_root / "chapters"
            chapters.mkdir(parents=True)
            index_path = guide_root / "index.html"
            chapter_one = chapters / "01-introduction.html"
            index_path.write_text("<html><body><h1>Guide</h1></body></html>\n")
            chapter_one.write_text("<html><body><h1>Intro</h1></body></html>\n")

            reference_root = guides_dir / "fortran"
            reference_root.mkdir(parents=True)

            implementation_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}`",
                f"- `{chapters}`",
                f"- `{index_path}`",
                f"- `{chapter_one}`",
                "",
            ]
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(implementation_lines))

            verification_lines = [
                "# Verification Plan",
                "",
                "## Verification Commands",
                "```bash",
                f"ls -la {guide_root}",
                f"ls -la {reference_root}",
                "```",
                "",
            ]
            verification_plan = temp_dir / "verification.md"
            verification_plan.write_text("\n".join(verification_lines))

            session = FakeSession()
            finalizer = TurnFinalizer(
                build_context(temp_dir, session),
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )
            dod = create_definition_of_done("Create an nginx guide from an external reference.")
            dod.mutating_actions.append("write")
            dod.touched_files.extend([str(index_path), str(chapter_one)])
            dod.implementation_plan = str(implementation_plan)
            dod.verification_plan = str(verification_plan)
            summary = TurnSummary(final_response="")
            executor = RecordingExecutor()

            async def discard_event(event) -> None:
                """Swallow emitted events; this test does not inspect them."""

            result = await finalizer.run_definition_of_done_gate(
                dod=dod,
                candidate_response="Created the nginx guide.",
                emit=discard_event,
                summary=summary,
                executor=executor,  # type: ignore[arg-type]
            )

            assert result.should_continue is False
            assert any(str(guide_root) in issued for issued in executor.commands)
            assert not any(str(reference_root) in issued for issued in executor.commands)
      
        741
        
        742
        
        743
        @pytest.mark.asyncio
        async def test_turn_finalizer_blocks_completion_when_planned_artifacts_are_missing(
            temp_dir: Path,
        ) -> None:
            """The gate asks to continue when a planned file was never written.

            The implementation plan names three files but only the index and the
            first chapter exist on disk. The test expects a continue result, no
            verification commands executed, and a follow-up message naming the
            missing chapter.
            """
            docs = temp_dir / "docs"
            chapters = docs / "chapters"
            chapters.mkdir(parents=True)
            index = docs / "index.html"
            written_chapter = chapters / "01-intro.html"
            # Listed in the plan below but intentionally never created.
            absent_chapter = chapters / "02-installation.html"

            index_links = [
                '<a href="chapters/01-intro.html">Intro</a>',
                '<a href="chapters/02-installation.html">Installation</a>',
            ]
            index.write_text("\n".join(index_links))
            written_chapter.write_text("<h1>Intro</h1>\n")

            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{index}`",
                f"- `{written_chapter}`",
                f"- `{absent_chapter}`",
            ]
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines))

            session = FakeSession()
            finalizer = TurnFinalizer(
                build_context(temp_dir, session),
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )
            dod = create_definition_of_done("Create a small multi-page HTML guide.")
            dod.mutating_actions.append("write")
            dod.touched_files.extend([str(index), str(written_chapter)])
            dod.implementation_plan = str(implementation_plan)
            dod.verification_commands = [f"ls -la {docs}"]
            summary = TurnSummary(final_response="")
            executor = RecordingExecutor()

            async def discard_event(event) -> None:
                """Swallow emitted events; this test does not inspect them."""

            result = await finalizer.run_definition_of_done_gate(
                dod=dod,
                candidate_response="Finished the guide.",
                emit=discard_event,
                summary=summary,
                executor=executor,  # type: ignore[arg-type]
            )

            assert result.should_continue is True
            assert result.reason_code == "planned_artifacts_missing_continue"
            # No verification command may have been executed.
            assert executor.commands == []
            assert dod.status == "draft"

            generic_item = "Complete the requested work"
            assert generic_item in dod.pending_items
            assert generic_item not in dod.completed_items

            follow_up = session.messages[-1].content
            assert follow_up.startswith("[PLANNED ARTIFACTS STILL MISSING]")
            assert "`02-installation.html`" in follow_up
      
        811
        
        812
        
        813
        @pytest.mark.asyncio
        async def test_turn_finalizer_records_missing_verification_observation(
            temp_dir: Path,
        ) -> None:
            """A mutating DoD with no verification commands yields a MISSING observation.

            The DoD records an ``edit`` action but sets no commands. The test
            expects a failed-verification re-entry, one MISSING observation on
            both the result and the last timeline entry, and a user message
            flagging the failed check.
            """
            session = FakeSession()
            finalizer = TurnFinalizer(
                build_context(temp_dir, session),
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )
            dod = create_definition_of_done("Edit the loader bootstrap.")
            dod.mutating_actions.append("edit")
            summary = TurnSummary(final_response="")

            async def discard_event(event) -> None:
                """Swallow emitted events; this test does not inspect them."""

            result = await finalizer.run_definition_of_done_gate(
                dod=dod,
                candidate_response="Updated the bootstrap code.",
                emit=discard_event,
                summary=summary,
                executor=FakeExecutor([]),  # type: ignore[arg-type]
            )

            expected_statuses = [VerificationObservationStatus.MISSING.value]
            expected_attempt_id = "verification-attempt-1"

            # What the gate returned to the caller.
            assert result.should_continue is True
            assert result.reason_code == "verification_failed_reentry"
            returned = result.verification_observations
            assert [obs.status for obs in returned] == expected_statuses
            assert returned[0].attempt_id == expected_attempt_id
            assert returned[0].attempt_number == 1
            assert [obs.summary for obs in returned] == [
                "verification commands were still missing at execution time"
            ]
            assert summary.verification_status == "failed"

            # What was recorded on the session timeline.
            latest_entry = session.workflow_timeline[-1]
            assert latest_entry.kind == "verify_observation"
            assert latest_entry.reason_code == "verification_commands_missing"
            recorded = latest_entry.verification_observations
            assert [obs.status for obs in recorded] == expected_statuses
            assert recorded[0].attempt_id == expected_attempt_id

            # The re-entry prompt appended to the conversation.
            last_message = session.messages[-1]
            assert last_message.role == Role.USER
            assert last_message.content.startswith("[DEFINITION OF DONE CHECK FAILED]")
      
        862
        
        863
        
        864
        @pytest.mark.asyncio
        async def test_turn_finalizer_ignores_unplanned_expansion_pending_items_once_plan_exists(
            temp_dir: Path,
        ) -> None:
            """Pending items outside the implementation plan do not block completion.

            Every file named in the plan exists on disk, no mutating action is
            recorded, and the pending list mentions a chapter the plan never
            named. The gate is still expected to accept the response.
            """
            session = FakeSession()
            finalizer = TurnFinalizer(
                build_context(temp_dir, session),
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )

            docs = temp_dir / "guides" / "nginx"
            chapters = docs / "chapters"
            docs.mkdir(parents=True)
            chapters.mkdir()
            index = docs / "index.html"
            chapter_one = chapters / "01-getting-started.html"
            chapter_two = chapters / "02-installation.html"
            index.write_text("<html></html>\n")
            chapter_one.write_text("<h1>One</h1>\n")
            chapter_two.write_text("<h1>Two</h1>\n")

            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{docs}/`",
                f"- `{chapters}/`",
                f"- `{index}`",
                f"- `{chapter_one}`",
                f"- `{chapter_two}`",
                "",
            ]
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines))

            dod = create_definition_of_done("Create a small multi-page HTML guide.")
            dod.implementation_plan = str(implementation_plan)
            # The first item names a file that the plan above never mentions.
            dod.pending_items = [
                "Create 07-performance-tuning.html",
                "Complete the requested work",
            ]
            summary = TurnSummary(final_response="")

            async def discard_event(event) -> None:
                """Swallow emitted events; this test does not inspect them."""

            result = await finalizer.run_definition_of_done_gate(
                dod=dod,
                candidate_response="Finished the guide.",
                emit=discard_event,
                summary=summary,
                executor=FakeExecutor([]),  # type: ignore[arg-type]
            )

            assert result.should_continue is False
            assert result.reason_code == "non_mutating_response_accepted"
      
        926
        
        927
        
        928
        @pytest.mark.asyncio
        async def test_turn_finalizer_verification_failure_reentry_points_at_concrete_repair(
            temp_dir: Path,
            monkeypatch: pytest.MonkeyPatch,
        ) -> None:
            """A failed link check produces repair guidance naming the exact files.

            The fake executor reports a failing verification whose output names
            a chapter linking to a missing ``styles.css``. Both the queued
            steering message and the re-entry message are expected to mention
            the broken file and the missing target.
            """
            session = FakeSession()
            context = build_context(temp_dir, session)
            queued_messages: list[str] = []
            # Capture steering messages instead of delivering them anywhere.
            context.queue_steering_message_callback = queued_messages.append
            finalizer = TurnFinalizer(
                context,
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )

            guide_dir = temp_dir / "guides" / "nginx"
            broken_file = guide_dir / "chapters" / "05-advanced-configurations.html"
            broken_file.parent.mkdir(parents=True, exist_ok=True)
            broken_file.write_text('<link rel="stylesheet" href="../styles.css">\n')
            # Referenced by the chapter above but never created.
            missing_target = guide_dir / "styles.css"

            dod = create_definition_of_done("Create the nginx guide.")
            dod.mutating_actions.append("write")
            dod.touched_files.append(str(broken_file))
            dod.verification_commands = ["python3 verify_links.py"]
            summary = TurnSummary(final_response="")
            verify_call = ToolCall(
                id="verify-1-1",
                name="bash",
                arguments={"command": dod.verification_commands[0], "cwd": str(temp_dir)},
            )
            failure_header = "Missing local HTML links:\n"
            failure_detail = f"{broken_file}:../styles.css -> {missing_target}\n"
            failure_output = failure_header + failure_detail

            async def discard_event(event) -> None:
                """Swallow emitted events; this test does not inspect them."""

            # Stub command derivation so it contributes no commands of its own.
            monkeypatch.setattr(
                "loader.runtime.finalization.derive_verification_commands",
                lambda *args, **kwargs: [],
            )

            failing_outcome = tool_outcome(
                tool_call=verify_call,
                output=failure_output,
                is_error=True,
                exit_code=1,
                stdout=failure_output,
            )
            result = await finalizer.run_definition_of_done_gate(
                dod=dod,
                candidate_response="The guide is complete.",
                emit=discard_event,
                summary=summary,
                executor=FakeExecutor([failing_outcome]),  # type: ignore[arg-type]
            )

            assert result.should_continue is True
            assert result.reason_code == "verification_failed_reentry"

            assert queued_messages
            steering = queued_messages[-1]
            assert str(broken_file) in steering
            assert "../styles.css" in steering
            assert str(missing_target) in steering
            assert "Do not restart discovery or reread unrelated references." in steering

            reentry = session.messages[-1].content
            assert reentry.startswith("[DEFINITION OF DONE CHECK FAILED]")
            assert f"Immediate next step: edit `{broken_file}`." in reentry
            assert f"create `{missing_target}`" in reentry
            assert "Do not reread unrelated reference materials or restart discovery" in reentry
      
        1002
        
        1003
        
        1004
        @pytest.mark.asyncio
      
        1005
        async def test_turn_finalizer_verification_failure_reentry_prioritizes_missing_planned_outputs(
      
        1006
            temp_dir: Path,
      
        1007
            monkeypatch: pytest.MonkeyPatch,
      
        1008
        ) -> None:
      
        1009
            session = FakeSession()
      
        1010
            context = build_context(temp_dir, session)
      
        1011
            queued_messages: list[str] = []
      
        1012
            context.queue_steering_message_callback = queued_messages.append
      
        1013
            finalizer = TurnFinalizer(
      
        1014
                context,
      
        1015
                RuntimeTracer(),
      
        1016
                DefinitionOfDoneStore(temp_dir),
      
        1017
                set_workflow_mode=_noop_set_workflow_mode,
      
        1018
            )
      
        1019
            guide_root = temp_dir / "guides" / "nginx"
      
        1020
            chapters = guide_root / "chapters"
      
        1021
            chapters.mkdir(parents=True, exist_ok=True)
      
        1022
            index = guide_root / "index.html"
      
        1023
            first = chapters / "01-installation.html"
      
        1024
            second = chapters / "02-configuration.html"
      
        1025
            third = chapters / "03-basic-usage.html"
      
        1026
            index.write_text(
      
        1027
                "\n".join(
      
        1028
                    [
      
        1029
                        '<a href="chapters/01-installation.html">Installation</a>',
      
        1030
                        '<a href="chapters/02-configuration.html">Configuration</a>',
      
        1031
                        '<a href="chapters/03-basic-usage.html">Basic Usage</a>',
      
        1032
                    ]
      
        1033
                )
      
        1034
            )
      
        1035
            first.write_text("<h1>Installation</h1>\n")
      
        1036
            implementation_plan = temp_dir / "implementation.md"
      
        1037
            implementation_plan.write_text(
      
        1038
                "\n".join(
      
        1039
                    [
      
        1040
                        "# Implementation Plan",
      
        1041
                        "",
      
        1042
                        "## File Changes",
      
        1043
                        f"- `{guide_root}/`",
      
        1044
                        f"- `{chapters}/`",
      
        1045
                        f"- `{index}`",
      
        1046
                        f"- `{first}`",
      
        1047
                        "",
      
        1048
                    ]
      
        1049
                )
      
        1050
            )
      
        1051
            dod = create_definition_of_done("Create the nginx guide.")
      
        1052
            dod.mutating_actions.append("write")
      
        1053
            dod.touched_files.extend([str(index), str(first)])
      
        1054
            dod.implementation_plan = str(implementation_plan)
      
        1055
            dod.verification_commands = ["python3 verify_links.py"]
      
        1056
            summary = TurnSummary(final_response="")
      
        1057
            verify_call = ToolCall(
      
        1058
                id="verify-1-1",
      
        1059
                name="bash",
      
        1060
                arguments={"command": dod.verification_commands[0], "cwd": str(temp_dir)},
      
        1061
            )
      
        1062
            normalized_second = str(second.resolve(strict=False))
      
        1063
            normalized_third = str(third.resolve(strict=False))
      
        1064
            failure_output = (
      
        1065
                "Missing local HTML links:\n"
      
        1066
                f"{index}:chapters/02-configuration.html -> {second}\n"
      
        1067
                f"{index}:chapters/03-basic-usage.html -> {third}\n"
      
        1068
            )
      
        1069
        
        1070
            async def capture(event) -> None:
      
        1071
                return None
      
        1072
        
        1073
            monkeypatch.setattr(
      
        1074
                "loader.runtime.finalization.derive_verification_commands",
      
        1075
                lambda *args, **kwargs: [],
      
        1076
            )
      
        1077
        
        1078
            result = await finalizer.run_definition_of_done_gate(
      
        1079
                dod=dod,
      
        1080
                candidate_response="The guide is complete.",
      
        1081
                emit=capture,
      
        1082
                summary=summary,
      
        1083
                executor=FakeExecutor(
      
        1084
                    [
      
        1085
                        tool_outcome(
      
        1086
                            tool_call=verify_call,
      
        1087
                            output=failure_output,
      
        1088
                            is_error=True,
      
        1089
                            exit_code=1,
      
        1090
                            stdout=failure_output,
      
        1091
                        )
      
        1092
                    ]
      
        1093
                ),  # type: ignore[arg-type]
      
        1094
            )
      
        1095
        
        1096
            assert result.should_continue is True
      
        1097
            assert result.reason_code == "verification_failed_reentry"
      
        1098
            assert queued_messages
      
        1099
            assert normalized_second in queued_messages[-1]
      
        1100
            assert "Do not rewrite the existing aggregate files" in queued_messages[-1]
      
        1101
            assert session.messages[-1].content.startswith("[DEFINITION OF DONE CHECK FAILED]")
      
        1102
            assert f"Immediate next step: write `{normalized_second}`." in session.messages[-1].content
      
        1103
            assert (
      
        1104
                f"creating missing planned artifact `{normalized_second}`"
      
        1105
                in session.messages[-1].content
      
        1106
            )
      
        1107
            assert (
      
        1108
                f"creating missing planned artifact `{normalized_third}`"
      
        1109
                in session.messages[-1].content
      
        1110
            )
      
        1111
            assert f"Immediate next step: edit `{index}`." not in session.messages[-1].content
      
        1112
            assert "Do not rewrite existing aggregate files" in session.messages[-1].content
      
        1113
        
        1114
        
        1115
        @pytest.mark.asyncio
        async def test_turn_finalizer_does_not_reverify_without_new_changes(
            temp_dir: Path,
        ) -> None:
            """The gate must not rerun verification when nothing changed since a failure.

            The definition of done is seeded with a ``failed`` verification result
            and a stored signature describing its current line count, touched file
            and action count (presumably the same shape the finalizer recomputes --
            confirm against ``loader.runtime.finalization``). The test then expects
            the gate to ask for another turn with the
            ``verification_failed_no_new_changes`` reason while the recording
            executor receives no commands.
            """
            fake_session = FakeSession()
            finalizer = TurnFinalizer(
                build_context(temp_dir, fake_session),
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )

            # One touched file on disk; its string form is reused for both the
            # touched-file list and the stored verification signature.
            index_path = temp_dir / "index.html"
            index_path.write_text("<ul></ul>\n")
            touched = str(index_path)

            dod = create_definition_of_done("Fix the chapter list in index.html.")
            dod.mutating_actions.append("edit")
            dod.touched_files.append(touched)
            dod.line_changes = 12
            dod.last_verification_result = "failed"
            dod.last_verification_signature = (
                f"lines={dod.line_changes};touched={touched};actions=1;commands="
            )
            dod.evidence = []

            turn_summary = TurnSummary(final_response="")
            recording_executor = RecordingExecutor()

            async def ignore_events(event) -> None:
                """Discard every event the finalizer emits."""

            gate_result = await finalizer.run_definition_of_done_gate(
                dod=dod,
                candidate_response="I checked the file again.",
                emit=ignore_events,
                summary=turn_summary,
                executor=recording_executor,  # type: ignore[arg-type]
            )

            # The turn continues, but no verification command was dispatched.
            assert gate_result.should_continue is True
            assert gate_result.reason_code == "verification_failed_no_new_changes"
            assert recording_executor.commands == []
            assert turn_summary.verification_status == "failed"

            last_message = fake_session.messages[-1]
            assert last_message.content.startswith("[DEFINITION OF DONE CHECK STILL FAILING]")
      
        1157
        
        1158
        
        1159
        @pytest.mark.asyncio
        async def test_turn_finalizer_accepts_missing_optional_html5validator_when_semantic_check_passes(
            temp_dir: Path,
            monkeypatch: pytest.MonkeyPatch,
        ) -> None:
            """A missing ``html5validator`` binary is skipped when the semantic check passes.

            Two verification commands are scripted: a Python heredoc that exits 0
            and an ``html5validator`` call that exits 127 with "command not found".
            The test expects the gate to finish with ``verification_passed``, to
            record the second piece of evidence as not passed but skipped, and to
            mention both ``SKIP`` and ``html5validator`` in the final response.
            """
            fake_session = FakeSession()
            finalizer = TurnFinalizer(
                build_context(temp_dir, fake_session),
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )

            dod = create_definition_of_done(
                "Update index.html so the table of contents links and chapter titles are correct."
            )
            dod.mutating_actions.append("edit")
            dod.touched_files.append(str(temp_dir / "index.html"))
            dod.verification_commands = [
                "python3 - <<'PY'\nprint('semantic ok')\nPY",
                "html5validator --root /tmp/fortran-qwen-recovery-check/",
            ]
            turn_summary = TurnSummary(final_response="")

            # One bash tool call per verification command, with ids verify-1-1
            # and verify-1-2 in command order.
            semantic_call, html5validator_call = (
                ToolCall(
                    id=f"verify-1-{position}",
                    name="bash",
                    arguments={"command": command, "cwd": str(temp_dir)},
                )
                for position, command in enumerate(dod.verification_commands, start=1)
            )

            async def ignore_events(event) -> None:
                """Discard every event the finalizer emits."""

            # Stub out command derivation so it contributes no commands of its own.
            monkeypatch.setattr(
                "loader.runtime.finalization.derive_verification_commands",
                lambda *args, **kwargs: [],
            )

            missing_binary = "/bin/sh: html5validator: command not found"
            scripted_outcomes = [
                tool_outcome(
                    tool_call=semantic_call,
                    output="semantic ok",
                    is_error=False,
                    exit_code=0,
                    stdout="semantic ok",
                ),
                tool_outcome(
                    tool_call=html5validator_call,
                    output=missing_binary,
                    is_error=True,
                    exit_code=127,
                    stderr=missing_binary,
                ),
            ]

            gate_result = await finalizer.run_definition_of_done_gate(
                dod=dod,
                candidate_response="Updated the chapter links and titles.",
                emit=ignore_events,
                summary=turn_summary,
                executor=FakeExecutor(scripted_outcomes),  # type: ignore[arg-type]
            )

            # Overall outcome: the gate passes and the definition of done is closed.
            assert gate_result.should_continue is False
            assert gate_result.reason_code == "verification_passed"
            assert turn_summary.verification_status == "passed"
            assert dod.status == "done"
            assert dod.last_verification_result == "passed"

            # Per-command evidence: the first passed, the second was skipped.
            assert [evidence.passed for evidence in dod.evidence] == [True, False]
            assert [evidence.skipped for evidence in dod.evidence] == [False, True]
            assert "SKIP" in gate_result.final_response
            assert "html5validator" in gate_result.final_response

            # The last two timeline entries mirror the pass and the skip.
            timeline = fake_session.workflow_timeline
            assert timeline[-2].reason_code == "verification_command_passed"
            assert timeline[-1].reason_code == "verification_command_skipped"
            observed_statuses = [
                observation.status for observation in timeline[-1].verification_observations
            ]
            assert observed_statuses == [VerificationObservationStatus.SKIPPED.value]