loader Public

Watch 0 Fork 0 Star 0
Python · 40532 bytes Raw Blame History
  
        1
        """Tests for finalization helpers on RuntimeContext."""
      
        2
        
        3
        from __future__ import annotations
      
        4
        
        5
        from pathlib import Path
      
        6
        from types import SimpleNamespace
      
        7
        
        8
        import pytest
      
        9
        
        10
        from loader.llm.base import Message, Role, ToolCall
      
        11
        from loader.runtime.completion_trace import CompletionTraceEntry
      
        12
        from loader.runtime.context import RuntimeContext
      
        13
        from loader.runtime.dod import (
      
        14
            DefinitionOfDoneStore,
      
        15
            VerificationEvidence,
      
        16
            create_definition_of_done,
      
        17
        )
      
        18
        from loader.runtime.events import TurnSummary
      
        19
        from loader.runtime.executor import ToolExecutionOutcome, ToolExecutionState
      
        20
        from loader.runtime.finalization import (
      
        21
            TurnFinalizer,
      
        22
            _build_verification_repair_guidance,
      
        23
        )
      
        24
        from loader.runtime.permissions import (
      
        25
            PermissionMode,
      
        26
            build_permission_policy,
      
        27
            load_permission_rules,
      
        28
        )
      
        29
        from loader.runtime.tracing import RuntimeTracer
      
        30
        from loader.runtime.verification_observations import VerificationObservationStatus
      
        31
        from loader.tools.base import ToolResult as RegistryToolResult
      
        32
        from loader.tools.base import create_default_registry
      
        33
        from tests.helpers.runtime_harness import ScriptedBackend
      
        34
        
        35
        
        36
        class FakeSession:
            """In-memory session double that records what the finalizer hands it.

            A new instance already carries a ``verification_passed`` completion
            decision and a one-entry completion trace. Messages, workflow-timeline
            entries, and usage reports are appended to plain lists so tests can
            inspect them afterwards.
            """

            def __init__(self) -> None:
                # The decision code/summary appear both on the session and inside
                # the single trace entry, so bind them once.
                decision_code = "verification_passed"
                decision_summary = "accepted the response after verification evidence passed"

                self.messages: list[Message] = []
                self.session_id = "session-test-123"
                # One payload per record_turn_usage() call, in call order.
                self.recorded_calls: list[dict[str, object]] = []
                self.last_completion_decision_code = decision_code
                self.last_completion_decision_summary = decision_summary
                self.completion_trace = [
                    CompletionTraceEntry(
                        stage="definition_of_done",
                        outcome="complete",
                        decision_code=decision_code,
                        decision_summary=decision_summary,
                    )
                ]
                self.last_turn_transition_summary = (
                    "completion -> finalize [terminal] Finalizing completed turn"
                )
                self.workflow_timeline = []

            def append(self, message: Message) -> None:
                """Store ``message`` at the end of ``messages``."""
                self.messages.append(message)

            def append_workflow_timeline_entry(self, entry) -> None:
                """Store ``entry`` at the end of ``workflow_timeline``."""
                self.workflow_timeline.append(entry)

            def record_turn_usage(
                self,
                usage: dict[str, int],
                *,
                tool_calls: int,
                iterations: int,
            ) -> dict[str, int]:
                """Log one usage report and return canned cumulative totals.

                Args:
                    usage: Usage counters for the turn; a shallow copy is stored.
                    tool_calls: Tool-call count reported for the turn.
                    iterations: Iteration count reported for the turn.

                Returns:
                    A dict with ``turns`` fixed at 1 and the given ``tool_calls``
                    and ``iterations`` echoed back.
                """
                self.recorded_calls.append(
                    {
                        "usage": dict(usage),
                        "tool_calls": tool_calls,
                        "iterations": iterations,
                    }
                )
                return {"turns": 1, "tool_calls": tool_calls, "iterations": iterations}
      
        78
        
        79
        
        80
        class FakeCodeFilter:
            """Code-filter double whose ``reset`` does nothing."""

            def reset(self) -> None:
                """Do nothing and return ``None``."""
                return None
      
        83
        
        84
        
        85
        class FakeSafeguards:
            """Safeguards double: content passes through and nothing is ever flagged.

            Filters return their input unchanged, steering is never requested,
            and both loop detectors always report ``(False, "")``.
            """

            def __init__(self) -> None:
                # Opaque placeholders; only ``code_filter`` has any behavior.
                self.action_tracker = object()
                self.validator = object()
                self.code_filter = FakeCodeFilter()

            def filter_stream_chunk(self, content: str) -> str:
                """Return the streamed chunk unchanged."""
                return content

            def filter_complete_content(self, content: str) -> str:
                """Return the complete content unchanged."""
                return content

            def should_steer(self) -> bool:
                """Always report that no steering is needed."""
                return False

            def get_steering_message(self) -> str | None:
                """Always return ``None`` (there is no steering message)."""
                return None

            def record_response(self, content: str) -> None:
                """Ignore the response."""
                return None

            def detect_text_loop(self, content: str) -> tuple[bool, str]:
                """Always report no text loop, with an empty description."""
                return False, ""

            def detect_loop(self) -> tuple[bool, str]:
                """Always report no loop, with an empty description."""
                return False, ""
      
        111
        
        112
        
        113
        class FakeExecutor:
            """Executor double that replays pre-queued outcomes in FIFO order."""

            def __init__(self, outcomes: list[ToolExecutionOutcome]) -> None:
                # Copy so draining the queue never mutates the caller's list.
                self._outcomes = list(outcomes)

            async def execute_tool_call(self, tool_call: ToolCall, **_: object) -> ToolExecutionOutcome:
                """Return the next queued outcome.

                The tool call and any extra keyword arguments are ignored.

                Raises:
                    AssertionError: If no outcome is left in the queue.
                """
                if self._outcomes:
                    return self._outcomes.pop(0)
                raise AssertionError("No fake verification outcome queued")
      
        121
        
        122
        
        123
        class RecordingExecutor:
            """Executor double that logs each command and always reports success."""

            def __init__(self) -> None:
                # Commands seen so far, in call order.
                self.commands: list[str] = []

            async def execute_tool_call(self, tool_call: ToolCall, **_: object) -> ToolExecutionOutcome:
                """Record the call's ``command`` argument and return an "ok" outcome.

                A missing ``command`` argument is recorded as an empty string.
                Extra keyword arguments are ignored.
                """
                self.commands.append(str(tool_call.arguments.get("command", "")))
                return tool_outcome(
                    tool_call=tool_call,
                    output="ok",
                    is_error=False,
                    exit_code=0,
                    stdout="ok",
                )
      
        137
        
        138
        
        139
        class SelectiveRecordingExecutor:
            """Executor double that logs commands and fails those matching a substring."""

            def __init__(self, failing_match: str) -> None:
                # Commands seen so far, in call order.
                self.commands: list[str] = []
                # Any command containing this substring is reported as failed.
                self.failing_match = failing_match

            async def execute_tool_call(self, tool_call: ToolCall, **_: object) -> ToolExecutionOutcome:
                """Record the call's ``command`` argument and return a pass/fail outcome.

                A command containing ``failing_match`` yields an error outcome
                (exit code 1, "failed" on stderr). Any other command yields a
                success outcome (exit code 0, "ok" on stdout). A missing
                ``command`` argument is treated as an empty string.
                """
                command = str(tool_call.arguments.get("command", ""))
                self.commands.append(command)

                if self.failing_match in command:
                    output, exit_code, stdout, stderr = "failed", 1, "", "failed"
                    is_error = True
                else:
                    output, exit_code, stdout, stderr = "ok", 0, "ok", ""
                    is_error = False

                return tool_outcome(
                    tool_call=tool_call,
                    output=output,
                    is_error=is_error,
                    exit_code=exit_code,
                    stdout=stdout,
                    stderr=stderr,
                )
      
        156
        
        157
        
        158
        def build_context(temp_dir: Path, session: FakeSession) -> RuntimeContext:
            """Assemble a ``RuntimeContext`` rooted at ``temp_dir`` around ``session``.

            The context uses the default tool registry (workspace root set to
            ``temp_dir``), a ``WORKSPACE_WRITE`` permission policy built from the
            rules loaded from ``temp_dir``, a ``ScriptedBackend``, ``FakeSafeguards``,
            and a ``SimpleNamespace`` configuration.

            Args:
                temp_dir: Directory used as project root and workspace root.
                session: Fake session passed through as the context's session.

            Returns:
                The constructed runtime context, in ``"execute"`` workflow mode.
            """
            registry = create_default_registry(temp_dir)
            registry.configure_workspace_root(temp_dir)

            rule_status = load_permission_rules(temp_dir)
            policy = build_permission_policy(
                active_mode=PermissionMode.WORKSPACE_WRITE,
                workspace_root=temp_dir,
                tool_requirements=registry.get_tool_requirements(),
                rules=rule_status.rules,
            )

            reasoning = SimpleNamespace(
                rollback=False,
                show_rollback_plan=False,
                completion_check=True,
                use_quick_completion=True,
                max_continuation_prompts=5,
                self_critique=False,
                confidence_scoring=False,
                min_confidence_for_action=3,
                verification=False,
            )
            config = SimpleNamespace(
                force_react=False,
                verification_retry_budget=3,
                reasoning=reasoning,
            )

            return RuntimeContext(
                project_root=temp_dir,
                backend=ScriptedBackend(),
                registry=registry,
                session=session,  # type: ignore[arg-type]
                config=config,
                capability_profile=SimpleNamespace(supports_native_tools=True),  # type: ignore[arg-type]
                project_context=None,
                permission_policy=policy,
                permission_config_status=rule_status,
                workflow_mode="execute",
                safeguards=FakeSafeguards(),
            )
      
        195
        
        196
        
        197
        def tool_outcome(
            *,
            tool_call: ToolCall,
            output: str,
            is_error: bool,
            exit_code: int,
            stdout: str = "",
            stderr: str = "",
        ) -> ToolExecutionOutcome:
            """Build an ``EXECUTED`` outcome for ``tool_call`` carrying ``output``.

            ``output`` is used for the tool-result message (display and result
            content), the event content, the result output, and the registry
            result. ``exit_code``, ``stdout`` and ``stderr`` are stored in the
            registry result's metadata.

            Args:
                tool_call: The call this outcome answers; its id tags the message.
                output: Text reported everywhere the outcome exposes output.
                is_error: Error flag set on the message, outcome, and registry result.
                exit_code: Value stored under ``metadata["exit_code"]``.
                stdout: Value stored under ``metadata["stdout"]``.
                stderr: Value stored under ``metadata["stderr"]``.
            """
            result_message = Message.tool_result_message(
                tool_call_id=tool_call.id,
                display_content=output,
                result_content=output,
                is_error=is_error,
            )
            registry_result = RegistryToolResult(
                output=output,
                is_error=is_error,
                metadata={
                    "exit_code": exit_code,
                    "stdout": stdout,
                    "stderr": stderr,
                },
            )
            return ToolExecutionOutcome(
                tool_call=tool_call,
                state=ToolExecutionState.EXECUTED,
                message=result_message,
                event_content=output,
                is_error=is_error,
                result_output=output,
                registry_result=registry_result,
            )
      
        228
        
        229
        
        230
        async def _noop_set_workflow_mode(
            mode: object,
            dod: object,
            emit: object,
            summary: object,
        ) -> None:
            """Accept a workflow-mode change request and do nothing.

            Passed as the ``set_workflow_mode`` callback when constructing
            ``TurnFinalizer`` in these tests; all four arguments are ignored.
            """
            return None
      
        232
        
        233
        
        234
        def test_turn_finalizer_finalize_summary_uses_runtime_context(
            temp_dir: Path,
            monkeypatch: pytest.MonkeyPatch,
        ) -> None:
            """``finalize_summary`` fills the summary from the session on the context.

            Checks the session id, the usage reported to and returned by the
            session, the captured definition-of-done memory, the trace, and the
            completion decision fields copied from ``FakeSession``.
            """
            session = FakeSession()
            context = build_context(temp_dir, session)
            tracer = RuntimeTracer()
            tracer.record("turn.completed", reason="done")
            finalizer = TurnFinalizer(
                context,
                tracer,
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )

            dod = create_definition_of_done("Finish the task")
            dod.status = "done"
            summary = TurnSummary(
                final_response="All set.",
                definition_of_done=dod,
                iterations=2,
                usage={"prompt_tokens": 10},
                tool_result_messages=[Message(role=Role.TOOL, content="tool output")],
            )

            # Replace the memory-store hook so the test can see that it was called.
            captured: dict[str, str] = {}

            def capture_definition_of_done(self, summary_text: str) -> None:
                captured["summary"] = summary_text

            monkeypatch.setattr(
                "loader.runtime.finalization.MemoryStore.capture_definition_of_done",
                capture_definition_of_done,
            )

            final_summary = finalizer.finalize_summary(summary)

            expected_decision_summary = "accepted the response after verification evidence passed"
            assert final_summary.session_id == "session-test-123"
            assert final_summary.cumulative_usage == {"turns": 1, "tool_calls": 1, "iterations": 2}
            assert session.recorded_calls == [
                {
                    "usage": {"prompt_tokens": 10, "tool_calls": 1, "iterations": 2},
                    "tool_calls": 1,
                    "iterations": 2,
                }
            ]
            assert "summary" in captured
            assert final_summary.trace
            assert final_summary.completion_decision_code == "verification_passed"
            assert final_summary.completion_decision_summary == expected_decision_summary
            assert [entry.decision_code for entry in final_summary.completion_trace] == [
                "verification_passed"
            ]
      
        287
        
        288
        
        289
        def test_verification_repair_guidance_uses_existing_artifacts_as_source_of_truth(
            temp_dir: Path,
        ) -> None:
            """Repair guidance names the chapter files that exist on disk.

            The failed evidence reports index links to two chapter files that do
            not exist. The guidance must tell the reader to treat the existing
            artifact files as the source of truth and must mention chapters one,
            two, and four by path.
            """
            guide_root = temp_dir / "guides" / "nginx"
            chapters = guide_root / "chapters"
            chapters.mkdir(parents=True)
            index_path = guide_root / "index.html"
            chapter_one, chapter_two, chapter_three, chapter_four = (
                chapters / name
                for name in (
                    "01-getting-started.html",
                    "02-installation.html",
                    "03-first-website.html",
                    "04-configuration-basics.html",
                )
            )

            artifacts = (index_path, chapter_one, chapter_two, chapter_three, chapter_four)
            for artifact in artifacts:
                artifact.write_text("<html></html>\n")

            # The plan lists both directories (with a trailing slash) and every file.
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}/`",
                f"- `{chapters}/`",
                *(f"- `{artifact}`" for artifact in artifacts),
                "",
            ]
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines))

            dod = create_definition_of_done("Repair the nginx guide index.")
            dod.implementation_plan = str(implementation_plan)
            dod.evidence = [
                VerificationEvidence(
                    command="verify-links",
                    passed=False,
                    output=(
                        "Missing local HTML links:\n"
                        f"{index_path}:chapters/01-introduction.html -> {chapters / '01-introduction.html'}\n"
                        f"{index_path}:chapters/04-server-blocks.html -> {chapters / '04-server-blocks.html'}\n"
                    ),
                )
            ]

            guidance = _build_verification_repair_guidance(
                dod,
                project_root=temp_dir,
            )

            assert "Use the existing artifact files as the source of truth" in guidance
            assert str(chapter_one) in guidance
            assert str(chapter_two) in guidance
            assert str(chapter_four) in guidance
      
        346
        
        347
        
        348
        @pytest.mark.asyncio
        async def test_turn_finalizer_records_skipped_verification_observation(
            temp_dir: Path,
        ) -> None:
            """A definition of done with no mutating actions is accepted with a skip.

            The gate must stop the turn, record one ``SKIPPED`` observation on
            the result and on the last timeline entry, mark the summary as
            skipped, move the default pending item to completed, and emit a
            ``dod_status`` event with status ``done``.
            """
            session = FakeSession()
            context = build_context(temp_dir, session)
            finalizer = TurnFinalizer(
                context,
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )
            dod = create_definition_of_done("Explain Loader's clarify loop.")
            summary = TurnSummary(final_response="")
            events = []

            async def capture(event) -> None:
                events.append(event)

            # The executor queue is empty: the gate is expected to run no command.
            result = await finalizer.run_definition_of_done_gate(
                dod=dod,
                candidate_response="Loader uses a bounded clarify loop before execution.",
                emit=capture,
                summary=summary,
                executor=FakeExecutor([]),  # type: ignore[arg-type]
            )

            skipped = VerificationObservationStatus.SKIPPED.value
            last_entry = session.workflow_timeline[-1]

            assert result.should_continue is False
            assert result.reason_code == "non_mutating_response_accepted"
            assert [item.status for item in result.verification_observations] == [skipped]
            assert [item.summary for item in result.verification_observations] == [
                "verification was skipped because no mutating work required checks"
            ]
            assert summary.verification_status == "skipped"
            assert "Complete the requested work" not in dod.pending_items
            assert "Complete the requested work" in dod.completed_items
            assert last_entry.kind == "verify_skip"
            assert [item.status for item in last_entry.verification_observations] == [skipped]
            assert any(event.type == "dod_status" and event.dod_status == "done" for event in events)
      
        391
        
        392
        
        393
        @pytest.mark.asyncio
        async def test_turn_finalizer_accepts_noop_completion_with_task_restatement_todo(
            temp_dir: Path,
        ) -> None:
            """A pending item that restates the task does not block a no-edit answer.

            The pending list holds the task text itself plus the default item,
            and no mutating action is recorded. The gate must still accept the
            response as non-mutating and stop the turn.
            """
            session = FakeSession()
            context = build_context(temp_dir, session)
            finalizer = TurnFinalizer(
                context,
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )
            task = (
                "Have a look at ~/Loader/guides/fortran/index.html, then "
                "~/Loader/guides/fortran/chapters. The table of contents links in "
                "index.html are inaccurate and the href’s are wrong. Let’s update the "
                "links and their link texts to be correct."
            )
            dod = create_definition_of_done(task)
            dod.pending_items = [task, "Complete the requested work"]
            summary = TurnSummary(final_response="")

            async def capture(event) -> None:
                return None

            # The executor queue is empty: the gate is expected to run no command.
            result = await finalizer.run_definition_of_done_gate(
                dod=dod,
                candidate_response="The table of contents is already correct, so no edit is needed.",
                emit=capture,
                summary=summary,
                executor=FakeExecutor([]),  # type: ignore[arg-type]
            )

            assert result.should_continue is False
            assert result.reason_code == "non_mutating_response_accepted"
      
        428
        
        429
        
        430
        @pytest.mark.asyncio
        async def test_turn_finalizer_records_passed_verification_observation(
            temp_dir: Path,
        ) -> None:
            """A passing verification command is recorded as pending, then passed.

            With one mutating action and one verification command, the gate must
            stop the turn with ``verification_passed``, expose a single ``PASSED``
            observation for attempt 1, and leave a pending entry followed by a
            passed ``verify_observation`` entry at the end of the timeline.
            """
            verify_command = "uv run pytest -q"
            attempt_id = "verification-attempt-1"

            session = FakeSession()
            context = build_context(temp_dir, session)
            finalizer = TurnFinalizer(
                context,
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )
            dod = create_definition_of_done("Update the runtime tests.")
            dod.mutating_actions.append("write")
            dod.verification_commands = [verify_command]
            summary = TurnSummary(final_response="")
            tool_call = ToolCall(
                id="verify-1-1",
                name="bash",
                arguments={"command": verify_command, "cwd": str(temp_dir)},
            )
            passing_outcome = tool_outcome(
                tool_call=tool_call,
                output="219 passed",
                is_error=False,
                exit_code=0,
                stdout="219 passed",
            )

            async def capture(event) -> None:
                return None

            result = await finalizer.run_definition_of_done_gate(
                dod=dod,
                candidate_response="Updated the runtime tests.",
                emit=capture,
                summary=summary,
                executor=FakeExecutor([passing_outcome]),  # type: ignore[arg-type]
            )

            observation = result.verification_observations[0]
            pending_entry, passed_entry = session.workflow_timeline[-2:]

            assert result.should_continue is False
            assert result.reason_code == "verification_passed"
            assert [item.status for item in result.verification_observations] == [
                VerificationObservationStatus.PASSED.value
            ]
            assert observation.attempt_id == attempt_id
            assert observation.attempt_number == 1
            assert observation.command == verify_command
            assert observation.detail == "219 passed"
            assert summary.verification_status == "passed"
            assert [entry.reason_code for entry in session.workflow_timeline[-2:]] == [
                "verification_pending",
                "verification_command_passed",
            ]
            assert [item.status for item in pending_entry.verification_observations] == [
                VerificationObservationStatus.PENDING.value
            ]
            assert pending_entry.verification_observations[0].attempt_id == attempt_id
            assert pending_entry.verification_observations[0].command == verify_command
            assert passed_entry.kind == "verify_observation"
            assert passed_entry.reason_code == "verification_command_passed"
            assert [item.status for item in passed_entry.verification_observations] == [
                VerificationObservationStatus.PASSED.value
            ]
      
        502
        
        503
        
        504
        @pytest.mark.asyncio
        async def test_turn_finalizer_appends_runtime_semantic_verifier_to_planned_commands(
            temp_dir: Path,
        ) -> None:
            """The gate runs the planned command plus a ``python3`` heredoc command.

            The definition of done plans only a ``grep``. After the gate runs,
            the recording executor must have seen that ``grep`` and also a
            command starting with ``python3 - <<'PY'``, and the last timeline
            entry must belong to verification attempt 1.
            """
            planned_command = 'grep -n "href=" index.html'

            # One chapter file and an index whose single link points at it.
            chapters = temp_dir / "chapters"
            chapters.mkdir()
            (chapters / "01-introduction.html").write_text(
                "<h1>Chapter 1: Introduction to Fortran</h1>\n"
            )
            index = temp_dir / "index.html"
            index.write_text(
                "\n".join(
                    [
                        '<ul class="chapter-list">',
                        '  <li><a href="chapters/01-introduction.html">Chapter 1: Introduction to Fortran</a></li>',
                        "</ul>",
                    ]
                )
            )

            session = FakeSession()
            context = build_context(temp_dir, session)
            finalizer = TurnFinalizer(
                context,
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )
            dod = create_definition_of_done(
                "Update index.html so the table of contents links and chapter titles are correct."
            )
            dod.mutating_actions.append("edit")
            dod.touched_files.append(str(index))
            dod.verification_commands = [planned_command]
            summary = TurnSummary(final_response="")
            executor = RecordingExecutor()

            async def capture(event) -> None:
                return None

            result = await finalizer.run_definition_of_done_gate(
                dod=dod,
                candidate_response="Updated the index.html links.",
                emit=capture,
                summary=summary,
                executor=executor,  # type: ignore[arg-type]
            )

            last_entry = session.workflow_timeline[-1]

            assert result.should_continue is False
            assert any(command == planned_command for command in executor.commands)
            assert any(command.startswith("python3 - <<'PY'") for command in executor.commands)
            assert last_entry.verification_observations[0].attempt_id == "verification-attempt-1"
      
        559
        
        560
        
        561
        @pytest.mark.asyncio
        async def test_turn_finalizer_does_not_append_repo_defaults_to_external_verification_plan(
            temp_dir: Path,
        ) -> None:
            """Check that only the planned commands run for a target outside the workspace.

            The workspace holds ``pyproject.toml`` and ``package.json`` markers, while
            the touched file lives in a sibling directory of ``temp_dir``. The test
            expects the executor to receive exactly the two planned commands.

            The sibling directory sits outside the ``temp_dir`` fixture, so it is
            removed in a ``finally`` block instead of being left behind in the
            shared parent directory.
            """
            import shutil

            (temp_dir / "pyproject.toml").write_text("[project]\nname='loader'\n")
            (temp_dir / "package.json").write_text("{}\n")
            external_root = temp_dir.parent / "external-nginx-guide"
            external_root.mkdir(exist_ok=True)
            try:
                external_index = external_root / "index.html"
                external_index.write_text("<html></html>\n")

                session = FakeSession()
                context = build_context(temp_dir, session)
                finalizer = TurnFinalizer(
                    context,
                    RuntimeTracer(),
                    DefinitionOfDoneStore(temp_dir),
                    set_workflow_mode=_noop_set_workflow_mode,
                )

                expected_commands = [
                    f"ls -la {external_root}",
                    f'grep -n "html" {external_index}',
                ]
                dod = create_definition_of_done("Create an external nginx guide.")
                dod.mutating_actions.append("write")
                dod.touched_files.append(str(external_index))
                # Hand over a copy so an in-place mutation of the DoD's list cannot
                # silently change what the final assertion compares against.
                dod.verification_commands = list(expected_commands)
                summary = TurnSummary(final_response="")
                executor = RecordingExecutor()

                async def capture(event) -> None:
                    return None

                result = await finalizer.run_definition_of_done_gate(
                    dod=dod,
                    candidate_response="Created the external nginx guide.",
                    emit=capture,
                    summary=summary,
                    executor=executor,  # type: ignore[arg-type]
                )

                assert result.should_continue is False
                assert executor.commands == expected_commands
            finally:
                # Runs on failure too, so later runs start from a clean parent.
                shutil.rmtree(external_root, ignore_errors=True)
      
        606
        
        607
        
        608
        @pytest.mark.asyncio
        async def test_turn_finalizer_filters_reference_side_verification_commands(
            temp_dir: Path,
        ) -> None:
            """Check that commands aimed at the reference directory are not executed.

            The verification plan lists an ``ls -la`` for the nginx guide, which the
            implementation plan names, and for a sibling fortran directory, which
            it does not. The test expects the executed commands to mention the
            nginx guide and never the fortran directory.
            """
            guides_dir = temp_dir / "Loader" / "guides"

            # Output tree declared in the implementation plan.
            guide_root = guides_dir / "nginx"
            chapters = guide_root / "chapters"
            chapters.mkdir(parents=True)
            index_path = guide_root / "index.html"
            chapter_one = chapters / "01-introduction.html"
            index_path.write_text("<html><body><h1>Guide</h1></body></html>\n")
            chapter_one.write_text("<html><body><h1>Intro</h1></body></html>\n")

            # Sibling directory that only the verification plan refers to.
            reference_root = guides_dir / "fortran"
            reference_root.mkdir(parents=True)

            implementation_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}`",
                f"- `{chapters}`",
                f"- `{index_path}`",
                f"- `{chapter_one}`",
                "",
            ]
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(implementation_lines))

            verification_lines = [
                "# Verification Plan",
                "",
                "## Verification Commands",
                "```bash",
                f"ls -la {guide_root}",
                f"ls -la {reference_root}",
                "```",
                "",
            ]
            verification_plan = temp_dir / "verification.md"
            verification_plan.write_text("\n".join(verification_lines))

            fake_session = FakeSession()
            finalizer = TurnFinalizer(
                build_context(temp_dir, fake_session),
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )

            definition = create_definition_of_done(
                "Create an nginx guide from an external reference."
            )
            definition.mutating_actions.append("write")
            definition.touched_files.extend([str(index_path), str(chapter_one)])
            definition.implementation_plan = str(implementation_plan)
            definition.verification_plan = str(verification_plan)
            turn_summary = TurnSummary(final_response="")
            recorder = RecordingExecutor()

            async def ignore_event(event) -> None:
                return None

            gate_result = await finalizer.run_definition_of_done_gate(
                dod=definition,
                candidate_response="Created the nginx guide.",
                emit=ignore_event,
                summary=turn_summary,
                executor=recorder,  # type: ignore[arg-type]
            )

            assert gate_result.should_continue is False

            guide_text = str(guide_root)
            reference_text = str(reference_root)
            assert any(guide_text in cmd for cmd in recorder.commands)
            assert all(reference_text not in cmd for cmd in recorder.commands)
      
        684
        
        685
        
        686
        @pytest.mark.asyncio
        async def test_turn_finalizer_blocks_completion_when_planned_artifacts_are_missing(
            temp_dir: Path,
        ) -> None:
            """Expect the gate to request another turn while a planned file is absent.

            The implementation plan names three files, but only the index and the
            first chapter are written. The test expects a
            ``planned_artifacts_missing_continue`` result, no executed commands, a
            DoD still in ``draft``, and a final session message naming the missing
            file.
            """
            docs = temp_dir / "docs"
            chapters = docs / "chapters"
            chapters.mkdir(parents=True)
            index = docs / "index.html"
            first = chapters / "01-intro.html"
            second = chapters / "02-installation.html"  # planned but never written

            index_links = [
                '<a href="chapters/01-intro.html">Intro</a>',
                '<a href="chapters/02-installation.html">Installation</a>',
            ]
            index.write_text("\n".join(index_links))
            first.write_text("<h1>Intro</h1>\n")

            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{index}`",
                f"- `{first}`",
                f"- `{second}`",
            ]
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines))

            fake_session = FakeSession()
            finalizer = TurnFinalizer(
                build_context(temp_dir, fake_session),
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )

            definition = create_definition_of_done("Create a small multi-page HTML guide.")
            definition.mutating_actions.append("write")
            definition.touched_files.extend([str(index), str(first)])
            definition.implementation_plan = str(implementation_plan)
            definition.verification_commands = [f"ls -la {docs}"]
            turn_summary = TurnSummary(final_response="")
            recorder = RecordingExecutor()

            async def ignore_event(event) -> None:
                return None

            gate_result = await finalizer.run_definition_of_done_gate(
                dod=definition,
                candidate_response="Finished the guide.",
                emit=ignore_event,
                summary=turn_summary,
                executor=recorder,  # type: ignore[arg-type]
            )

            assert gate_result.should_continue is True
            assert gate_result.reason_code == "planned_artifacts_missing_continue"
            assert recorder.commands == []

            # The DoD must not have been promoted or had its default item completed.
            assert definition.status == "draft"
            requested_work = "Complete the requested work"
            assert requested_work in definition.pending_items
            assert requested_work not in definition.completed_items

            nudge = fake_session.messages[-1].content
            assert nudge.startswith("[PLANNED ARTIFACTS STILL MISSING]")
            assert "`02-installation.html`" in nudge
      
        754
        
        755
        
        756
        @pytest.mark.asyncio
        async def test_turn_finalizer_records_missing_verification_observation(
            temp_dir: Path,
        ) -> None:
            """Gate a DoD that has a mutating action but no verification commands.

            The executor is a ``FakeExecutor`` built from an empty list. The test
            expects a ``verification_failed_reentry`` result carrying a single
            MISSING observation for attempt 1, a failed summary status, a matching
            ``verify_observation`` timeline entry, and a final user message that
            starts with the DoD failure banner.
            """
            fake_session = FakeSession()
            finalizer = TurnFinalizer(
                build_context(temp_dir, fake_session),
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )
            definition = create_definition_of_done("Edit the loader bootstrap.")
            definition.mutating_actions.append("edit")
            turn_summary = TurnSummary(final_response="")

            async def ignore_event(event) -> None:
                return None

            gate_result = await finalizer.run_definition_of_done_gate(
                dod=definition,
                candidate_response="Updated the bootstrap code.",
                emit=ignore_event,
                summary=turn_summary,
                executor=FakeExecutor([]),  # type: ignore[arg-type]
            )

            missing_status = VerificationObservationStatus.MISSING.value
            expected_attempt = "verification-attempt-1"

            assert gate_result.should_continue is True
            assert gate_result.reason_code == "verification_failed_reentry"

            # Observations returned on the gate result.
            returned = gate_result.verification_observations
            assert [obs.status for obs in returned] == [missing_status]
            assert returned[0].attempt_id == expected_attempt
            assert returned[0].attempt_number == 1
            assert [obs.summary for obs in returned] == [
                "verification commands were still missing at execution time"
            ]
            assert turn_summary.verification_status == "failed"

            # Observations recorded on the latest workflow timeline entry.
            latest_entry = fake_session.workflow_timeline[-1]
            assert latest_entry.kind == "verify_observation"
            assert latest_entry.reason_code == "verification_commands_missing"
            assert [obs.status for obs in latest_entry.verification_observations] == [
                missing_status
            ]
            assert latest_entry.verification_observations[0].attempt_id == expected_attempt

            final_message = fake_session.messages[-1]
            assert final_message.role == Role.USER
            assert final_message.content.startswith("[DEFINITION OF DONE CHECK FAILED]")
      
        805
        
        806
        
        807
        @pytest.mark.asyncio
        async def test_turn_finalizer_ignores_unplanned_expansion_pending_items_once_plan_exists(
            temp_dir: Path,
        ) -> None:
            """Gate a DoD whose pending items mention a file the plan does not list.

            Every path in the implementation plan exists on disk and no mutating
            action is recorded. The pending items include
            ``Create 07-performance-tuning.html``, which is not in the plan. The
            test expects the gate to finish with ``non_mutating_response_accepted``.
            """
            fake_session = FakeSession()
            finalizer = TurnFinalizer(
                build_context(temp_dir, fake_session),
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )

            # Write every planned artifact to disk before gating.
            docs = temp_dir / "guides" / "nginx"
            chapters = docs / "chapters"
            docs.mkdir(parents=True)
            chapters.mkdir()
            index = docs / "index.html"
            first = chapters / "01-getting-started.html"
            second = chapters / "02-installation.html"
            index.write_text("<html></html>\n")
            first.write_text("<h1>One</h1>\n")
            second.write_text("<h1>Two</h1>\n")

            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{docs}/`",
                f"- `{chapters}/`",
                f"- `{index}`",
                f"- `{first}`",
                f"- `{second}`",
                "",
            ]
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines))

            definition = create_definition_of_done("Create a small multi-page HTML guide.")
            definition.implementation_plan = str(implementation_plan)
            definition.pending_items = [
                "Create 07-performance-tuning.html",
                "Complete the requested work",
            ]
            turn_summary = TurnSummary(final_response="")

            async def ignore_event(event) -> None:
                return None

            gate_result = await finalizer.run_definition_of_done_gate(
                dod=definition,
                candidate_response="Finished the guide.",
                emit=ignore_event,
                summary=turn_summary,
                executor=FakeExecutor([]),  # type: ignore[arg-type]
            )

            assert gate_result.should_continue is False
            assert gate_result.reason_code == "non_mutating_response_accepted"
      
        869
        
        870
        
        871
        @pytest.mark.asyncio
        async def test_turn_finalizer_verification_failure_reentry_points_at_concrete_repair(
            temp_dir: Path,
            monkeypatch: pytest.MonkeyPatch,
        ) -> None:
            """Feed the gate a failed link check and inspect the repair guidance.

            The scripted verification outcome reports one chapter file linking to a
            stylesheet that does not exist. The test expects a
            ``verification_failed_reentry`` result, a queued steering message, and
            a session message that both name the file, the link and the missing
            target.
            """
            fake_session = FakeSession()
            context = build_context(temp_dir, fake_session)
            steering_messages: list[str] = []
            context.queue_steering_message_callback = steering_messages.append
            finalizer = TurnFinalizer(
                context,
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )

            # A chapter whose stylesheet link points at a file that is never created.
            nginx_root = temp_dir / "guides" / "nginx"
            broken_file = nginx_root / "chapters" / "05-advanced-configurations.html"
            broken_file.parent.mkdir(parents=True, exist_ok=True)
            broken_file.write_text('<link rel="stylesheet" href="../styles.css">\n')
            missing_target = nginx_root / "styles.css"

            verify_command = "python3 verify_links.py"
            definition = create_definition_of_done("Create the nginx guide.")
            definition.mutating_actions.append("write")
            definition.touched_files.append(str(broken_file))
            definition.verification_commands = [verify_command]
            turn_summary = TurnSummary(final_response="")
            verify_call = ToolCall(
                id="verify-1-1",
                name="bash",
                arguments={"command": verify_command, "cwd": str(temp_dir)},
            )
            failure_output = (
                "Missing local HTML links:\n"
                f"{broken_file}:../styles.css -> {missing_target}\n"
            )

            async def ignore_event(event) -> None:
                return None

            def no_derived_commands(*args, **kwargs):
                return []

            # Replace command derivation with a stub that returns no commands.
            monkeypatch.setattr(
                "loader.runtime.finalization.derive_verification_commands",
                no_derived_commands,
            )

            failing_outcome = tool_outcome(
                tool_call=verify_call,
                output=failure_output,
                is_error=True,
                exit_code=1,
                stdout=failure_output,
            )
            gate_result = await finalizer.run_definition_of_done_gate(
                dod=definition,
                candidate_response="The guide is complete.",
                emit=ignore_event,
                summary=turn_summary,
                executor=FakeExecutor([failing_outcome]),  # type: ignore[arg-type]
            )

            assert gate_result.should_continue is True
            assert gate_result.reason_code == "verification_failed_reentry"

            # Steering message queued through the context callback.
            assert steering_messages
            steering = steering_messages[-1]
            assert str(broken_file) in steering
            assert "../styles.css" in steering
            assert str(missing_target) in steering
            assert "Do not restart discovery or reread unrelated references." in steering

            # Failure message appended to the session.
            failure_message = fake_session.messages[-1].content
            assert failure_message.startswith("[DEFINITION OF DONE CHECK FAILED]")
            assert f"Immediate next step: edit `{broken_file}`." in failure_message
            assert f"create `{missing_target}`" in failure_message
            assert (
                "Do not reread unrelated reference materials or restart discovery"
                in failure_message
            )
      
        945
        
        946
        
        947
        @pytest.mark.asyncio
        async def test_turn_finalizer_verification_failure_reentry_prioritizes_missing_planned_outputs(
            temp_dir: Path,
            monkeypatch: pytest.MonkeyPatch,
        ) -> None:
            """Feed the gate a failed link check where the link targets do not exist.

            The index links to three chapters but only the first is written. The
            scripted verification output reports the other two as missing. The test
            expects the guidance to say to write the missing chapters, by resolved
            path, and not to say to edit the index.
            """
            fake_session = FakeSession()
            context = build_context(temp_dir, fake_session)
            steering_messages: list[str] = []
            context.queue_steering_message_callback = steering_messages.append
            finalizer = TurnFinalizer(
                context,
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )

            guide_root = temp_dir / "guides" / "nginx"
            chapters = guide_root / "chapters"
            chapters.mkdir(parents=True, exist_ok=True)
            index = guide_root / "index.html"
            first = chapters / "01-installation.html"
            second = chapters / "02-configuration.html"  # linked, never written
            third = chapters / "03-basic-usage.html"  # linked, never written

            index_links = [
                '<a href="chapters/01-installation.html">Installation</a>',
                '<a href="chapters/02-configuration.html">Configuration</a>',
                '<a href="chapters/03-basic-usage.html">Basic Usage</a>',
            ]
            index.write_text("\n".join(index_links))
            first.write_text("<h1>Installation</h1>\n")

            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}/`",
                f"- `{chapters}/`",
                f"- `{index}`",
                f"- `{first}`",
                "",
            ]
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines))

            verify_command = "python3 verify_links.py"
            definition = create_definition_of_done("Create the nginx guide.")
            definition.mutating_actions.append("write")
            definition.touched_files.extend([str(index), str(first)])
            definition.implementation_plan = str(implementation_plan)
            definition.verification_commands = [verify_command]
            turn_summary = TurnSummary(final_response="")
            verify_call = ToolCall(
                id="verify-1-1",
                name="bash",
                arguments={"command": verify_command, "cwd": str(temp_dir)},
            )

            # The assertions compare against resolved paths, while the scripted
            # output below uses the paths exactly as they were built.
            normalized_second = str(second.resolve(strict=False))
            normalized_third = str(third.resolve(strict=False))
            failure_output = (
                "Missing local HTML links:\n"
                f"{index}:chapters/02-configuration.html -> {second}\n"
                f"{index}:chapters/03-basic-usage.html -> {third}\n"
            )

            async def ignore_event(event) -> None:
                return None

            def no_derived_commands(*args, **kwargs):
                return []

            # Replace command derivation with a stub that returns no commands.
            monkeypatch.setattr(
                "loader.runtime.finalization.derive_verification_commands",
                no_derived_commands,
            )

            failing_outcome = tool_outcome(
                tool_call=verify_call,
                output=failure_output,
                is_error=True,
                exit_code=1,
                stdout=failure_output,
            )
            gate_result = await finalizer.run_definition_of_done_gate(
                dod=definition,
                candidate_response="The guide is complete.",
                emit=ignore_event,
                summary=turn_summary,
                executor=FakeExecutor([failing_outcome]),  # type: ignore[arg-type]
            )

            assert gate_result.should_continue is True
            assert gate_result.reason_code == "verification_failed_reentry"

            # Steering message queued through the context callback.
            assert steering_messages
            steering = steering_messages[-1]
            assert normalized_second in steering
            assert "Do not rewrite the existing aggregate files" in steering

            # Failure message appended to the session.
            failure_message = fake_session.messages[-1].content
            assert failure_message.startswith("[DEFINITION OF DONE CHECK FAILED]")
            assert f"Immediate next step: write `{normalized_second}`." in failure_message
            assert (
                f"creating missing planned artifact `{normalized_second}`"
                in failure_message
            )
            assert (
                f"creating missing planned artifact `{normalized_third}`"
                in failure_message
            )
            assert f"Immediate next step: edit `{index}`." not in failure_message
            assert "Do not rewrite existing aggregate files" in failure_message
      
        1056
        
        1057
        
        1058
        @pytest.mark.asyncio
        async def test_turn_finalizer_does_not_reverify_without_new_changes(
            temp_dir: Path,
        ) -> None:
            """The DoD gate must not re-run verification when nothing changed.

            The definition of done is seeded as already failed, together with a
            stored verification signature. The gate is then expected to keep the
            turn going with ``verification_failed_no_new_changes`` and to issue no
            commands through the executor.
            """
            fake_session = FakeSession()
            turn_finalizer = TurnFinalizer(
                build_context(temp_dir, fake_session),
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )

            # A real file on disk that the seeded DoD claims to have edited.
            index = temp_dir / "index.html"
            index.write_text("<ul></ul>\n")

            dod = create_definition_of_done("Fix the chapter list in index.html.")
            dod.mutating_actions.append("edit")
            dod.touched_files.append(str(index))
            dod.line_changes = 12
            dod.last_verification_result = "failed"
            # NOTE(review): hand-built from the fields seeded above; presumably the
            # same format the finalizer computes for an unchanged state -- confirm
            # against loader.runtime.finalization.
            dod.last_verification_signature = (
                f"lines={dod.line_changes};touched={index};actions=1;commands="
            )
            dod.evidence = []

            turn_summary = TurnSummary(final_response="")
            recording_executor = RecordingExecutor()

            async def _discard_event(event) -> None:
                """Event sink that ignores everything the gate emits."""

            gate_result = await turn_finalizer.run_definition_of_done_gate(
                dod=dod,
                candidate_response="I checked the file again.",
                emit=_discard_event,
                summary=turn_summary,
                executor=recording_executor,  # type: ignore[arg-type]
            )

            assert gate_result.should_continue is True
            assert gate_result.reason_code == "verification_failed_no_new_changes"
            # No verification command may have reached the executor.
            assert recording_executor.commands == []
            assert turn_summary.verification_status == "failed"
            last_message = fake_session.messages[-1].content
            assert last_message.startswith("[DEFINITION OF DONE CHECK STILL FAILING]")
      
        1100
        
        1101
        
        1102
        @pytest.mark.asyncio
        async def test_turn_finalizer_accepts_missing_optional_html5validator_when_semantic_check_passes(
            temp_dir: Path,
            monkeypatch: pytest.MonkeyPatch,
        ) -> None:
            """A missing ``html5validator`` binary is skipped, not treated as failure.

            Two verification commands are scripted: a semantic check that exits 0
            and an ``html5validator`` run that exits 127 with "command not found".
            The gate is expected to report ``verification_passed``, mark the DoD
            done, and record the second command as skipped evidence.
            """
            session = FakeSession()
            turn_finalizer = TurnFinalizer(
                build_context(temp_dir, session),
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )

            dod = create_definition_of_done(
                "Update index.html so the table of contents links and chapter titles are correct."
            )
            dod.mutating_actions.append("edit")
            dod.touched_files.append(str(temp_dir / "index.html"))
            dod.verification_commands = [
                "python3 - <<'PY'\nprint('semantic ok')\nPY",
                "html5validator --root /tmp/fortran-qwen-recovery-check/",
            ]
            summary = TurnSummary(final_response="")

            # Tool calls are built from the commands as stored on the DoD.
            working_dir = str(temp_dir)
            semantic_call = ToolCall(
                id="verify-1-1",
                name="bash",
                arguments={"command": dod.verification_commands[0], "cwd": working_dir},
            )
            html5validator_call = ToolCall(
                id="verify-1-2",
                name="bash",
                arguments={"command": dod.verification_commands[1], "cwd": working_dir},
            )

            async def _discard_event(event) -> None:
                """Event sink that ignores everything the gate emits."""

            def _no_derived_commands(*args, **kwargs):
                """Stand-in for command derivation that always yields nothing."""
                return []

            # NOTE(review): presumably this keeps the gate limited to the two
            # commands seeded on the DoD -- confirm against finalization.
            monkeypatch.setattr(
                "loader.runtime.finalization.derive_verification_commands",
                _no_derived_commands,
            )

            missing_binary_message = "/bin/sh: html5validator: command not found"
            scripted_outcomes = [
                tool_outcome(
                    tool_call=semantic_call,
                    output="semantic ok",
                    is_error=False,
                    exit_code=0,
                    stdout="semantic ok",
                ),
                tool_outcome(
                    tool_call=html5validator_call,
                    output=missing_binary_message,
                    is_error=True,
                    exit_code=127,
                    stderr=missing_binary_message,
                ),
            ]

            result = await turn_finalizer.run_definition_of_done_gate(
                dod=dod,
                candidate_response="Updated the chapter links and titles.",
                emit=_discard_event,
                summary=summary,
                executor=FakeExecutor(scripted_outcomes),  # type: ignore[arg-type]
            )

            assert result.should_continue is False
            assert result.reason_code == "verification_passed"
            assert summary.verification_status == "passed"
            assert dod.status == "done"
            assert dod.last_verification_result == "passed"

            # First command passed; second did not pass but was skipped.
            assert [(item.passed, item.skipped) for item in dod.evidence] == [
                (True, False),
                (False, True),
            ]

            final_text = result.final_response
            assert "SKIP" in final_text
            assert "html5validator" in final_text

            timeline = session.workflow_timeline
            assert timeline[-2].reason_code == "verification_command_passed"
            assert timeline[-1].reason_code == "verification_command_skipped"
            observed_statuses = [
                item.status for item in timeline[-1].verification_observations
            ]
            assert observed_statuses == [VerificationObservationStatus.SKIPPED.value]