loader Public

Watch 0 Fork 0 Star 0
Python · 51306 bytes Raw Blame History
  
        1
        """Tests for finalization helpers on RuntimeContext."""
      
        2
        
        3
        from __future__ import annotations
      
        4
        
        5
        from pathlib import Path
      
        6
        from types import SimpleNamespace
      
        7
        
        8
        import pytest
      
        9
        
        10
        from loader.llm.base import Message, Role, ToolCall
      
        11
        from loader.runtime.completion_trace import CompletionTraceEntry
      
        12
        from loader.runtime.context import RuntimeContext
      
        13
        from loader.runtime.dod import (
      
        14
            DefinitionOfDoneStore,
      
        15
            VerificationEvidence,
      
        16
            create_definition_of_done,
      
        17
        )
      
        18
        from loader.runtime.events import TurnSummary
      
        19
        from loader.runtime.executor import ToolExecutionOutcome, ToolExecutionState
      
        20
        from loader.runtime.finalization import (
      
        21
            TurnFinalizer,
      
        22
            _build_verification_repair_guidance,
      
        23
        )
      
        24
        from loader.runtime.permissions import (
      
        25
            PermissionMode,
      
        26
            build_permission_policy,
      
        27
            load_permission_rules,
      
        28
        )
      
        29
        from loader.runtime.repair_focus import extract_active_repair_context
      
        30
        from loader.runtime.tracing import RuntimeTracer
      
        31
        from loader.runtime.verification_observations import VerificationObservationStatus
      
        32
        from loader.tools.base import ToolResult as RegistryToolResult
      
        33
        from loader.tools.base import create_default_registry
      
        34
        from tests.helpers.runtime_harness import ScriptedBackend
      
        35
        
        36
        
        37
        class FakeSession:
            """In-memory session double used by the finalization tests.

            It starts with a fixed session id and a pre-populated completion
            decision/trace, and it simply records whatever is appended to it or
            reported through ``record_turn_usage``.
            """

            def __init__(self) -> None:
                decision_code = "verification_passed"
                decision_summary = "accepted the response after verification evidence passed"

                self.session_id = "session-test-123"
                # Collections the tests inspect after driving the finalizer.
                self.messages: list[Message] = []
                self.recorded_calls: list[dict[str, object]] = []
                self.workflow_timeline = []

                # Canned completion state: one trace entry mirroring the decision.
                self.last_completion_decision_code = decision_code
                self.last_completion_decision_summary = decision_summary
                self.completion_trace = [
                    CompletionTraceEntry(
                        stage="definition_of_done",
                        outcome="complete",
                        decision_code=decision_code,
                        decision_summary=decision_summary,
                    )
                ]
                self.last_turn_transition_summary = (
                    "completion -> finalize [terminal] Finalizing completed turn"
                )

            def append(self, message: Message) -> None:
                """Store ``message`` at the end of ``self.messages``."""
                self.messages.append(message)

            def append_workflow_timeline_entry(self, entry) -> None:
                """Store ``entry`` at the end of ``self.workflow_timeline``."""
                self.workflow_timeline.append(entry)

            def record_turn_usage(
                self,
                usage: dict[str, int],
                *,
                tool_calls: int,
                iterations: int,
            ) -> dict[str, int]:
                """Log the call in ``recorded_calls`` and return fixed cumulative totals.

                ``usage`` is copied before being stored, so later mutation of the
                caller's dict does not change the recorded payload. The returned
                dict always reports ``turns`` as 1 and echoes the two counters.
                """
                self.recorded_calls.append(
                    {
                        "usage": dict(usage),
                        "tool_calls": tool_calls,
                        "iterations": iterations,
                    }
                )
                return dict(turns=1, tool_calls=tool_calls, iterations=iterations)
      
        79
        
        80
        
        81
        class FakeCodeFilter:
            """Code-filter stub whose ``reset`` has no effect."""

            def reset(self) -> None:
                """Accept the call, do nothing, and return ``None``."""
                return
      
        84
        
        85
        
        86
        class FakeSafeguards:
            """Safeguards stub: text passes through unchanged and nothing is flagged.

            Every filter returns its input as-is, steering is never requested, and
            both loop detectors always report ``(False, "")``.
            """

            def __init__(self) -> None:
                self.code_filter = FakeCodeFilter()
                # Opaque placeholders; this stub never calls into them.
                self.validator = object()
                self.action_tracker = object()

            def filter_stream_chunk(self, content: str) -> str:
                """Return the streamed chunk untouched."""
                return content

            def filter_complete_content(self, content: str) -> str:
                """Return the complete content untouched."""
                return content

            def should_steer(self) -> bool:
                """Always report that no steering is needed."""
                return False

            def get_steering_message(self) -> str | None:
                """Always return ``None`` (there is never a steering message)."""
                return

            def record_response(self, content: str) -> None:
                """Ignore the response and return ``None``."""
                return

            def detect_text_loop(self, content: str) -> tuple[bool, str]:
                """Always report no text loop, with an empty description."""
                return (False, "")

            def detect_loop(self) -> tuple[bool, str]:
                """Always report no loop, with an empty description."""
                return (False, "")
      
        112
        
        113
        
        114
        class FakeExecutor:
            """Executor double that replays a pre-queued list of outcomes in order."""

            def __init__(self, outcomes: list[ToolExecutionOutcome]) -> None:
                # Copy so that draining the queue never mutates the caller's list.
                self._outcomes = [*outcomes]

            async def execute_tool_call(self, tool_call: ToolCall, **_: object) -> ToolExecutionOutcome:
                """Return the next queued outcome, ignoring the call's details.

                Raises:
                    AssertionError: if no queued outcome is left.
                """
                queue = self._outcomes
                if queue:
                    return queue.pop(0)
                raise AssertionError("No fake verification outcome queued")
      
        122
        
        123
        
        124
        class RecordingExecutor:
            """Executor double that logs each command and always reports success."""

            def __init__(self) -> None:
                self.commands: list[str] = []

            async def execute_tool_call(self, tool_call: ToolCall, **_: object) -> ToolExecutionOutcome:
                """Record the call's ``command`` argument and return an ``ok`` outcome.

                A missing ``command`` argument is recorded as an empty string.
                """
                self.commands.append(str(tool_call.arguments.get("command", "")))
                success = dict(output="ok", is_error=False, exit_code=0, stdout="ok")
                return tool_outcome(tool_call=tool_call, **success)
      
        138
        
        139
        
        140
        class SelectiveRecordingExecutor:
            """Executor double that fails only commands containing a given substring."""

            def __init__(self, failing_match: str) -> None:
                self.failing_match = failing_match
                self.commands: list[str] = []

            async def execute_tool_call(self, tool_call: ToolCall, **_: object) -> ToolExecutionOutcome:
                """Record the command, then fail or succeed based on ``failing_match``.

                A command containing ``failing_match`` yields an error outcome
                (exit code 1, ``failed`` on stderr); any other command yields a
                successful outcome (exit code 0, ``ok`` on stdout).
                """
                command = str(tool_call.arguments.get("command", ""))
                self.commands.append(command)

                if self.failing_match in command:
                    return tool_outcome(
                        tool_call=tool_call,
                        output="failed",
                        is_error=True,
                        exit_code=1,
                        stdout="",
                        stderr="failed",
                    )
                return tool_outcome(
                    tool_call=tool_call,
                    output="ok",
                    is_error=False,
                    exit_code=0,
                    stdout="ok",
                    stderr="",
                )
      
        157
        
        158
        
        159
        def build_context(temp_dir: Path, session: FakeSession) -> RuntimeContext:
            """Assemble a ``RuntimeContext`` rooted at ``temp_dir`` around ``session``.

            The context uses the default tool registry, a workspace-write permission
            policy built from the rules loaded for ``temp_dir``, a scripted backend,
            namespace-based config stand-ins, and ``FakeSafeguards``.
            """
            tool_registry = create_default_registry(temp_dir)
            tool_registry.configure_workspace_root(temp_dir)

            permission_rules = load_permission_rules(temp_dir)
            permission_policy = build_permission_policy(
                active_mode=PermissionMode.WORKSPACE_WRITE,
                workspace_root=temp_dir,
                tool_requirements=tool_registry.get_tool_requirements(),
                rules=permission_rules.rules,
            )

            # Plain namespaces stand in for the real config objects.
            reasoning_settings = SimpleNamespace(
                rollback=False,
                show_rollback_plan=False,
                completion_check=True,
                use_quick_completion=True,
                max_continuation_prompts=5,
                self_critique=False,
                confidence_scoring=False,
                min_confidence_for_action=3,
                verification=False,
            )
            runtime_config = SimpleNamespace(
                force_react=False,
                verification_retry_budget=3,
                reasoning=reasoning_settings,
            )

            return RuntimeContext(
                project_root=temp_dir,
                backend=ScriptedBackend(),
                registry=tool_registry,
                session=session,  # type: ignore[arg-type]
                config=runtime_config,
                capability_profile=SimpleNamespace(supports_native_tools=True),  # type: ignore[arg-type]
                project_context=None,
                permission_policy=permission_policy,
                permission_config_status=permission_rules,
                workflow_mode="execute",
                safeguards=FakeSafeguards(),
            )
      
        196
        
        197
        
        198
        def tool_outcome(
            *,
            tool_call: ToolCall,
            output: str,
            is_error: bool,
            exit_code: int,
            stdout: str = "",
            stderr: str = "",
        ) -> ToolExecutionOutcome:
            """Build an ``EXECUTED`` ``ToolExecutionOutcome`` for ``tool_call``.

            ``output`` is reused as the display content, result content, event
            content and result output. ``exit_code``, ``stdout`` and ``stderr`` are
            placed in the registry result's metadata.
            """
            result_message = Message.tool_result_message(
                tool_call_id=tool_call.id,
                display_content=output,
                result_content=output,
                is_error=is_error,
            )
            registry_result = RegistryToolResult(
                output=output,
                is_error=is_error,
                metadata={"exit_code": exit_code, "stdout": stdout, "stderr": stderr},
            )
            return ToolExecutionOutcome(
                tool_call=tool_call,
                state=ToolExecutionState.EXECUTED,
                message=result_message,
                event_content=output,
                is_error=is_error,
                result_output=output,
                registry_result=registry_result,
            )
      
        229
        
        230
        
        231
        async def _noop_set_workflow_mode(mode, dod, emit, summary) -> None:
            """Accept the four arguments, ignore them, and return ``None``."""
            return
      
        233
        
        234
        
        235
        def test_turn_finalizer_finalize_summary_uses_runtime_context(
            temp_dir: Path,
            monkeypatch: pytest.MonkeyPatch,
        ) -> None:
            """Check that ``finalize_summary`` fills the summary from the context's session.

            Asserted: the session id, the usage payload handed to the session and the
            cumulative usage it returns, that the memory-store capture hook was
            called, that a trace is present, and that the completion decision fields
            match the session's.
            """
            fake_session = FakeSession()
            runtime_context = build_context(temp_dir, fake_session)
            tracer = RuntimeTracer()
            tracer.record("turn.completed", reason="done")
            finalizer = TurnFinalizer(
                runtime_context,
                tracer,
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )

            finished_dod = create_definition_of_done("Finish the task")
            finished_dod.status = "done"
            turn_summary = TurnSummary(
                final_response="All set.",
                definition_of_done=finished_dod,
                iterations=2,
                usage={"prompt_tokens": 10},
                tool_result_messages=[Message(role=Role.TOOL, content="tool output")],
            )

            # Swap the memory-store capture for a recorder so the call is observable.
            seen: dict[str, str] = {}

            def record_capture(self, summary_text: str) -> None:
                seen["summary"] = summary_text

            monkeypatch.setattr(
                "loader.runtime.finalization.MemoryStore.capture_definition_of_done",
                record_capture,
            )

            final_summary = finalizer.finalize_summary(turn_summary)

            expected_usage_call = {
                "usage": {"prompt_tokens": 10, "tool_calls": 1, "iterations": 2},
                "tool_calls": 1,
                "iterations": 2,
            }
            assert final_summary.session_id == "session-test-123"
            assert final_summary.cumulative_usage == {"turns": 1, "tool_calls": 1, "iterations": 2}
            assert fake_session.recorded_calls == [expected_usage_call]
            assert "summary" in seen
            assert final_summary.trace
            assert final_summary.completion_decision_code == "verification_passed"
            assert final_summary.completion_decision_summary == (
                "accepted the response after verification evidence passed"
            )
            trace_codes = [entry.decision_code for entry in final_summary.completion_trace]
            assert trace_codes == ["verification_passed"]
      
        288
        
        289
        
        290
        def test_verification_repair_guidance_uses_existing_artifacts_as_source_of_truth(
            temp_dir: Path,
        ) -> None:
            """Check that guidance for broken index links points at the chapter files on disk.

            The failed evidence names two chapter files that do not exist; the
            guidance is expected to mention three of the four existing chapters
            (the first, second and fourth).
            """
            guide_root = temp_dir / "guides" / "nginx"
            chapters = guide_root / "chapters"
            chapters.mkdir(parents=True)
            index_path = guide_root / "index.html"
            chapter_one, chapter_two, chapter_three, chapter_four = (
                chapters / name
                for name in (
                    "01-getting-started.html",
                    "02-installation.html",
                    "03-first-website.html",
                    "04-configuration-basics.html",
                )
            )
            artifacts = [index_path, chapter_one, chapter_two, chapter_three, chapter_four]
            for artifact in artifacts:
                artifact.write_text("<html></html>\n")

            # Plan lists the two directories first, then every artifact file.
            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}/`",
                f"- `{chapters}/`",
            ]
            plan_lines += [f"- `{artifact}`" for artifact in artifacts]
            plan_lines.append("")
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines))

            # Both reported link targets are files that were never written.
            missing_names = ("01-introduction.html", "04-server-blocks.html")
            failure_output = "Missing local HTML links:\n" + "".join(
                f"{index_path}:chapters/{name} -> {chapters / name}\n"
                for name in missing_names
            )

            dod = create_definition_of_done("Repair the nginx guide index.")
            dod.implementation_plan = str(implementation_plan)
            dod.evidence = [
                VerificationEvidence(
                    command="verify-links",
                    passed=False,
                    output=failure_output,
                )
            ]

            guidance = _build_verification_repair_guidance(dod, project_root=temp_dir)

            assert "Use the existing artifact files as the source of truth" in guidance
            for existing in (chapter_one, chapter_two, chapter_four):
                assert str(existing) in guidance
      
        347
        
        348
        
        349
        def test_verification_repair_guidance_does_not_create_out_of_scope_link_target(
            temp_dir: Path,
        ) -> None:
            """Check that a missing link target outside the planned files is not offered for creation.

            Both guide pages link to ``../index.html``, which is not in the plan.
            The guidance must flag it as out of scope, and the repair context
            extracted from it must allow the guide index but not that parent index.
            """
            guide_root = temp_dir / "guides" / "nginx"
            chapters = guide_root / "chapters"
            chapters.mkdir(parents=True)
            index_path = guide_root / "index.html"
            chapter_one = chapters / "01-introduction.html"
            index_path.write_text('<a href="../index.html">All guides</a>\n')
            chapter_one.write_text('<a href="../index.html">Back</a>\n')
            # Never written to disk and absent from the plan.
            parent_index = guide_root.parent / "index.html"
            in_scope_files = (index_path, chapter_one)

            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}/`",
                f"- `{chapters}/`",
            ]
            plan_lines += [f"- `{artifact}`" for artifact in in_scope_files]
            plan_lines.append("")
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines))

            dod = create_definition_of_done("Create the nginx guide under guides/nginx.")
            dod.implementation_plan = str(implementation_plan)
            dod.touched_files.extend([str(artifact) for artifact in in_scope_files])
            failure_output = (
                "Missing local HTML links:\n"
                f"{index_path}:../index.html -> {parent_index}\n"
            )
            dod.evidence = [
                VerificationEvidence(
                    command="verify-links",
                    passed=False,
                    output=failure_output,
                )
            ]

            guidance = _build_verification_repair_guidance(dod, project_root=temp_dir)
            repair = extract_active_repair_context([Message(role=Role.USER, content=guidance)])

            for required_phrase in (
                "outside the requested artifact scope",
                "do not create that outside file",
            ):
                assert required_phrase in guidance
            assert f"create `{parent_index}`" not in guidance
            assert repair is not None
            allowed = repair.allowed_paths
            assert str(parent_index.resolve(strict=False)) not in allowed
            assert str(index_path.resolve(strict=False)) in allowed
      
        403
        
        404
        
        405
        def test_verification_repair_guidance_replaces_stale_focus_for_html_quality_issue(
            temp_dir: Path,
        ) -> None:
            """Check that new quality guidance overrides an earlier repair-focus message.

            An older message targets the guide index; the failed quality evidence
            targets two chapters. The repair context extracted from both messages
            must point at the first chapter, allow the third, and not allow the
            stale index.
            """
            guide_root = temp_dir / "guides" / "nginx"
            stale_index = guide_root / "index.html"
            stale_index.parent.mkdir(parents=True)
            stale_index.write_text("<h1>Index</h1>\n")

            chapters = guide_root / "chapters"
            first_chapter = chapters / "01-introduction.html"
            third_chapter = chapters / "03-configuration.html"
            chapters.mkdir(parents=True)
            first_chapter.write_text("<h1>Intro</h1>\n")
            third_chapter.write_text("<h1>Config</h1>\n")

            stale_focus = (
                "Repair focus:\n"
                f"- Fix the broken local reference `../index.html` in `{stale_index}`.\n"
                f"- Immediate next step: edit `{stale_index}`.\n"
            )
            stale_message = Message(role=Role.USER, content=stale_focus)

            quality_output = (
                "HTML guide content quality issues:\n"
                f"{first_chapter}: insufficient structured content (13 blocks, expected at least 18)\n"
                f"{third_chapter}: thin content (1505 text chars, expected at least 1758)\n"
            )
            dod = create_definition_of_done("Create an equally thorough HTML guide.")
            dod.evidence = [
                VerificationEvidence(
                    command="quality",
                    passed=False,
                    output=quality_output,
                )
            ]

            guidance = _build_verification_repair_guidance(dod, project_root=temp_dir)
            fresh_message = Message(role=Role.USER, content=guidance)
            repair = extract_active_repair_context([stale_message, fresh_message])

            assert guidance.startswith("Repair focus:")
            assert f"Immediate next step: edit `{first_chapter}` with a substantial" in guidance
            assert "Repair every listed quality target in order before any final answer" in guidance
            assert "HTML guide content quality issues" not in guidance
            assert repair is not None
            assert repair.artifact_path == str(first_chapter.resolve(strict=False))
            allowed = repair.allowed_paths
            assert str(stale_index.resolve(strict=False)) not in allowed
            assert str(third_chapter.resolve(strict=False)) in allowed
      
        453
        
        454
        
        455
        def test_verification_repair_guidance_prioritizes_structural_html_quality_issue(
            temp_dir: Path,
        ) -> None:
            """Check the guidance for a chapter reported as having two closing ``</html>`` tags.

            The guidance must tell the reader to replace the file with one complete
            document rather than append to it, and the extracted repair context must
            target that chapter.
            """
            chapter = temp_dir / "guides" / "nginx" / "chapters" / "08-troubleshooting.html"
            chapter.parent.mkdir(parents=True)
            malformed_html = (
                "<!DOCTYPE html><html><body><h1>Troubleshooting</h1></body></html>\n"
                "<p>Trailing content.</p>\n"
            )
            chapter.write_text(malformed_html)

            quality_output = (
                "HTML guide content quality issues:\n"
                f"{chapter}: expected exactly one closing </html> tag (found 2)\n"
            )
            dod = create_definition_of_done("Create an equally thorough HTML guide.")
            dod.evidence = [
                VerificationEvidence(
                    command="quality",
                    passed=False,
                    output=quality_output,
                )
            ]

            guidance = _build_verification_repair_guidance(dod, project_root=temp_dir)
            repair = extract_active_repair_context([Message(role=Role.USER, content=guidance)])

            for required_phrase in (
                f"Improve `{chapter}`: expected exactly one closing </html> tag",
                f"Immediate next step: replace `{chapter}` with one complete",
                "replace the malformed file with one complete valid HTML document",
                "do not append more content after an existing closing tag",
            ):
                assert required_phrase in guidance
            assert repair is not None
            assert repair.artifact_path == str(chapter.resolve(strict=False))
      
        488
        
        489
        
        490
        def test_verification_repair_guidance_keeps_multi_file_quality_worklist(
            temp_dir: Path,
        ) -> None:
            """Check that thin-content guidance covers all eight reported chapters.

            The guidance must name the first and last chapter, carry the expected
            instructions about how to expand the pages, and yield a repair context
            that targets the first chapter while still allowing the last.
            """
            chapters = temp_dir / "guides" / "nginx" / "chapters"
            chapters.mkdir(parents=True)

            chapter_paths = []
            for number in range(1, 9):
                page = chapters / f"{number:02d}-chapter-{number}.html"
                page.write_text(f"<h1>{page.stem}</h1>\n")
                chapter_paths.append(page)

            # One issue line per chapter, with no trailing newline after the last.
            issue_lines = [
                f"{page}: thin content (200 text chars, expected at least 1758)"
                for page in chapter_paths
            ]
            dod = create_definition_of_done("Create an equally thorough HTML guide.")
            dod.evidence = [
                VerificationEvidence(
                    command="quality",
                    passed=False,
                    output="HTML guide content quality issues:\n" + "\n".join(issue_lines),
                )
            ]

            guidance = _build_verification_repair_guidance(dod, project_root=temp_dir)
            repair = extract_active_repair_context([Message(role=Role.USER, content=guidance)])

            first_page, last_page = chapter_paths[0], chapter_paths[-1]
            for required_phrase in (
                f"Improve `{first_page}`: thin content",
                f"Improve `{last_page}`: thin content",
                "add enough concrete prose",
                "bounded append-style",
                "avoid whole-file rewrites",
                "not table-of-contents inflation",
                "do not add duplicate navigation entries",
                "do not stop after touching only the first file",
            ):
                assert required_phrase in guidance
            assert repair is not None
            assert repair.artifact_path == str(first_page.resolve(strict=False))
            assert str(last_page.resolve(strict=False)) in repair.allowed_paths
      
        535
        
        536
        
        537
        def test_verification_repair_guidance_keeps_quality_targets_with_link_repairs(
            temp_dir: Path,
        ) -> None:
            """Check that repair guidance covers a broken link and a thin page together.

            One failed ``links`` evidence entry and one failed ``quality`` entry are
            attached to the definition of done; the generated guidance and the repair
            context extracted from it must mention both affected pages.
            """
            guide_root = temp_dir / "guides" / "nginx"
            chapter_dir = guide_root / "chapters"
            chapter_dir.mkdir(parents=True)
            page_with_broken_link = chapter_dir / "07-performance.html"
            page_with_thin_content = chapter_dir / "06-security.html"
            page_with_broken_link.write_text('<link rel="stylesheet" href="../styles.css">\n')
            page_with_thin_content.write_text("<h1>Security</h1>\n")

            dod = create_definition_of_done("Create an equally thorough HTML guide.")
            link_failure = VerificationEvidence(
                command="links",
                passed=False,
                output=(
                    "Missing local HTML links:\n"
                    f"{page_with_broken_link}:../styles.css -> {guide_root / 'styles.css'}\n"
                ),
            )
            quality_failure = VerificationEvidence(
                command="quality",
                passed=False,
                output=(
                    "HTML guide content quality issues:\n"
                    f"{page_with_thin_content}: thin content (1348 text chars, expected at least 1758)\n"
                ),
            )
            dod.evidence = [link_failure, quality_failure]

            guidance = _build_verification_repair_guidance(dod, project_root=temp_dir)
            guidance_message = Message(role=Role.USER, content=guidance)
            repair = extract_active_repair_context([guidance_message])

            # Both repair instructions must survive in the guidance text.
            assert (
                f"Fix the broken local reference `../styles.css` in `{page_with_broken_link}`"
                in guidance
            )
            assert f"Improve `{page_with_thin_content}`: thin content" in guidance
            assert "continue with the listed content-quality targets" in guidance
            assert "do not declare completion while any listed quality target remains" in guidance

            # The extracted repair context is anchored on the link repair page and
            # still allows edits to the thin-content page.
            assert repair is not None
            expected_artifact = str(page_with_broken_link.resolve(strict=False))
            assert repair.artifact_path == expected_artifact
            expected_allowed = str(page_with_thin_content.resolve(strict=False))
            assert expected_allowed in repair.allowed_paths
      
        582
        
        583
        
        584
        @pytest.mark.asyncio
        async def test_turn_finalizer_records_skipped_verification_observation(
            temp_dir: Path,
        ) -> None:
            """Run the gate with no mutating actions and expect a skipped observation."""
            session = FakeSession()
            finalizer = TurnFinalizer(
                build_context(temp_dir, session),
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )
            dod = create_definition_of_done("Explain Loader's clarify loop.")
            summary = TurnSummary(final_response="")
            emitted = []

            async def record_event(event) -> None:
                emitted.append(event)

            result = await finalizer.run_definition_of_done_gate(
                dod=dod,
                candidate_response="Loader uses a bounded clarify loop before execution.",
                emit=record_event,
                summary=summary,
                executor=FakeExecutor([]),  # type: ignore[arg-type]
            )

            skipped = VerificationObservationStatus.SKIPPED.value

            # Gate outcome.
            assert result.should_continue is False
            assert result.reason_code == "non_mutating_response_accepted"
            observed_statuses = [item.status for item in result.verification_observations]
            assert observed_statuses == [skipped]
            observed_summaries = [item.summary for item in result.verification_observations]
            assert observed_summaries == [
                "verification was skipped because no mutating work required checks"
            ]

            # Summary and definition-of-done bookkeeping.
            assert summary.verification_status == "skipped"
            assert "Complete the requested work" not in dod.pending_items
            assert "Complete the requested work" in dod.completed_items

            # Session timeline and emitted events.
            last_entry = session.workflow_timeline[-1]
            assert last_entry.kind == "verify_skip"
            timeline_statuses = [item.status for item in last_entry.verification_observations]
            assert timeline_statuses == [skipped]
            assert any(
                event.type == "dod_status" and event.dod_status == "done"
                for event in emitted
            )
      
        627
        
        628
        
        629
        @pytest.mark.asyncio
        async def test_turn_finalizer_accepts_noop_completion_with_task_restatement_todo(
            temp_dir: Path,
        ) -> None:
            """Accept a no-edit answer when a pending item merely restates the task text."""
            finalizer = TurnFinalizer(
                build_context(temp_dir, FakeSession()),
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )
            task = (
                "Have a look at ~/Loader/guides/fortran/index.html, then "
                "~/Loader/guides/fortran/chapters. The table of contents links in "
                "index.html are inaccurate and the href’s are wrong. Let’s update the "
                "links and their link texts to be correct."
            )
            dod = create_definition_of_done(task)
            # The first pending item is the task itself, verbatim.
            dod.pending_items = [task, "Complete the requested work"]

            async def ignore_event(event) -> None:
                return None

            result = await finalizer.run_definition_of_done_gate(
                dod=dod,
                candidate_response="The table of contents is already correct, so no edit is needed.",
                emit=ignore_event,
                summary=TurnSummary(final_response=""),
                executor=FakeExecutor([]),  # type: ignore[arg-type]
            )

            assert result.should_continue is False
            assert result.reason_code == "non_mutating_response_accepted"
      
        664
        
        665
        
        666
        @pytest.mark.asyncio
        async def test_turn_finalizer_records_passed_verification_observation(
            temp_dir: Path,
        ) -> None:
            """Run the gate with one passing verification command and inspect the records."""
            verify_command = "uv run pytest -q"
            attempt_id = "verification-attempt-1"

            session = FakeSession()
            finalizer = TurnFinalizer(
                build_context(temp_dir, session),
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )
            dod = create_definition_of_done("Update the runtime tests.")
            dod.mutating_actions.append("write")
            dod.verification_commands = ["uv run pytest -q"]
            summary = TurnSummary(final_response="")

            # Canned executor outcome for the single verification command.
            passing_outcome = tool_outcome(
                tool_call=ToolCall(
                    id="verify-1-1",
                    name="bash",
                    arguments={"command": "uv run pytest -q", "cwd": str(temp_dir)},
                ),
                output="219 passed",
                is_error=False,
                exit_code=0,
                stdout="219 passed",
            )

            async def ignore_event(event) -> None:
                return None

            result = await finalizer.run_definition_of_done_gate(
                dod=dod,
                candidate_response="Updated the runtime tests.",
                emit=ignore_event,
                summary=summary,
                executor=FakeExecutor([passing_outcome]),  # type: ignore[arg-type]
            )

            # Gate result carries exactly one passed observation.
            assert result.should_continue is False
            assert result.reason_code == "verification_passed"
            result_statuses = [item.status for item in result.verification_observations]
            assert result_statuses == [VerificationObservationStatus.PASSED.value]
            observation = result.verification_observations[0]
            assert observation.attempt_id == attempt_id
            assert observation.attempt_number == 1
            assert observation.command == verify_command
            assert observation.detail == "219 passed"
            assert summary.verification_status == "passed"

            # The last two timeline entries are the pending record, then the passed one.
            timeline = session.workflow_timeline
            assert [entry.reason_code for entry in timeline[-2:]] == [
                "verification_pending",
                "verification_command_passed",
            ]
            pending_entry = timeline[-2]
            passed_entry = timeline[-1]

            pending_statuses = [item.status for item in pending_entry.verification_observations]
            assert pending_statuses == [VerificationObservationStatus.PENDING.value]
            assert pending_entry.verification_observations[0].attempt_id == attempt_id
            assert pending_entry.verification_observations[0].command == verify_command

            assert passed_entry.kind == "verify_observation"
            assert passed_entry.reason_code == "verification_command_passed"
            passed_statuses = [item.status for item in passed_entry.verification_observations]
            assert passed_statuses == [VerificationObservationStatus.PASSED.value]
      
        738
        
        739
        
        740
        @pytest.mark.asyncio
        async def test_turn_finalizer_appends_runtime_semantic_verifier_to_planned_commands(
            temp_dir: Path,
        ) -> None:
            """Expect the planned grep plus an inline ``python3`` heredoc command to run."""
            chapter_dir = temp_dir / "chapters"
            chapter_dir.mkdir()
            intro_page = chapter_dir / "01-introduction.html"
            intro_page.write_text("<h1>Chapter 1: Introduction to Fortran</h1>\n")

            index_page = temp_dir / "index.html"
            toc_markup = [
                '<ul class="chapter-list">',
                '  <li><a href="chapters/01-introduction.html">Chapter 1: Introduction to Fortran</a></li>',
                "</ul>",
            ]
            index_page.write_text("\n".join(toc_markup))

            session = FakeSession()
            finalizer = TurnFinalizer(
                build_context(temp_dir, session),
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )
            dod = create_definition_of_done(
                "Update index.html so the table of contents links and chapter titles are correct."
            )
            dod.mutating_actions.append("edit")
            dod.touched_files.append(str(index_page))
            dod.verification_commands = ['grep -n "href=" index.html']
            executor = RecordingExecutor()

            async def ignore_event(event) -> None:
                return None

            result = await finalizer.run_definition_of_done_gate(
                dod=dod,
                candidate_response="Updated the index.html links.",
                emit=ignore_event,
                summary=TurnSummary(final_response=""),
                executor=executor,  # type: ignore[arg-type]
            )

            assert result.should_continue is False
            # The planned command is still executed...
            assert any(command == 'grep -n "href=" index.html' for command in executor.commands)
            # ...and at least one executed command is a python3 heredoc script.
            assert any(command.startswith("python3 - <<'PY'") for command in executor.commands)
            latest_observation = session.workflow_timeline[-1].verification_observations[0]
            assert latest_observation.attempt_id == "verification-attempt-1"
      
        795
        
        796
        
        797
        @pytest.mark.asyncio
        async def test_turn_finalizer_does_not_append_repo_defaults_to_external_verification_plan(
            temp_dir: Path,
        ) -> None:
            """Expect only the two planned commands to run for a file outside ``temp_dir``.

            ``temp_dir`` holds ``pyproject.toml`` and ``package.json`` while the touched
            file lives in a sibling directory; the executor must see exactly the planned
            commands and nothing else.
            """
            (temp_dir / "pyproject.toml").write_text("[project]\nname='loader'\n")
            (temp_dir / "package.json").write_text("{}\n")

            # The touched artifact lives next to, not inside, temp_dir.
            external_root = temp_dir.parent / "external-nginx-guide"
            external_root.mkdir(exist_ok=True)
            external_index = external_root / "index.html"
            external_index.write_text("<html></html>\n")

            listing_command = f"ls -la {external_root}"
            grep_command = f'grep -n "html" {external_index}'

            finalizer = TurnFinalizer(
                build_context(temp_dir, FakeSession()),
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )
            dod = create_definition_of_done("Create an external nginx guide.")
            dod.mutating_actions.append("write")
            dod.touched_files.append(str(external_index))
            dod.verification_commands = [listing_command, grep_command]
            executor = RecordingExecutor()

            async def ignore_event(event) -> None:
                return None

            result = await finalizer.run_definition_of_done_gate(
                dod=dod,
                candidate_response="Created the external nginx guide.",
                emit=ignore_event,
                summary=TurnSummary(final_response=""),
                executor=executor,  # type: ignore[arg-type]
            )

            assert result.should_continue is False
            # A fresh list is used so the check does not alias dod.verification_commands.
            assert executor.commands == [listing_command, grep_command]
      
        842
        
        843
        
        844
        @pytest.mark.asyncio
        async def test_turn_finalizer_filters_reference_side_verification_commands(
            temp_dir: Path,
        ) -> None:
            """Expect commands that name the fortran reference directory not to be executed.

            The verification plan lists an ``ls`` for the nginx guide (listed in the
            implementation plan) and one for the fortran directory (not listed); only the
            former may reach the executor.
            """
            guides_dir = temp_dir / "Loader" / "guides"
            guide_root = guides_dir / "nginx"
            chapter_dir = guide_root / "chapters"
            chapter_dir.mkdir(parents=True)
            index_path = guide_root / "index.html"
            chapter_one = chapter_dir / "01-introduction.html"
            index_path.write_text("<html><body><h1>Guide</h1></body></html>\n")
            chapter_one.write_text("<html><body><h1>Intro</h1></body></html>\n")

            reference_root = guides_dir / "fortran"
            reference_root.mkdir(parents=True)

            implementation_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}`",
                f"- `{chapter_dir}`",
                f"- `{index_path}`",
                f"- `{chapter_one}`",
                "",
            ]
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(implementation_lines))

            verification_lines = [
                "# Verification Plan",
                "",
                "## Verification Commands",
                "```bash",
                f"ls -la {guide_root}",
                f"ls -la {reference_root}",
                "```",
                "",
            ]
            verification_plan = temp_dir / "verification.md"
            verification_plan.write_text("\n".join(verification_lines))

            finalizer = TurnFinalizer(
                build_context(temp_dir, FakeSession()),
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )
            dod = create_definition_of_done("Create an nginx guide from an external reference.")
            dod.mutating_actions.append("write")
            dod.touched_files.extend([str(index_path), str(chapter_one)])
            dod.implementation_plan = str(implementation_plan)
            dod.verification_plan = str(verification_plan)
            executor = RecordingExecutor()

            async def ignore_event(event) -> None:
                return None

            result = await finalizer.run_definition_of_done_gate(
                dod=dod,
                candidate_response="Created the nginx guide.",
                emit=ignore_event,
                summary=TurnSummary(final_response=""),
                executor=executor,  # type: ignore[arg-type]
            )

            guide_text = str(guide_root)
            reference_text = str(reference_root)
            assert result.should_continue is False
            assert any(guide_text in command for command in executor.commands)
            assert all(reference_text not in command for command in executor.commands)
      
        920
        
        921
        
        922
        @pytest.mark.asyncio
        async def test_turn_finalizer_blocks_completion_when_planned_artifacts_are_missing(
            temp_dir: Path,
        ) -> None:
            """Expect the gate to continue when a planned file was never written.

            The implementation plan lists three files but only the index and the first
            chapter exist on disk; no verification command should be executed.
            """
            docs = temp_dir / "docs"
            chapter_dir = docs / "chapters"
            chapter_dir.mkdir(parents=True)
            index = docs / "index.html"
            existing_chapter = chapter_dir / "01-intro.html"
            missing_chapter = chapter_dir / "02-installation.html"

            index_links = [
                '<a href="chapters/01-intro.html">Intro</a>',
                '<a href="chapters/02-installation.html">Installation</a>',
            ]
            index.write_text("\n".join(index_links))
            existing_chapter.write_text("<h1>Intro</h1>\n")
            # missing_chapter is intentionally never created.

            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{index}`",
                f"- `{existing_chapter}`",
                f"- `{missing_chapter}`",
            ]
            implementation_plan = temp_dir / "implementation.md"
            implementation_plan.write_text("\n".join(plan_lines))

            session = FakeSession()
            finalizer = TurnFinalizer(
                build_context(temp_dir, session),
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )
            dod = create_definition_of_done("Create a small multi-page HTML guide.")
            dod.mutating_actions.append("write")
            dod.touched_files.extend([str(index), str(existing_chapter)])
            dod.implementation_plan = str(implementation_plan)
            dod.verification_commands = [f"ls -la {docs}"]
            executor = RecordingExecutor()

            async def ignore_event(event) -> None:
                return None

            result = await finalizer.run_definition_of_done_gate(
                dod=dod,
                candidate_response="Finished the guide.",
                emit=ignore_event,
                summary=TurnSummary(final_response=""),
                executor=executor,  # type: ignore[arg-type]
            )

            assert result.should_continue is True
            assert result.reason_code == "planned_artifacts_missing_continue"
            assert executor.commands == []
            assert dod.status == "draft"
            assert "Complete the requested work" in dod.pending_items
            assert "Complete the requested work" not in dod.completed_items

            # The follow-up message names the missing file.
            reminder = session.messages[-1].content
            assert reminder.startswith("[PLANNED ARTIFACTS STILL MISSING]")
            assert "`02-installation.html`" in reminder
      
        990
        
        991
        
        992
        @pytest.mark.asyncio
        async def test_turn_finalizer_records_missing_verification_observation(
            temp_dir: Path,
        ) -> None:
            """Run the gate after an edit with no verification commands configured."""
            attempt_id = "verification-attempt-1"
            missing = VerificationObservationStatus.MISSING.value

            session = FakeSession()
            finalizer = TurnFinalizer(
                build_context(temp_dir, session),
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )
            dod = create_definition_of_done("Edit the loader bootstrap.")
            dod.mutating_actions.append("edit")
            summary = TurnSummary(final_response="")

            async def ignore_event(event) -> None:
                return None

            result = await finalizer.run_definition_of_done_gate(
                dod=dod,
                candidate_response="Updated the bootstrap code.",
                emit=ignore_event,
                summary=summary,
                executor=FakeExecutor([]),  # type: ignore[arg-type]
            )

            # Gate result: re-entry with a single "missing" observation.
            assert result.should_continue is True
            assert result.reason_code == "verification_failed_reentry"
            result_statuses = [item.status for item in result.verification_observations]
            assert result_statuses == [missing]
            first_observation = result.verification_observations[0]
            assert first_observation.attempt_id == attempt_id
            assert first_observation.attempt_number == 1
            result_summaries = [item.summary for item in result.verification_observations]
            assert result_summaries == [
                "verification commands were still missing at execution time"
            ]
            assert summary.verification_status == "failed"

            # Timeline mirrors the same observation.
            last_entry = session.workflow_timeline[-1]
            assert last_entry.kind == "verify_observation"
            assert last_entry.reason_code == "verification_commands_missing"
            entry_statuses = [item.status for item in last_entry.verification_observations]
            assert entry_statuses == [missing]
            assert last_entry.verification_observations[0].attempt_id == attempt_id

            # A user-role failure notice is appended to the conversation.
            last_message = session.messages[-1]
            assert last_message.role == Role.USER
            assert last_message.content.startswith("[DEFINITION OF DONE CHECK FAILED]")
      
        1041
        
        1042
        
        1043
        @pytest.mark.asyncio
        async def test_turn_finalizer_ignores_unplanned_expansion_pending_items_once_plan_exists(
            temp_dir: Path,
        ) -> None:
            """Pending items outside the implementation plan must not block completion.

            Every path listed under the plan's "File Changes" heading is created on
            disk, while ``pending_items`` names a chapter the plan never mentions.
            The gate is expected to stop (``should_continue is False``) with the
            ``non_mutating_response_accepted`` reason code.
            """
            fake_session = FakeSession()
            gate = TurnFinalizer(
                build_context(temp_dir, fake_session),
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )

            # Lay out the guide tree that the plan below refers to.
            docs = temp_dir / "guides" / "nginx"
            chapters = docs / "chapters"
            index = docs / "index.html"
            first = chapters / "01-getting-started.html"
            second = chapters / "02-installation.html"
            docs.mkdir(parents=True)
            chapters.mkdir()
            for page, markup in (
                (index, "<html></html>\n"),
                (first, "<h1>One</h1>\n"),
                (second, "<h1>Two</h1>\n"),
            ):
                page.write_text(markup)

            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{docs}/`",
                f"- `{chapters}/`",
                f"- `{index}`",
                f"- `{first}`",
                f"- `{second}`",
                "",
            ]
            plan_path = temp_dir / "implementation.md"
            plan_path.write_text("\n".join(plan_lines))

            dod = create_definition_of_done("Create a small multi-page HTML guide.")
            dod.implementation_plan = str(plan_path)
            # Neither entry corresponds to a file named in the plan above.
            dod.pending_items = [
                "Create 07-performance-tuning.html",
                "Complete the requested work",
            ]
            turn_summary = TurnSummary(final_response="")

            async def ignore_event(event) -> None:
                """Discard every event emitted by the gate."""

            gate_result = await gate.run_definition_of_done_gate(
                dod=dod,
                candidate_response="Finished the guide.",
                emit=ignore_event,
                summary=turn_summary,
                executor=FakeExecutor([]),  # type: ignore[arg-type]
            )

            assert gate_result.should_continue is False
            assert gate_result.reason_code == "non_mutating_response_accepted"
      
        1105
        
        1106
        
        1107
        @pytest.mark.asyncio
        async def test_turn_finalizer_verification_failure_reentry_points_at_concrete_repair(
            temp_dir: Path,
            monkeypatch: pytest.MonkeyPatch,
        ) -> None:
            """A failed link check must steer the next turn at the exact broken file.

            The scripted verification command fails with output naming an HTML file
            and the stylesheet target it links to (which is never created). The test
            expects a ``verification_failed_reentry`` result, a queued steering
            message mentioning both paths, and a session message that names the
            concrete edit/create steps.
            """
            fake_session = FakeSession()
            runtime_context = build_context(temp_dir, fake_session)
            steering_notes: list[str] = []
            runtime_context.queue_steering_message_callback = steering_notes.append
            gate = TurnFinalizer(
                runtime_context,
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )

            # One chapter that links to a stylesheet which does not exist.
            guide_dir = temp_dir / "guides" / "nginx"
            broken_file = guide_dir / "chapters" / "05-advanced-configurations.html"
            broken_file.parent.mkdir(parents=True, exist_ok=True)
            broken_file.write_text('<link rel="stylesheet" href="../styles.css">\n')
            missing_target = guide_dir / "styles.css"

            dod = create_definition_of_done("Create the nginx guide.")
            dod.mutating_actions.append("write")
            dod.touched_files.append(str(broken_file))
            dod.verification_commands = ["python3 verify_links.py"]
            turn_summary = TurnSummary(final_response="")

            link_check_call = ToolCall(
                id="verify-1-1",
                name="bash",
                arguments={"command": dod.verification_commands[0], "cwd": str(temp_dir)},
            )
            link_check_output = (
                "Missing local HTML links:\n"
                + f"{broken_file}:../styles.css -> {missing_target}\n"
            )

            async def ignore_event(event) -> None:
                """Discard every event emitted by the gate."""

            def no_derived_commands(*args, **kwargs):
                """Stand-in for ``derive_verification_commands`` that yields nothing."""
                return []

            monkeypatch.setattr(
                "loader.runtime.finalization.derive_verification_commands",
                no_derived_commands,
            )

            scripted_failure = tool_outcome(
                tool_call=link_check_call,
                output=link_check_output,
                is_error=True,
                exit_code=1,
                stdout=link_check_output,
            )
            gate_result = await gate.run_definition_of_done_gate(
                dod=dod,
                candidate_response="The guide is complete.",
                emit=ignore_event,
                summary=turn_summary,
                executor=FakeExecutor([scripted_failure]),  # type: ignore[arg-type]
            )

            assert gate_result.should_continue is True
            assert gate_result.reason_code == "verification_failed_reentry"

            assert steering_notes
            latest_note = steering_notes[-1]
            for expected_fragment in (
                str(broken_file),
                "../styles.css",
                str(missing_target),
                "Do not restart discovery or reread unrelated references.",
            ):
                assert expected_fragment in latest_note

            reentry_prompt = fake_session.messages[-1].content
            assert reentry_prompt.startswith("[DEFINITION OF DONE CHECK FAILED]")
            for expected_fragment in (
                f"Immediate next step: edit `{broken_file}`.",
                f"create `{missing_target}`",
                "Do not reread unrelated reference materials or restart discovery",
            ):
                assert expected_fragment in reentry_prompt
      
        1181
        
        1182
        
        1183
        @pytest.mark.asyncio
        async def test_turn_finalizer_verification_failure_reentry_prioritizes_missing_planned_outputs(
            temp_dir: Path,
            monkeypatch: pytest.MonkeyPatch,
        ) -> None:
            """Missing linked chapters should be written before the index is edited.

            The index links to three chapters but only the first is created; the
            scripted link check reports the other two as missing. The test expects
            the re-entry guidance to direct a write of the second chapter, to list
            both missing chapters as artifacts to create, and not to tell the
            model to edit the index.
            """
            fake_session = FakeSession()
            runtime_context = build_context(temp_dir, fake_session)
            steering_notes: list[str] = []
            runtime_context.queue_steering_message_callback = steering_notes.append
            gate = TurnFinalizer(
                runtime_context,
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )

            guide_root = temp_dir / "guides" / "nginx"
            chapters = guide_root / "chapters"
            chapters.mkdir(parents=True, exist_ok=True)
            index = guide_root / "index.html"
            first = chapters / "01-installation.html"
            second = chapters / "02-configuration.html"
            third = chapters / "03-basic-usage.html"

            index_links = [
                '<a href="chapters/01-installation.html">Installation</a>',
                '<a href="chapters/02-configuration.html">Configuration</a>',
                '<a href="chapters/03-basic-usage.html">Basic Usage</a>',
            ]
            index.write_text("\n".join(index_links))
            # Only the first chapter exists; ``second`` and ``third`` are never written.
            first.write_text("<h1>Installation</h1>\n")

            plan_lines = [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}/`",
                f"- `{chapters}/`",
                f"- `{index}`",
                f"- `{first}`",
                "",
            ]
            plan_path = temp_dir / "implementation.md"
            plan_path.write_text("\n".join(plan_lines))

            dod = create_definition_of_done("Create the nginx guide.")
            dod.mutating_actions.append("write")
            dod.touched_files.extend([str(index), str(first)])
            dod.implementation_plan = str(plan_path)
            dod.verification_commands = ["python3 verify_links.py"]
            turn_summary = TurnSummary(final_response="")

            link_check_call = ToolCall(
                id="verify-1-1",
                name="bash",
                arguments={"command": dod.verification_commands[0], "cwd": str(temp_dir)},
            )
            # The guidance is asserted against resolved paths, the raw output is not.
            normalized_second = str(second.resolve(strict=False))
            normalized_third = str(third.resolve(strict=False))
            link_check_output = (
                "Missing local HTML links:\n"
                + f"{index}:chapters/02-configuration.html -> {second}\n"
                + f"{index}:chapters/03-basic-usage.html -> {third}\n"
            )

            async def ignore_event(event) -> None:
                """Discard every event emitted by the gate."""

            def no_derived_commands(*args, **kwargs):
                """Stand-in for ``derive_verification_commands`` that yields nothing."""
                return []

            monkeypatch.setattr(
                "loader.runtime.finalization.derive_verification_commands",
                no_derived_commands,
            )

            scripted_failure = tool_outcome(
                tool_call=link_check_call,
                output=link_check_output,
                is_error=True,
                exit_code=1,
                stdout=link_check_output,
            )
            gate_result = await gate.run_definition_of_done_gate(
                dod=dod,
                candidate_response="The guide is complete.",
                emit=ignore_event,
                summary=turn_summary,
                executor=FakeExecutor([scripted_failure]),  # type: ignore[arg-type]
            )

            assert gate_result.should_continue is True
            assert gate_result.reason_code == "verification_failed_reentry"

            assert steering_notes
            latest_note = steering_notes[-1]
            assert normalized_second in latest_note
            assert "Do not rewrite the existing aggregate files" in latest_note

            reentry_prompt = fake_session.messages[-1].content
            assert reentry_prompt.startswith("[DEFINITION OF DONE CHECK FAILED]")
            assert f"Immediate next step: write `{normalized_second}`." in reentry_prompt
            for missing_chapter in (normalized_second, normalized_third):
                assert (
                    f"creating missing planned artifact `{missing_chapter}`"
                    in reentry_prompt
                )
            assert f"Immediate next step: edit `{index}`." not in reentry_prompt
            assert "Do not rewrite existing aggregate files" in reentry_prompt
      
        1292
        
        1293
        
        1294
        @pytest.mark.asyncio
        async def test_turn_finalizer_does_not_reverify_without_new_changes(
            temp_dir: Path,
        ) -> None:
            """A previously failed verification is not re-run when nothing changed.

            The definition of done already records a failed verification together
            with a signature built from the current line count and touched file
            (presumably the same value the finalizer recomputes; confirm against
            the finalizer). The test expects no commands to reach the executor, a
            ``verification_failed_no_new_changes`` result, and a "still failing"
            session message.
            """
            fake_session = FakeSession()
            gate = TurnFinalizer(
                build_context(temp_dir, fake_session),
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )

            index = temp_dir / "index.html"
            index.write_text("<ul></ul>\n")

            dod = create_definition_of_done("Fix the chapter list in index.html.")
            dod.mutating_actions.append("edit")
            dod.touched_files.append(str(index))
            dod.line_changes = 12
            dod.last_verification_result = "failed"
            dod.last_verification_signature = (
                f"lines={dod.line_changes};touched={index};actions=1;commands="
            )
            dod.evidence = []

            turn_summary = TurnSummary(final_response="")
            recording_executor = RecordingExecutor()

            async def ignore_event(event) -> None:
                """Discard every event emitted by the gate."""

            gate_result = await gate.run_definition_of_done_gate(
                dod=dod,
                candidate_response="I checked the file again.",
                emit=ignore_event,
                summary=turn_summary,
                executor=recording_executor,  # type: ignore[arg-type]
            )

            assert gate_result.should_continue is True
            assert gate_result.reason_code == "verification_failed_no_new_changes"
            # Nothing should have been executed for the unchanged state.
            assert recording_executor.commands == []
            assert turn_summary.verification_status == "failed"
            latest_message = fake_session.messages[-1].content
            assert latest_message.startswith("[DEFINITION OF DONE CHECK STILL FAILING]")
      
        1336
        
        1337
        
        1338
        @pytest.mark.asyncio
        async def test_turn_finalizer_extends_retry_budget_when_failures_change(
            temp_dir: Path,
        ) -> None:
            """An exhausted retry budget still allows re-entry when the failure differs.

            ``retry_count`` starts equal to ``retry_budget`` and the stored
            signatures describe an older state and an older failing artifact set.
            After the gate runs, the test expects a ``verification_failed_reentry``
            result, ``retry_count == 1``, status ``"fixing"``, and the verification
            command recorded by the executor.
            """
            fake_session = FakeSession()
            gate = TurnFinalizer(
                build_context(temp_dir, fake_session),
                RuntimeTracer(),
                DefinitionOfDoneStore(temp_dir),
                set_workflow_mode=_noop_set_workflow_mode,
            )

            chapter_path = temp_dir / "chapter.html"
            chapter_path.write_text("<h1>Chapter</h1>\n")

            dod = create_definition_of_done("Expand the generated chapter.")
            # Start with the retry budget already used up.
            dod.retry_count = dod.retry_budget
            dod.mutating_actions.append("patch")
            dod.touched_files.append(str(chapter_path))
            dod.line_changes = 20
            dod.last_verification_result = "failed"
            # Signature from an earlier state (10 lines) than the current one (20 lines).
            dod.last_verification_signature = "lines=10;touched=chapter.html;actions=1;commands="
            dod.last_failed_verification_issue_signature = "old failing artifact set"
            dod.verification_commands = ["python check_quality.py"]

            turn_summary = TurnSummary(final_response="")
            selective_executor = SelectiveRecordingExecutor("check_quality.py")

            async def ignore_event(event) -> None:
                """Discard every event emitted by the gate."""

            gate_result = await gate.run_definition_of_done_gate(
                dod=dod,
                candidate_response="I expanded one failing file.",
                emit=ignore_event,
                summary=turn_summary,
                executor=selective_executor,  # type: ignore[arg-type]
            )

            assert gate_result.should_continue is True
            assert gate_result.reason_code == "verification_failed_reentry"
            assert dod.retry_count == 1
            assert dod.status == "fixing"
            assert "python check_quality.py" in selective_executor.commands
            latest_message = fake_session.messages[-1].content
            assert latest_message.startswith("[DEFINITION OF DONE CHECK FAILED]")
      
        1381
        
        1382
        
        1383
        @pytest.mark.asyncio
      
        1384
        async def test_turn_finalizer_accepts_missing_optional_html5validator_when_semantic_check_passes(
      
        1385
            temp_dir: Path,
      
        1386
            monkeypatch: pytest.MonkeyPatch,
      
        1387
        ) -> None:
      
        1388
            session = FakeSession()
      
        1389
            context = build_context(temp_dir, session)
      
        1390
            finalizer = TurnFinalizer(
      
        1391
                context,
      
        1392
                RuntimeTracer(),
      
        1393
                DefinitionOfDoneStore(temp_dir),
      
        1394
                set_workflow_mode=_noop_set_workflow_mode,
      
        1395
            )
      
        1396
            dod = create_definition_of_done(
      
        1397
                "Update index.html so the table of contents links and chapter titles are correct."
      
        1398
            )
      
        1399
            dod.mutating_actions.append("edit")
      
        1400
            dod.touched_files.append(str(temp_dir / "index.html"))
      
        1401
            dod.verification_commands = [
      
        1402
                "python3 - <<'PY'\nprint('semantic ok')\nPY",
      
        1403
                "html5validator --root /tmp/fortran-qwen-recovery-check/",
      
        1404
            ]
      
        1405
            summary = TurnSummary(final_response="")
      
        1406
            semantic_call = ToolCall(
      
        1407
                id="verify-1-1",
      
        1408
                name="bash",
      
        1409
                arguments={"command": dod.verification_commands[0], "cwd": str(temp_dir)},
      
        1410
            )
      
        1411
            html5validator_call = ToolCall(
      
        1412
                id="verify-1-2",
      
        1413
                name="bash",
      
        1414
                arguments={"command": dod.verification_commands[1], "cwd": str(temp_dir)},
      
        1415
            )
      
        1416
        
        1417
            async def capture(event) -> None:
      
        1418
                return None
      
        1419
        
        1420
            monkeypatch.setattr(
      
        1421
                "loader.runtime.finalization.derive_verification_commands",
      
        1422
                lambda *args, **kwargs: [],
      
        1423
            )
      
        1424
        
        1425
            result = await finalizer.run_definition_of_done_gate(
      
        1426
                dod=dod,
      
        1427
                candidate_response="Updated the chapter links and titles.",
      
        1428
                emit=capture,
      
        1429
                summary=summary,
      
        1430
                executor=FakeExecutor(
      
        1431
                    [
      
        1432
                        tool_outcome(
      
        1433
                            tool_call=semantic_call,
      
        1434
                            output="semantic ok",
      
        1435
                            is_error=False,
      
        1436
                            exit_code=0,
      
        1437
                            stdout="semantic ok",
      
        1438
                        ),
      
        1439
                        tool_outcome(
      
        1440
                            tool_call=html5validator_call,
      
        1441
                            output="/bin/sh: html5validator: command not found",
      
        1442
                            is_error=True,
      
        1443
                            exit_code=127,
      
        1444
                            stderr="/bin/sh: html5validator: command not found",
      
        1445
                        ),
      
        1446
                    ]
      
        1447
                ),  # type: ignore[arg-type]
      
        1448
            )
      
        1449
        
        1450
            assert result.should_continue is False
      
        1451
            assert result.reason_code == "verification_passed"
      
        1452
            assert summary.verification_status == "passed"
      
        1453
            assert dod.status == "done"
      
        1454
            assert dod.last_verification_result == "passed"
      
        1455
            assert [item.passed for item in dod.evidence] == [True, False]
      
        1456
            assert [item.skipped for item in dod.evidence] == [False, True]
      
        1457
            assert "SKIP" in result.final_response
      
        1458
            assert "html5validator" in result.final_response
      
        1459
            assert session.workflow_timeline[-2].reason_code == "verification_command_passed"
      
        1460
            assert session.workflow_timeline[-1].reason_code == "verification_command_skipped"
      
        1461
            assert [item.status for item in session.workflow_timeline[-1].verification_observations] == [
      
        1462
                VerificationObservationStatus.SKIPPED.value
      
        1463
            ]