"""Tests for tool-batch execution on RuntimeContext.""" from __future__ import annotations from pathlib import Path from types import SimpleNamespace import pytest from loader.llm.base import Message, Role, ToolCall from loader.runtime.context import RuntimeContext from loader.runtime.dod import ( DefinitionOfDoneStore, VerificationEvidence, create_definition_of_done, ) from loader.runtime.events import AgentEvent, TurnSummary from loader.runtime.executor import ToolExecutionOutcome, ToolExecutionState from loader.runtime.path_display import display_runtime_path from loader.runtime.permissions import ( PermissionMode, build_permission_policy, load_permission_rules, ) from loader.runtime.reasoning_types import ( ActionVerification, ConfidenceAssessment, ConfidenceLevel, ) from loader.runtime.recovery import RecoveryContext from loader.runtime.tool_batches import ( ToolBatchRunner, ) from loader.runtime.tool_batches import ( _should_prioritize_missing_artifact as tool_batches_should_prioritize_missing_artifact, ) from loader.runtime.workflow import sync_todos_to_definition_of_done from loader.tools.base import ToolResult as RegistryToolResult from loader.tools.base import create_default_registry from tests.helpers.runtime_harness import ScriptedBackend class FakeSession: def __init__(self, messages: list[Message]) -> None: self.messages = list(messages) self.workflow_timeline = [] def append(self, message: Message) -> None: self.messages.append(message) def append_workflow_timeline_entry(self, entry) -> None: self.workflow_timeline.append(entry) class FakeCodeFilter: def reset(self) -> None: return None class FakeSafeguards: def __init__(self, *, detect_loop_result: tuple[bool, str] = (False, "")) -> None: self.action_tracker = object() self.validator = object() self.code_filter = FakeCodeFilter() self._detect_loop_result = detect_loop_result def filter_stream_chunk(self, content: str) -> str: return content def filter_complete_content(self, content: str) -> str: 
return content def should_steer(self) -> bool: return False def get_steering_message(self) -> str | None: return None def record_response(self, content: str) -> None: return None def detect_text_loop(self, content: str) -> tuple[bool, str]: return False, "" def detect_loop(self) -> tuple[bool, str]: return self._detect_loop_result class FakeExecutor: def __init__(self, outcomes: list[ToolExecutionOutcome]) -> None: self._outcomes = list(outcomes) self.calls: list[ToolCall] = [] async def execute_tool_call(self, tool_call: ToolCall, **_: object) -> ToolExecutionOutcome: self.calls.append(tool_call) if not self._outcomes: raise AssertionError("No fake tool outcome queued") return self._outcomes.pop(0) def build_context( *, temp_dir: Path, messages: list[Message], safeguards: FakeSafeguards, assess_confidence, verify_action, recovery_context: RecoveryContext | None = None, confidence_scoring: bool = False, verification: bool = False, auto_recover: bool = True, min_confidence_for_action: int = 3, ) -> RuntimeContext: registry = create_default_registry(temp_dir) registry.configure_workspace_root(temp_dir) rule_status = load_permission_rules(temp_dir) policy = build_permission_policy( active_mode=PermissionMode.WORKSPACE_WRITE, workspace_root=temp_dir, tool_requirements=registry.get_tool_requirements(), rules=rule_status.rules, ) context = RuntimeContext( project_root=temp_dir, backend=ScriptedBackend(), registry=registry, session=FakeSession(messages), # type: ignore[arg-type] config=SimpleNamespace( force_react=False, max_recovery_attempts=2, auto_recover=auto_recover, reasoning=SimpleNamespace( rollback=False, show_rollback_plan=False, completion_check=True, max_continuation_prompts=5, self_critique=False, confidence_scoring=confidence_scoring, min_confidence_for_action=min_confidence_for_action, verification=verification, ), ), capability_profile=SimpleNamespace(supports_native_tools=True), # type: ignore[arg-type] project_context=None, permission_policy=policy, 
def tool_outcome(
    *,
    tool_call: ToolCall,
    output: str,
    is_error: bool,
    state: ToolExecutionState = ToolExecutionState.EXECUTED,
    metadata: dict[str, object] | None = None,
) -> ToolExecutionOutcome:
    """Build an outcome whose message, event, and result all carry *output*."""
    return ToolExecutionOutcome(
        tool_call=tool_call,
        state=state,
        message=Message.tool_result_message(
            tool_call_id=tool_call.id,
            display_content=output,
            result_content=output,
            is_error=is_error,
        ),
        event_content=output,
        is_error=is_error,
        result_output=output,
        registry_result=RegistryToolResult(
            output=output,
            is_error=is_error,
            metadata=metadata or {},
        ),
    )


@pytest.mark.asyncio
async def test_tool_batch_runner_uses_context_for_confidence_gate(temp_dir: Path) -> None:
    """A LOW confidence assessment skips the tool and queues a warning."""
    captured: dict[str, str] = {}

    async def assess_confidence(tool_name: str, tool_args: dict, context: str) -> ConfidenceAssessment:
        captured["context"] = context
        return ConfidenceAssessment(
            action=f"{tool_name} with {tool_args}",
            tool_name=tool_name,
            tool_args=tool_args,
            level=ConfidenceLevel.LOW,
            reasoning="Need to inspect the target first.",
            risks=["Unknown target file"],
        )

    async def verify_action(tool_name: str, tool_args: dict, result: str, expected: str = "") -> ActionVerification:
        raise AssertionError("Verification should not run for skipped actions")

    context = build_context(
        temp_dir=temp_dir,
        messages=[
            Message(role=Role.USER, content="Please inspect the project."),
            Message(role=Role.ASSISTANT, content="I will read the file next."),
        ],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        confidence_scoring=True,
        min_confidence_for_action=3,
    )
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    tool_call = ToolCall(id="read-1", name="read", arguments={"file_path": "README.md"})
    events: list[AgentEvent] = []

    async def emit(event: AgentEvent) -> None:
        events.append(event)

    executor = FakeExecutor([tool_outcome(tool_call=tool_call, output="unused", is_error=False)])
    result = await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=emit,
        summary=TurnSummary(final_response=""),
        dod=create_definition_of_done("Read the docs"),
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    # The gate must block execution entirely and warn the model via a
    # synthetic user message built from the conversation context.
    assert result.actions_taken == []
    assert executor.calls == []
    assert "Please inspect the project." in captured["context"]
    assert context.session.messages[-1].role == Role.USER
    assert "[LOW CONFIDENCE WARNING]" in context.session.messages[-1].content
    event_types = [event.type for event in events]
    assert "confidence" in event_types
@pytest.mark.asyncio
async def test_tool_batch_runner_tracks_recovery_with_legacy_context(temp_dir: Path) -> None:
    """A failed tool call seeds recovery tracking and emits a recovery event."""
    async def assess_confidence(tool_name: str, tool_args: dict, context: str) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def verify_action(tool_name: str, tool_args: dict, result: str, expected: str = "") -> ActionVerification:
        raise AssertionError("Verification should not run for failed actions")

    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=True,
    )
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    tool_call = ToolCall(id="bash-1", name="bash", arguments={"command": "pytest"})
    executor = FakeExecutor([tool_outcome(tool_call=tool_call, output="command failed", is_error=True)])
    summary = TurnSummary(final_response="")
    events: list[AgentEvent] = []

    async def emit(event: AgentEvent) -> None:
        events.append(event)

    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=emit,
        summary=summary,
        dod=create_definition_of_done("Run tests"),
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    assert context.recovery_context is not None
    assert summary.tool_result_messages
    # The tool-result message is appended to the session verbatim.
    assert context.session.messages[-1] == summary.tool_result_messages[-1]
    assert any(event.type == "recovery" for event in events)
@pytest.mark.asyncio
async def test_tool_batch_runner_emits_tool_metadata(temp_dir: Path) -> None:
    """Registry-result metadata is forwarded on the tool_result event."""
    async def assess_confidence(tool_name: str, tool_args: dict, context: str) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def verify_action(tool_name: str, tool_args: dict, result: str, expected: str = "") -> ActionVerification:
        raise AssertionError("Verification should not run for this scenario")

    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    tool_call = ToolCall(
        id="bash-1",
        name="bash",
        arguments={"command": "python -m http.server 8000", "background": True},
    )
    metadata = {
        "job_id": "bash-1",
        "status": "running",
        "background": True,
    }
    executor = FakeExecutor(
        [
            tool_outcome(
                tool_call=tool_call,
                output="Started bash job bash-1",
                is_error=False,
                metadata=metadata,
            )
        ]
    )
    events: list[AgentEvent] = []

    async def emit(event: AgentEvent) -> None:
        events.append(event)

    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=emit,
        summary=TurnSummary(final_response=""),
        dod=create_definition_of_done("Launch a preview server"),
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    tool_result = next(event for event in events if event.type == "tool_result")
    assert tool_result.tool_metadata == metadata
@pytest.mark.asyncio
async def test_tool_batch_runner_verifies_with_context_services(temp_dir: Path) -> None:
    """Verification runs via the context's reasoning hook and records progress."""
    verification_calls: list[str] = []

    async def assess_confidence(tool_name: str, tool_args: dict, context: str) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def verify_action(tool_name: str, tool_args: dict, result: str, expected: str = "") -> ActionVerification:
        verification_calls.append(result)
        return ActionVerification(
            tool_name=tool_name,
            tool_args=tool_args,
            expected_outcome="Success",
            actual_result=result,
            verified=False,
            discrepancies=["File contents did not match"],
            needs_correction=True,
            correction_suggestion="Read the file before editing again.",
        )

    existing_recovery = RecoveryContext(
        original_tool="edit",
        original_args={"file_path": "README.md"},
    )
    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        recovery_context=existing_recovery,
        verification=True,
    )
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    tool_call = ToolCall(id="read-1", name="read", arguments={"file_path": "README.md"})
    executor = FakeExecutor([tool_outcome(tool_call=tool_call, output="file contents", is_error=False)])
    events: list[AgentEvent] = []

    async def emit(event: AgentEvent) -> None:
        events.append(event)

    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=emit,
        summary=TurnSummary(final_response=""),
        dod=create_definition_of_done("Read the docs"),
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    assert verification_calls == ["file contents"]
    # A successful read is recorded on the pre-existing recovery context
    # rather than replacing it.
    assert context.recovery_context is existing_recovery
    assert existing_recovery.successful_steps == [
        ("read", {"file_path": "README.md"})
    ]
    assert context.session.messages[-1].role == Role.TOOL
    assert context.session.messages[-1].content == "file contents"
    assert any(event.type == "verification" for event in events)
@pytest.mark.asyncio
async def test_tool_batch_runner_preserves_recovery_context_across_diagnostic_success(
    temp_dir: Path,
) -> None:
    """A successful diagnostic (read-only) step keeps the recovery context alive."""
    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run for this scenario")

    existing_recovery = RecoveryContext(
        original_tool="read",
        original_args={"file_path": "chapters/04-data-types.html"},
    )
    existing_recovery.add_attempt(
        "read",
        {"file_path": "chapters/04-data-types.html"},
        "File not found",
    )
    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        recovery_context=existing_recovery,
        auto_recover=False,
    )
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    tool_call = ToolCall(
        id="bash-1",
        name="bash",
        arguments={"command": "ls chapters"},
    )
    executor = FakeExecutor(
        [tool_outcome(tool_call=tool_call, output="01-introduction.html", is_error=False)]
    )
    summary = TurnSummary(final_response="")
    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=summary,
        dod=create_definition_of_done("Fix the chapter links"),
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    assert context.recovery_context is existing_recovery
    assert existing_recovery.successful_steps == [
        ("bash", {"command": "ls chapters"})
    ]


@pytest.mark.asyncio
async def test_tool_batch_runner_clears_recovery_context_after_successful_mutation(
    temp_dir: Path,
) -> None:
    """A successful mutating step (patch) resolves and clears recovery tracking."""
    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run for this scenario")

    existing_recovery = RecoveryContext(
        original_tool="read",
        original_args={"file_path": "chapters/04-data-types.html"},
    )
    existing_recovery.add_attempt(
        "read",
        {"file_path": "chapters/04-data-types.html"},
        "File not found",
    )
    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        recovery_context=existing_recovery,
        auto_recover=False,
    )
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    tool_call = ToolCall(
        id="patch-1",
        name="patch",
        arguments={
            "file_path": "index.html",
            "hunks": [{"old_start": 1, "old_lines": 1, "new_start": 1, "new_lines": 1, "lines": ["-a", "+b"]}],
        },
    )
    executor = FakeExecutor(
        [tool_outcome(tool_call=tool_call, output="Patched index.html", is_error=False)]
    )
    summary = TurnSummary(final_response="")
    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=summary,
        dod=create_definition_of_done("Fix the chapter links"),
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    assert context.recovery_context is None
@pytest.mark.asyncio
async def test_tool_batch_runner_queues_duplicate_observation_nudge(
    temp_dir: Path,
) -> None:
    """A skipped duplicate read queues one persistent nudge toward the missing artifact."""
    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run for this scenario")

    # NOTE(review): the HTML literals below were garbled in transit (markup
    # stripped); they are best-effort reconstructions — confirm against the
    # original fixture.
    messages = [
        Message(
            role=Role.TOOL,
            content=(
                "Observation [glob]: Result: "
                f"{temp_dir}/chapters/01-introduction.html\n"
                f"{temp_dir}/chapters/02-setup.html\n"
                f"{temp_dir}/chapters/03-basics.html"
            ),
            tool_results=[],
        ),
        Message(
            role=Role.ASSISTANT,
            content="I already inspected the first chapter title.",
            tool_calls=[
                ToolCall(
                    id="read-ch1",
                    name="read",
                    arguments={"file_path": str(temp_dir / 'chapters' / '01-introduction.html')},
                )
            ],
        ),
        Message.tool_result_message(
            tool_call_id="read-ch1",
            display_content="<h1>Chapter 1: Introduction to Fortran</h1>\n",
            result_content="<h1>Chapter 1: Introduction to Fortran</h1>\n",
        ),
        Message(
            role=Role.ASSISTANT,
            content="I should update the index now.",
            tool_calls=[
                ToolCall(
                    id="read-index",
                    name="read",
                    arguments={"file_path": str(temp_dir / 'index.html')},
                )
            ],
        ),
    ]
    context = build_context(
        temp_dir=temp_dir,
        messages=messages,
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    (temp_dir / "chapters").mkdir()
    (temp_dir / "index.html").write_text("\n")
    (temp_dir / "chapters" / "01-introduction.html").write_text("<h1>Intro</h1>\n")
    (temp_dir / "chapters" / "02-setup.html").write_text("<h1>Setup</h1>\n")
    (temp_dir / "chapters" / "03-basics.html").write_text("<h1>Basics</h1>\n")
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{temp_dir / 'index.html'}`",
                f"- `{temp_dir / 'chapters' / '01-introduction.html'}`",
                f"- `{temp_dir / 'chapters' / '02-setup.html'}`",
                f"- `{temp_dir / 'chapters' / '03-basics.html'}`",
                f"- `{temp_dir / 'chapters' / '04-variables.html'}`",
            ]
        )
    )
    context.session.current_task = (
        f"Update {temp_dir / 'index.html'} with the right chapter links."
    )
    persistent_messages: list[str] = []
    ephemeral_messages: list[str] = []
    context.queue_steering_message_callback = persistent_messages.append
    context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    tool_call = ToolCall(
        id="read-dup",
        name="read",
        arguments={"file_path": str(temp_dir / "index.html")},
    )
    duplicate_message = (
        "[Skipped - duplicate action: Already read "
        f"{temp_dir / 'index.html'} recently without any relevant intervening changes; "
        "reuse the earlier read result instead of rereading]"
    )
    executor = FakeExecutor(
        [
            ToolExecutionOutcome(
                tool_call=tool_call,
                state=ToolExecutionState.DUPLICATE,
                message=Message.tool_result_message(
                    tool_call_id=tool_call.id,
                    display_content=duplicate_message,
                    result_content=duplicate_message,
                ),
                event_content=duplicate_message,
                is_error=False,
                result_output=duplicate_message,
            )
        ]
    )
    summary = TurnSummary(final_response="")
    dod = create_definition_of_done("Fix the chapter links")
    dod.implementation_plan = str(implementation_plan)
    dod.pending_items.append("Create the remaining chapter files")
    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=summary,
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    assert len(persistent_messages) == 1
    assert "Reuse the earlier observation instead of repeating it." in persistent_messages[0]
    assert "A declared output artifact is still missing." in persistent_messages[0]
    assert "Resume by creating `04-variables.html` now." in persistent_messages[0]
    assert (
        "Prefer one `write` call for "
        f"`{display_runtime_path(temp_dir / 'chapters' / '04-variables.html')}` instead of more rereads."
        in persistent_messages[0]
    )
    assert ephemeral_messages == []
@pytest.mark.asyncio
async def test_tool_batch_runner_duplicate_read_keeps_root_declared_missing_html_output_active(
    temp_dir: Path,
) -> None:
    """A rooted guide with a missing linked chapter keeps the pending nudge active."""
    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should not run for this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run for this scenario")

    guide_root = temp_dir / "guide"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True)
    index = guide_root / "index.html"
    chapter_one = chapters / "01-introduction.html"
    # NOTE(review): these HTML literals were garbled in transit (markup
    # stripped); reconstructed so the index references an existing chapter
    # ("Intro") and a missing one ("Install") — confirm against the original.
    index.write_text(
        '<a href="chapters/01-introduction.html">Intro</a>\n'
        '<a href="chapters/02-installation.html">Install</a>\n'
    )
    chapter_one.write_text("<h1>Intro</h1>\n")
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{index}`",
                f"- `{chapters}/` (directory for chapter files)",
            ]
        )
    )
    messages = [
        Message(
            role=Role.ASSISTANT,
            content="I should keep building the guide.",
            tool_calls=[
                ToolCall(
                    id="read-index",
                    name="read",
                    arguments={"file_path": str(index)},
                )
            ],
        ),
    ]
    context = build_context(
        temp_dir=temp_dir,
        messages=messages,
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    context.session.current_task = f"Build the guide rooted at {index}."
    persistent_messages: list[str] = []
    ephemeral_messages: list[str] = []
    context.queue_steering_message_callback = persistent_messages.append
    context.queue_ephemeral_steering_message_callback = ephemeral_messages.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    tool_call = ToolCall(
        id="read-dup-rooted",
        name="read",
        arguments={"file_path": str(index)},
    )
    duplicate_message = (
        "[Skipped - duplicate action: Already read "
        f"{index} recently without any relevant intervening changes; "
        "reuse the earlier read result instead of rereading]"
    )
    executor = FakeExecutor(
        [
            ToolExecutionOutcome(
                tool_call=tool_call,
                state=ToolExecutionState.DUPLICATE,
                message=Message.tool_result_message(
                    tool_call_id=tool_call.id,
                    display_content=duplicate_message,
                    result_content=duplicate_message,
                ),
                event_content=duplicate_message,
                is_error=False,
                result_output=duplicate_message,
            )
        ]
    )
    summary = TurnSummary(final_response="")
    dod = create_definition_of_done("Create a multi-file HTML guide with chapters.")
    dod.implementation_plan = str(implementation_plan)
    dod.touched_files = [str(index), str(chapter_one)]
    dod.completed_items = ["Create chapter files with appropriate content"]
    dod.pending_items.append("Create the remaining chapter files")
    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=summary,
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    assert len(persistent_messages) == 1
    assert "Create the remaining chapter files" in persistent_messages[0]
    assert "Resume by creating `02-installation.html` now." in persistent_messages[0]
    assert "All explicitly planned artifacts already exist on disk." not in persistent_messages[0]
    assert ephemeral_messages == []
@pytest.mark.asyncio
async def test_tool_batch_runner_duplicate_read_after_edit_mismatch_steers_to_mutation(
    temp_dir: Path,
) -> None:
    """After an edit old_string mismatch, a duplicate read is steered to a mutation."""
    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should not run for this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run for this scenario")

    target = temp_dir / "guide" / "chapters" / "02-installation.html"
    target.parent.mkdir(parents=True)
    # NOTE(review): the HTML literals in this test were garbled in transit
    # (markup stripped); reconstructed best-effort — confirm against the
    # original fixture. The old_string intentionally does NOT appear in the
    # file on disk, simulating the failed edit.
    target.write_text(
        "<h1>Chapter 2: Installation Guide</h1>\n"
        "<p>This chapter is still too thin.</p>\n"
    )
    recovery_context = RecoveryContext(
        original_tool="edit",
        original_args={
            "file_path": str(target),
            "old_string": "<h2>Installation</h2>",
            "new_string": "<h2>Installation</h2><p>Expanded.</p>",
        },
        max_retries=2,
    )
    recovery_context.add_attempt(
        "edit",
        {
            "file_path": str(target),
            "old_string": "<h2>Installation</h2>",
            "new_string": "<h2>Installation</h2><p>Expanded.</p>",
        },
        "old_string not found in file. Make sure it matches exactly.",
    )
    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        recovery_context=recovery_context,
        auto_recover=False,
    )
    persistent_messages: list[str] = []
    context.queue_steering_message_callback = persistent_messages.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    tool_call = ToolCall(
        id="read-dup-after-edit-miss",
        name="read",
        arguments={"file_path": str(target)},
    )
    duplicate_message = (
        "[Skipped - duplicate action: Already read "
        f"{target} recently without any relevant intervening changes; "
        "reuse the earlier read result instead of rereading]"
    )
    executor = FakeExecutor(
        [
            ToolExecutionOutcome(
                tool_call=tool_call,
                state=ToolExecutionState.DUPLICATE,
                message=Message.tool_result_message(
                    tool_call_id=tool_call.id,
                    display_content=duplicate_message,
                    result_content=duplicate_message,
                ),
                event_content=duplicate_message,
                is_error=False,
                result_output=duplicate_message,
            )
        ]
    )
    dod = create_definition_of_done("Expand thin generated guide chapters.")
    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=TurnSummary(final_response=""),
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    assert len(persistent_messages) == 1
    assert "last edit" in persistent_messages[0]
    assert "`old_string` did not exactly match" in persistent_messages[0]
    assert "send one concrete mutation now" in persistent_messages[0]
    assert "`write` with the complete replacement content" in persistent_messages[0]


@pytest.mark.asyncio
async def test_tool_batch_runner_todo_write_does_not_regress_completed_file_todo(
    temp_dir: Path,
) -> None:
    """A stale TodoWrite cannot move an already-completed file todo back to pending."""
    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should not run for this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run for this scenario")

    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    dod = create_definition_of_done("Create a multi-file nginx guide.")
    sync_todos_to_definition_of_done(
        dod,
        [
            {
                "content": "Create 03-first-website.html",
                "active_form": "Creating 03-first-website.html",
                "status": "pending",
            },
            {
                "content": "Create 04-configuration-basics.html",
                "active_form": "Creating 04-configuration-basics.html",
                "status": "pending",
            },
        ],
    )
    chapter_path = temp_dir / "guides" / "nginx" / "chapters" / "03-first-website.html"
    chapter_path.parent.mkdir(parents=True)
    write_call = ToolCall(
        id="write-ch3",
        name="write",
        arguments={"file_path": str(chapter_path), "content": "\n"},
    )
    # The model re-sends the todo list without marking the finished item done.
    stale_todo_call = ToolCall(
        id="todo-stale",
        name="TodoWrite",
        arguments={
            "todos": [
                {
                    "content": "Create 03-first-website.html",
                    "active_form": "Creating 03-first-website.html",
                    "status": "pending",
                },
                {
                    "content": "Create 04-configuration-basics.html",
                    "active_form": "Creating 04-configuration-basics.html",
                    "status": "pending",
                },
            ]
        },
    )
    executor = FakeExecutor(
        [
            tool_outcome(
                tool_call=write_call,
                output=f"Successfully wrote {chapter_path}",
                is_error=False,
            ),
            tool_outcome(
                tool_call=stale_todo_call,
                output="Todos updated",
                is_error=False,
                metadata={
                    "new_todos": [
                        {
                            "content": "Create 03-first-website.html",
                            "active_form": "Creating 03-first-website.html",
                            "status": "pending",
                        },
                        {
                            "content": "Create 04-configuration-basics.html",
                            "active_form": "Creating 04-configuration-basics.html",
                            "status": "pending",
                        },
                    ]
                },
            ),
        ]
    )
    summary = TurnSummary(final_response="")
    await runner.execute_batch(
        tool_calls=[write_call, stale_todo_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=summary,
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )

    assert "Create 03-first-website.html" in dod.completed_items
    assert "Create 03-first-website.html" not in dod.pending_items
    assert "Create 04-configuration-basics.html" in dod.pending_items
AssertionError("Confidence scoring should not run for this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run for this scenario") context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create a multi-file nginx guide.") sync_todos_to_definition_of_done( dod, [ { "content": "Create 03-first-website.html", "active_form": "Creating 03-first-website.html", "status": "pending", }, { "content": "Create 04-configuration-basics.html", "active_form": "Creating 04-configuration-basics.html", "status": "pending", }, ], ) chapter_path = temp_dir / "guides" / "nginx" / "chapters" / "03-first-website.html" chapter_path.parent.mkdir(parents=True) write_call = ToolCall( id="write-ch3", name="write", arguments={"file_path": str(chapter_path), "content": "\n"}, ) stale_todo_call = ToolCall( id="todo-stale", name="TodoWrite", arguments={ "todos": [ { "content": "Create 03-first-website.html", "active_form": "Creating 03-first-website.html", "status": "pending", }, { "content": "Create 04-configuration-basics.html", "active_form": "Creating 04-configuration-basics.html", "status": "pending", }, ] }, ) executor = FakeExecutor( [ tool_outcome( tool_call=write_call, output=f"Successfully wrote {chapter_path}", is_error=False, ), tool_outcome( tool_call=stale_todo_call, output="Todos updated", is_error=False, metadata={ "new_todos": [ { "content": "Create 03-first-website.html", "active_form": "Creating 03-first-website.html", "status": "pending", }, { "content": "Create 04-configuration-basics.html", "active_form": "Creating 04-configuration-basics.html", "status": "pending", }, ] }, ), ] ) summary = TurnSummary(final_response="") await 
runner.execute_batch( tool_calls=[write_call, stale_todo_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert "Create 03-first-website.html" in dod.completed_items assert "Create 03-first-website.html" not in dod.pending_items assert "Create 04-configuration-basics.html" in dod.pending_items @pytest.mark.asyncio async def test_tool_batch_runner_proactively_queues_verified_html_inventory( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should be disabled in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run for this scenario") chapters = temp_dir / "chapters" chapters.mkdir() (chapters / "01-introduction.html").write_text( "

Chapter 1: Introduction to Fortran

\n" ) (chapters / "02-setup.html").write_text( "

Chapter 2: Setting Up Your Environment

\n" ) (temp_dir / "index.html").write_text("\n") context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) context.session.current_task = ( f"Update {temp_dir / 'index.html'} so the chapter links match the sibling files." ) persistent_messages: list[str] = [] ephemeral_messages: list[str] = [] context.queue_steering_message_callback = persistent_messages.append context.queue_ephemeral_steering_message_callback = ephemeral_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) tool_call = ToolCall( id="glob-1", name="glob", arguments={"path": str(chapters), "pattern": "*.html"}, ) executor = FakeExecutor( [ tool_outcome( tool_call=tool_call, output="\n".join( [ str(chapters / "01-introduction.html"), str(chapters / "02-setup.html"), ] ), is_error=False, ) ] ) summary = TurnSummary(final_response="") await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=create_definition_of_done("Fix the chapter links"), executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert persistent_messages == [] assert ephemeral_messages == [] assert len(summary.tool_result_messages) == 1 assert "Verified chapter inventory:" not in summary.tool_result_messages[0].content @pytest.mark.asyncio async def test_tool_batch_runner_marks_validated_html_toc_completion_after_successful_edit( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should be disabled in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run for this scenario") 
chapters = temp_dir / "chapters" chapters.mkdir() (chapters / "01-introduction.html").write_text( "

Chapter 1: Introduction to Fortran

\n" ) (chapters / "02-setup.html").write_text( "

Chapter 2: Setting Up Your Environment

\n" ) index_path = temp_dir / "index.html" old_block = ( '\n" ) new_block = ( '\n" ) index_path.write_text(new_block) context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) context.session.current_task = ( "Update index.html so every chapter link and title matches the real HTML files in chapters/." ) persistent_messages: list[str] = [] ephemeral_messages: list[str] = [] context.queue_steering_message_callback = persistent_messages.append context.queue_ephemeral_steering_message_callback = ephemeral_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) tool_call = ToolCall( id="edit-1", name="edit", arguments={ "file_path": str(index_path), "old_string": old_block, "new_string": new_block, }, ) executor = FakeExecutor( [ tool_outcome( tool_call=tool_call, output=f"Successfully edited {index_path}", is_error=False, ) ] ) summary = TurnSummary(final_response="") await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=create_definition_of_done( "Update index.html so every chapter link and title matches the real HTML files in chapters/." 
), executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert all( "Semantic verification preview:" not in message.content for message in summary.tool_result_messages ) assert persistent_messages == [] assert ephemeral_messages == [] @pytest.mark.asyncio async def test_tool_batch_runner_does_not_apply_html_toc_handoff_to_reference_read( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should be disabled in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run for this scenario") chapters = temp_dir / "chapters" chapters.mkdir() (chapters / "01-introduction.html").write_text( "

Chapter 1: Introduction to Fortran

\n" ) (chapters / "02-setup.html").write_text( "

Chapter 2: Setting Up Your Environment

\n" ) index_path = temp_dir / "index.html" index_path.write_text( "

Table of Contents

\n" '\n" ) prompt = ( "Have a look at ~/Loader/guides/fortran and chapters/ within. Get a feel " "for the structure and cadence of the guide. We are going to make an all " "new equally thorough guide on how to use the nginx tool." ) context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) context.session.current_task = prompt # type: ignore[attr-defined] persistent_messages: list[str] = [] ephemeral_messages: list[str] = [] context.queue_steering_message_callback = persistent_messages.append context.queue_ephemeral_steering_message_callback = ephemeral_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) tool_call = ToolCall( id="read-index", name="read", arguments={"file_path": str(index_path)}, ) executor = FakeExecutor( [ tool_outcome( tool_call=tool_call, output=index_path.read_text(), is_error=False, ) ] ) summary = TurnSummary(final_response="") await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=create_definition_of_done(prompt), executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert persistent_messages == [] assert ephemeral_messages == [] assert all( "Semantic verification preview:" not in message.content for message in summary.tool_result_messages ) @pytest.mark.asyncio async def test_tool_batch_runner_queues_next_pending_todo_after_discovery_progress( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should be disabled in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run for this 
scenario") reference = temp_dir / "fortran" / "chapters" / "01-introduction.html" reference.parent.mkdir(parents=True) reference.write_text("

Introduction

\n

Guide cadence.

\n") nginx_root = temp_dir / "Loader" / "guides" / "nginx" chapters = nginx_root / "chapters" implementation_plan = temp_dir / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{chapters}/`", f"- `{nginx_root / 'index.html'}`", "", ] ) ) context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) persistent_messages: list[str] = [] ephemeral_messages: list[str] = [] context.queue_steering_message_callback = persistent_messages.append context.queue_ephemeral_steering_message_callback = ephemeral_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create an equally thorough nginx guide.") dod.implementation_plan = str(implementation_plan) sync_todos_to_definition_of_done( dod, [ { "content": "Examine the existing Fortran guide structure to understand the cadence and format", "active_form": "Working on: Examine the existing Fortran guide structure to understand the cadence and format", "status": "pending", }, { "content": "Create the nginx directory structure", "active_form": "Working on: Create the nginx directory structure", "status": "pending", }, { "content": "Create the nginx index.html file", "active_form": "Working on: Create the nginx index.html file", "status": "pending", }, ], ) tool_call = ToolCall( id="read-reference", name="read", arguments={"file_path": str(reference)}, ) executor = FakeExecutor( [ tool_outcome( tool_call=tool_call, output="

Introduction

\n

Guide cadence.

\n", is_error=False, ) ] ) summary = TurnSummary(final_response="") await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert ( "Examine the existing Fortran guide structure to understand the cadence and format" in dod.completed_items ) assert any( "Continue with the next pending item: `Create the nginx directory structure`" in message for message in persistent_messages ) assert any( "Resume by creating `chapters/` now." in message for message in persistent_messages ) assert all("01-introduction.html" not in message for message in persistent_messages) assert ephemeral_messages == [] @pytest.mark.asyncio async def test_tool_batch_runner_queues_setup_directory_before_file_when_plan_lists_index_first( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should be disabled in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run for this scenario") reference = temp_dir / "fortran" / "chapters" / "01-introduction.html" reference.parent.mkdir(parents=True) reference.write_text("

Introduction

\n

Guide cadence.

\n") nginx_root = temp_dir / "Loader" / "guides" / "nginx" chapters = nginx_root / "chapters" implementation_plan = temp_dir / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{nginx_root / 'index.html'}`", f"- `{chapters}/`", "", ] ) ) context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) persistent_messages: list[str] = [] ephemeral_messages: list[str] = [] context.queue_steering_message_callback = persistent_messages.append context.queue_ephemeral_steering_message_callback = ephemeral_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create an equally thorough nginx guide.") dod.implementation_plan = str(implementation_plan) sync_todos_to_definition_of_done( dod, [ { "content": "Examine the existing Fortran guide structure to understand the cadence and format", "active_form": "Working on: Examine the existing Fortran guide structure to understand the cadence and format", "status": "pending", }, { "content": "Create the nginx directory structure", "active_form": "Working on: Create the nginx directory structure", "status": "pending", }, { "content": "Create the nginx index.html file", "active_form": "Working on: Create the nginx index.html file", "status": "pending", }, ], project_root=temp_dir, ) tool_call = ToolCall( id="read-reference-index-first", name="read", arguments={"file_path": str(reference)}, ) executor = FakeExecutor( [ tool_outcome( tool_call=tool_call, output="

Introduction

\n

Guide cadence.

\n", is_error=False, ) ] ) summary = TurnSummary(final_response="") await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert persistent_messages assert any( "Continue with the next pending item: `Create the nginx directory structure`" in message for message in persistent_messages ) assert any( "Resume by creating `chapters/` now." in message for message in persistent_messages ) assert all( "Next step: create `index.html`." not in message for message in persistent_messages ) assert ephemeral_messages == [] @pytest.mark.asyncio async def test_tool_batch_runner_duplicate_reference_read_prefers_next_pending_todo( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should be disabled in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run for this scenario") reference = temp_dir / "fortran" / "index.html" reference.parent.mkdir(parents=True) reference.write_text("

Fortran Beginner's Guide

\n") messages = [ Message( role=Role.TOOL, content=( "Observation [read]: Result: " "

Fortran Beginner's Guide

\n" ), ) ] context = build_context( temp_dir=temp_dir, messages=messages, safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) prompt = ( "Have a look at ~/Loader/guides/fortran and chapters/ within. Get a feel " "for the structure and cadence of the guide. We are going to make an all " "new equally thorough guide on how to use the nginx tool." ) context.session.current_task = prompt persistent_messages: list[str] = [] ephemeral_messages: list[str] = [] context.queue_steering_message_callback = persistent_messages.append context.queue_ephemeral_steering_message_callback = ephemeral_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done(prompt) sync_todos_to_definition_of_done( dod, [ { "content": "Examine the existing Fortran guide structure to understand the cadence and format", "active_form": "Working on: Examine the existing Fortran guide structure to understand the cadence and format", "status": "completed", }, { "content": "Create the nginx directory structure", "active_form": "Working on: Create the nginx directory structure", "status": "pending", }, { "content": "Create the nginx index.html file", "active_form": "Working on: Create the nginx index.html file", "status": "pending", }, ], ) tool_call = ToolCall( id="read-dup", name="read", arguments={"file_path": str(reference)}, ) duplicate_message = ( "[Skipped - duplicate action: Already read " f"{reference} recently without any relevant intervening changes; " "reuse the earlier read result instead of rereading]" ) executor = FakeExecutor( [ ToolExecutionOutcome( tool_call=tool_call, state=ToolExecutionState.DUPLICATE, message=Message.tool_result_message( tool_call_id=tool_call.id, display_content=duplicate_message, result_content=duplicate_message, ), event_content=duplicate_message, is_error=False, result_output=duplicate_message, ) ] ) summary = TurnSummary(final_response="") await 
runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert len(persistent_messages) == 1 assert "Reuse the earlier observation instead of repeating it." in persistent_messages[0] assert ( "Continue with the next pending item: `Create the nginx directory structure`" in persistent_messages[0] ) assert "Update `" not in persistent_messages[0] assert ephemeral_messages == [] @pytest.mark.asyncio async def test_tool_batch_runner_successful_reference_read_prioritizes_concrete_missing_artifact( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should be disabled in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run for this scenario") guide_root = temp_dir / "Loader" / "guides" / "nginx" chapters = guide_root / "chapters" chapters.mkdir(parents=True) chapter_one = chapters / "01-introduction.html" chapter_one.write_text("\n") index_path = guide_root / "index.html" reference = temp_dir / "Loader" / "guides" / "fortran" / "chapters" / "01-introduction.html" reference.parent.mkdir(parents=True, exist_ok=True) reference.write_text("

Introduction

\n

Guide cadence.

\n") implementation_plan = temp_dir / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{guide_root}/`", f"- `{chapters}/`", f"- `{index_path}`", f"- `{chapter_one}`", f"- `{chapters / '02-installation.html'}`", "", ] ) ) context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) persistent_messages: list[str] = [] ephemeral_messages: list[str] = [] context.queue_steering_message_callback = persistent_messages.append context.queue_ephemeral_steering_message_callback = ephemeral_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create a multi-file nginx guide.") dod.implementation_plan = str(implementation_plan) dod.touched_files.append(str(chapter_one)) sync_todos_to_definition_of_done( dod, [ { "content": "Examine the existing Fortran guide structure to understand the format and cadence", "active_form": "Working on: Examine the existing Fortran guide structure to understand the format and cadence", "status": "pending", }, { "content": "Create each chapter file with appropriate content", "active_form": "Working on: Create each chapter file with appropriate content", "status": "pending", }, { "content": "Ensure all files follow the same structure and style as the Fortran guide", "active_form": "Working on: Ensure all files follow the same structure and style as the Fortran guide", "status": "pending", }, ], ) tool_call = ToolCall( id="read-reference-chapter", name="read", arguments={"file_path": str(reference)}, ) read_output = "Observation [read]: Result:

Introduction

\n

Guide cadence.

\n" executor = FakeExecutor( [ ToolExecutionOutcome( tool_call=tool_call, state=ToolExecutionState.EXECUTED, message=Message.tool_result_message( tool_call_id=tool_call.id, display_content=read_output, result_content=read_output, ), event_content=read_output, is_error=False, result_output=read_output, ) ] ) summary = TurnSummary(final_response="") await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert persistent_messages assert any( "Confirmed progress: `Examine the existing Fortran guide structure to understand the format and cadence`" in message for message in persistent_messages ) assert any("Resume by creating `index.html` now." in message for message in persistent_messages) assert not any( "Continue with the next pending item: `Create each chapter file with appropriate content`" in message for message in persistent_messages ) assert ephemeral_messages == [] @pytest.mark.asyncio async def test_tool_batch_runner_duplicate_read_ignores_unplanned_expansion_after_plan_complete( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should not run for this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run for this scenario") guide_root = temp_dir / "guides" / "nginx" chapters = guide_root / "chapters" guide_root.mkdir(parents=True) chapters.mkdir() index_path = guide_root / "index.html" chapter_one = chapters / "01-getting-started.html" chapter_two = chapters / "02-installation.html" index_path.write_text("\n") chapter_one.write_text("

One

\n") chapter_two.write_text("

Two

\n") implementation_plan = temp_dir / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{guide_root}/`", f"- `{chapters}/`", f"- `{index_path}`", f"- `{chapter_one}`", f"- `{chapter_two}`", "", ] ) ) context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) persistent_messages: list[str] = [] ephemeral_messages: list[str] = [] context.queue_steering_message_callback = persistent_messages.append context.queue_ephemeral_steering_message_callback = ephemeral_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create a multi-file nginx guide.") dod.implementation_plan = str(implementation_plan) dod.pending_items = [ "Create 07-performance-tuning.html", "Verify all guide files are linked and complete", "Complete the requested work", ] tool_call = ToolCall( id="read-dup", name="read", arguments={"file_path": str(chapter_one)}, ) duplicate_message = ( "[Skipped - duplicate action: Already read " f"{chapter_one} recently without any relevant intervening changes; " "reuse the earlier read result instead of rereading]" ) executor = FakeExecutor( [ ToolExecutionOutcome( tool_call=tool_call, state=ToolExecutionState.DUPLICATE, message=Message.tool_result_message( tool_call_id=tool_call.id, display_content=duplicate_message, result_content=duplicate_message, ), event_content=duplicate_message, is_error=False, result_output=duplicate_message, ) ] ) summary = TurnSummary(final_response="") await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert len(persistent_messages) == 1 assert "Verify all guide 
files are linked and complete" in persistent_messages[0] assert "Create 07-performance-tuning.html" not in persistent_messages[0] assert ephemeral_messages == [] @pytest.mark.asyncio async def test_tool_batch_runner_duplicate_read_after_plan_complete_pushes_verification_handoff( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should not run for this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run for this scenario") guide_root = temp_dir / "guides" / "nginx" chapters = guide_root / "chapters" guide_root.mkdir(parents=True) chapters.mkdir() index_path = guide_root / "index.html" chapter_one = chapters / "01-getting-started.html" chapter_two = chapters / "02-installation.html" index_path.write_text("\n") chapter_one.write_text("

One

\n") chapter_two.write_text("

Two

\n") implementation_plan = temp_dir / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{guide_root}/`", f"- `{chapters}/`", f"- `{index_path}`", f"- `{chapter_one}`", f"- `{chapter_two}`", "", ] ) ) context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) persistent_messages: list[str] = [] ephemeral_messages: list[str] = [] context.queue_steering_message_callback = persistent_messages.append context.queue_ephemeral_steering_message_callback = ephemeral_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create a multi-file nginx guide.") dod.implementation_plan = str(implementation_plan) dod.verification_commands = [f"ls -la {guide_root}"] dod.pending_items = [ "Create 07-performance-tuning.html", "Complete the requested work", ] tool_call = ToolCall( id="read-dup", name="read", arguments={"file_path": str(chapter_one)}, ) duplicate_message = ( "[Skipped - duplicate action: Already read " f"{chapter_one} recently without any relevant intervening changes; " "reuse the earlier read result instead of rereading]" ) executor = FakeExecutor( [ ToolExecutionOutcome( tool_call=tool_call, state=ToolExecutionState.DUPLICATE, message=Message.tool_result_message( tool_call_id=tool_call.id, display_content=duplicate_message, result_content=duplicate_message, ), event_content=duplicate_message, is_error=False, result_output=duplicate_message, ) ] ) summary = TurnSummary(final_response="") await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert len(persistent_messages) == 1 assert "All explicitly 
planned artifacts already exist on disk." in persistent_messages[0] assert ( "Finish with a final response now so Loader can run verification automatically." in persistent_messages[0] ) assert "Create 07-performance-tuning.html" not in persistent_messages[0] assert ephemeral_messages == [] @pytest.mark.asyncio async def test_tool_batch_runner_duplicate_read_after_plan_complete_ignores_stale_creation_todos( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should not run for this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run for this scenario") guide_root = temp_dir / "guides" / "nginx" chapters = guide_root / "chapters" guide_root.mkdir(parents=True) chapters.mkdir() index_path = guide_root / "index.html" chapter_one = chapters / "01-getting-started.html" chapter_two = chapters / "02-installation.html" index_path.write_text("\n") chapter_one.write_text("

One

\n") chapter_two.write_text("

Two

\n") implementation_plan = temp_dir / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{guide_root}/`", f"- `{chapters}/`", f"- `{index_path}`", f"- `{chapter_one}`", f"- `{chapter_two}`", "", ] ) ) context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) persistent_messages: list[str] = [] ephemeral_messages: list[str] = [] context.queue_steering_message_callback = persistent_messages.append context.queue_ephemeral_steering_message_callback = ephemeral_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create a multi-file nginx guide.") dod.implementation_plan = str(implementation_plan) dod.verification_commands = [f"ls -la {guide_root}"] dod.pending_items = [ "Create 01-getting-started.html", "Creating 02-installation.html", "Complete the requested work", ] tool_call = ToolCall( id="read-dup-built-stale", name="read", arguments={"file_path": str(chapter_one)}, ) duplicate_message = ( "[Skipped - duplicate action: Already read " f"{chapter_one} recently without any relevant intervening changes; " "reuse the earlier read result instead of rereading]" ) executor = FakeExecutor( [ ToolExecutionOutcome( tool_call=tool_call, state=ToolExecutionState.DUPLICATE, message=Message.tool_result_message( tool_call_id=tool_call.id, display_content=duplicate_message, result_content=duplicate_message, ), event_content=duplicate_message, is_error=False, result_output=duplicate_message, ) ] ) summary = TurnSummary(final_response="") await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert 
len(persistent_messages) == 1 assert "All explicitly planned artifacts already exist on disk." in persistent_messages[0] assert ( "Finish with a final response now so Loader can run verification automatically." in persistent_messages[0] ) assert "Create 01-getting-started.html" not in persistent_messages[0] assert "Creating 02-installation.html" not in persistent_messages[0] assert ephemeral_messages == [] @pytest.mark.asyncio async def test_tool_batch_runner_successful_read_after_plan_complete_pushes_review_handoff( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should not run for this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run for this scenario") guide_root = temp_dir / "guides" / "nginx" chapters = guide_root / "chapters" guide_root.mkdir(parents=True) chapters.mkdir() index_path = guide_root / "index.html" chapter_one = chapters / "01-getting-started.html" chapter_two = chapters / "02-installation.html" index_path.write_text("\n") chapter_one.write_text("

One

\n") chapter_two.write_text("

Two

\n") implementation_plan = temp_dir / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{guide_root}/`", f"- `{chapters}/`", f"- `{index_path}`", f"- `{chapter_one}`", f"- `{chapter_two}`", "", ] ) ) context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) persistent_messages: list[str] = [] ephemeral_messages: list[str] = [] context.queue_steering_message_callback = persistent_messages.append context.queue_ephemeral_steering_message_callback = ephemeral_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create a multi-file nginx guide.") dod.implementation_plan = str(implementation_plan) dod.verification_commands = [f"ls -la {guide_root}"] sync_todos_to_definition_of_done( dod, [ { "content": "Create 01-getting-started.html", "active_form": "Creating 01-getting-started.html", "status": "pending", }, { "content": "Ensure all files are properly linked and formatted consistently", "active_form": "Reviewing guide consistency and linkage", "status": "pending", }, ], ) tool_call = ToolCall( id="read-built-review", name="read", arguments={"file_path": str(chapter_one)}, ) executor = FakeExecutor( [tool_outcome(tool_call=tool_call, output=chapter_one.read_text(), is_error=False)] ) summary = TurnSummary(final_response="") await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert persistent_messages == [] assert len(ephemeral_messages) == 1 message = ephemeral_messages[0] assert "All explicitly planned artifacts already exist." 
in message assert "Ensure all files are properly linked and formatted consistently" in message assert "Create 01-getting-started.html" not in message assert "do not keep broad-rereading the output set" in message assert "If no specific mismatch remains, finish with a final response so Loader can verify." in message @pytest.mark.asyncio async def test_tool_batch_runner_successful_read_after_plan_complete_switches_to_verify( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should not run for this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run for this scenario") guide_root = temp_dir / "guides" / "nginx" chapters = guide_root / "chapters" guide_root.mkdir(parents=True) chapters.mkdir() index_path = guide_root / "index.html" chapter_one = chapters / "01-getting-started.html" chapter_two = chapters / "02-installation.html" index_path.write_text("\n") chapter_one.write_text("

One

\n") chapter_two.write_text("

Two

\n") implementation_plan = temp_dir / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{guide_root}/`", f"- `{chapters}/`", f"- `{index_path}`", f"- `{chapter_one}`", f"- `{chapter_two}`", "", ] ) ) context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) persistent_messages: list[str] = [] ephemeral_messages: list[str] = [] context.queue_steering_message_callback = persistent_messages.append context.queue_ephemeral_steering_message_callback = ephemeral_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create a multi-file nginx guide.") dod.implementation_plan = str(implementation_plan) dod.verification_commands = [f"ls -la {guide_root}"] tool_call = ToolCall( id="read-built-verify", name="read", arguments={"file_path": str(chapter_one)}, ) executor = FakeExecutor( [tool_outcome(tool_call=tool_call, output=chapter_one.read_text(), is_error=False)] ) summary = TurnSummary(final_response="") await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert len(persistent_messages) == 1 assert "All explicitly planned artifacts already exist." in persistent_messages[0] assert "Finish with a final response now so Loader can run verification automatically." 
in persistent_messages[0] assert "stop broad rereads" in persistent_messages[0] assert ephemeral_messages == [] assert context.workflow_mode == "verify" @pytest.mark.asyncio async def test_tool_batch_runner_observation_handoff_pushes_mutation_step( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should be disabled in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run for this scenario") reference = temp_dir / "fortran" / "chapters" / "01-introduction.html" reference.parent.mkdir(parents=True) reference.write_text("

Introduction

\n

Guide cadence.

\n") context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) persistent_messages: list[str] = [] ephemeral_messages: list[str] = [] context.queue_steering_message_callback = persistent_messages.append context.queue_ephemeral_steering_message_callback = ephemeral_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create a multi-file nginx guide.") sync_todos_to_definition_of_done( dod, [ { "content": "Examine the existing Fortran guide structure to understand the cadence and format", "active_form": "Working on: Examine the existing Fortran guide structure to understand the cadence and format", "status": "pending", }, { "content": "Create the nginx index.html file", "active_form": "Working on: Create the nginx index.html file", "status": "pending", }, ], ) tool_call = ToolCall( id="read-reference", name="read", arguments={"file_path": str(reference)}, ) executor = FakeExecutor( [ tool_outcome( tool_call=tool_call, output="

Introduction

\n

Guide cadence.

\n", is_error=False, ) ] ) summary = TurnSummary(final_response="") await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert any( "Continue with the next pending item: `Create the nginx index.html file`" in message for message in persistent_messages ) assert any( "stop gathering more reference material and perform the change now" in message for message in persistent_messages ) assert ephemeral_messages == [] @pytest.mark.asyncio async def test_tool_batch_runner_discovery_completion_handoff_stays_persistent( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should be disabled in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run for this scenario") reference = temp_dir / "fortran" / "chapters" / "01-introduction.html" reference.parent.mkdir(parents=True) reference.write_text("

Introduction

\n

Guide cadence.

\n") context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) persistent_messages: list[str] = [] ephemeral_messages: list[str] = [] context.queue_steering_message_callback = persistent_messages.append context.queue_ephemeral_steering_message_callback = ephemeral_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create a multi-file nginx guide.") sync_todos_to_definition_of_done( dod, [ { "content": "First, examine the existing fortran guide structure and content", "active_form": "Working on: First, examine the existing fortran guide structure and content", "status": "pending", }, { "content": "Create the nginx directory structure", "active_form": "Working on: Create the nginx directory structure", "status": "pending", }, ], ) tool_call = ToolCall( id="read-reference", name="read", arguments={"file_path": str(reference)}, ) executor = FakeExecutor( [ tool_outcome( tool_call=tool_call, output="

Introduction

\n

Guide cadence.

\n", is_error=False, ) ] ) summary = TurnSummary(final_response="") await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert persistent_messages assert any( "Continue with the next pending item: `Create the nginx directory structure`" in message for message in persistent_messages ) assert ephemeral_messages == [] @pytest.mark.asyncio async def test_tool_batch_runner_missing_artifact_nudge_names_next_file_after_setup_mkdir( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should be disabled in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run for this scenario") nginx_root = temp_dir / "Loader" / "guides" / "nginx" chapters = nginx_root / "chapters" implementation_plan = temp_dir / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{chapters}/`", f"- `{nginx_root / 'index.html'}`", "", ] ) ) context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) persistent_messages: list[str] = [] ephemeral_messages: list[str] = [] context.queue_steering_message_callback = persistent_messages.append context.queue_ephemeral_steering_message_callback = ephemeral_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create a multi-file nginx guide.") dod.implementation_plan = str(implementation_plan) sync_todos_to_definition_of_done( dod, [ { "content": 
"Create the nginx directory structure", "active_form": "Creating the nginx directory structure", "status": "pending", }, { "content": "Develop the main index.html file with proper structure", "active_form": "Developing the main index.html file with proper structure", "status": "pending", }, ], ) tool_call = ToolCall( id="mkdir-nginx", name="bash", arguments={"command": f"mkdir -p {chapters}"}, ) executor = FakeExecutor( [ tool_outcome( tool_call=tool_call, output="", is_error=False, ) ] ) summary = TurnSummary(final_response="") await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert persistent_messages message = persistent_messages[-1] assert "Directory setup is complete." in message assert "Next step: create `index.html`." in message assert "Write a compact but real initial version of that file now" in message assert ephemeral_messages == [] @pytest.mark.asyncio async def test_tool_batch_runner_first_chapter_handoff_stays_persistent_until_substantive_output_exists( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should be disabled in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run for this scenario") nginx_root = temp_dir / "guides" / "nginx" chapters = nginx_root / "chapters" chapters.mkdir(parents=True) index_path = nginx_root / "index.html" implementation_plan = temp_dir / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{chapters}/`", f"- `{index_path}`", f"- `{chapters / '01-introduction.html'}`", "", ] 
) ) context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) persistent_messages: list[str] = [] ephemeral_messages: list[str] = [] context.queue_steering_message_callback = persistent_messages.append context.queue_ephemeral_steering_message_callback = ephemeral_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create a multi-file nginx guide.") dod.implementation_plan = str(implementation_plan) sync_todos_to_definition_of_done( dod, [ { "content": "Create the main index.html file with proper structure", "active_form": "Creating the main index.html file with proper structure", "status": "pending", }, { "content": "Create each chapter file with appropriate content", "active_form": "Creating each chapter file with appropriate content", "status": "pending", }, ], ) tool_call = ToolCall( id="write-index", name="write", arguments={ "file_path": str(index_path), "content": "\n", }, ) executor = FakeExecutor( [ tool_outcome( tool_call=tool_call, output=f"Successfully wrote 14 bytes to {index_path}", is_error=False, ) ] ) summary = TurnSummary(final_response="") await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert persistent_messages assert ephemeral_messages == [] message = persistent_messages[-1] assert "Confirmed progress:" in message assert "Next step: create `01-introduction.html`." in message assert ( f"Prefer one `write(file_path=..., content=...)` call for `{(chapters / '01-introduction.html').resolve(strict=False)}` now." 
in message ) assert "Write a compact but real initial version of that file now" not in message assert "Do not reread reference material or spend the next turn on bookkeeping." in message @pytest.mark.asyncio async def test_tool_batch_runner_directory_handoff_uses_home_relative_path( temp_dir: Path, monkeypatch: pytest.MonkeyPatch, ) -> None: monkeypatch.setenv("HOME", str(temp_dir.resolve(strict=False))) async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should be disabled in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run for this scenario") nginx_root = temp_dir / "Loader" / "guides" / "nginx" chapters = nginx_root / "chapters" index_path = nginx_root / "index.html" implementation_plan = temp_dir / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{chapters}/`", f"- `{index_path}`", "", ] ) ) context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) persistent_messages: list[str] = [] context.queue_steering_message_callback = persistent_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create a multi-file nginx guide.") dod.implementation_plan = str(implementation_plan) sync_todos_to_definition_of_done( dod, [ { "content": "Create the nginx directory structure", "active_form": "Creating the nginx directory structure", "status": "pending", }, { "content": "Develop the main index.html file with proper structure", "active_form": "Developing the main index.html file with proper structure", "status": "pending", }, ], ) tool_call = ToolCall( id="mkdir-nginx-home", name="bash", arguments={"command": 
f"mkdir -p {chapters}"}, ) executor = FakeExecutor( [ tool_outcome( tool_call=tool_call, output="", is_error=False, ) ] ) summary = TurnSummary(final_response="") await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert persistent_messages message = persistent_messages[-1] assert "Next step: create `index.html`." in message assert "`~/Loader/guides/nginx/index.html`" in message assert "Write a compact but real initial version of that file now" in message @pytest.mark.asyncio async def test_tool_batch_runner_redirects_post_write_self_audit_to_next_missing_artifact( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should not run in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run in this scenario") nginx_root = temp_dir / "guides" / "nginx" chapters = nginx_root / "chapters" chapters.mkdir(parents=True) index_path = nginx_root / "index.html" index_path.write_text( "\n".join( [ "", 'Chapter 1: Introduction to Nginx', 'Chapter 2: Installation and Setup', "", ] ) + "\n" ) implementation_plan = temp_dir / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{nginx_root}/`", f"- `{chapters}/`", f"- `{index_path}`", f"- `{chapters / '01-introduction.html'}`", "", ] ) ) context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) persistent_messages: list[str] = [] ephemeral_messages: list[str] = [] 
context.queue_steering_message_callback = persistent_messages.append context.queue_ephemeral_steering_message_callback = ephemeral_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create a multi-file nginx guide.") dod.implementation_plan = str(implementation_plan) dod.touched_files.append(str(index_path)) dod.completed_items.append("Develop the main index.html file for the nginx guide") dod.pending_items.append("Create chapter files for the nginx guide") tool_call = ToolCall( id="read-index-self-audit", name="read", arguments={"file_path": str(index_path)}, ) executor = FakeExecutor( [ tool_outcome( tool_call=tool_call, output="1\t\n", is_error=False, ) ] ) summary = TurnSummary(final_response="") await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert persistent_messages message = persistent_messages[-1] assert "You already have the current contents of `index.html` from the successful write." in message assert "Resume by creating `01-introduction.html` now." in message assert "Do not spend another turn rereading the file you just wrote or on TodoWrite alone." 
in message assert ephemeral_messages == [] @pytest.mark.asyncio async def test_tool_batch_runner_preserves_first_file_handoff_after_recovery_prompt( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should be disabled in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run for this scenario") nginx_root = temp_dir / "guides" / "nginx" chapters = nginx_root / "chapters" chapters.mkdir(parents=True) index_path = nginx_root / "index.html" implementation_plan = temp_dir / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{chapters}/`", f"- `{index_path}`", f"- `{chapters / '01-introduction.html'}`", "", ] ) ) context = build_context( temp_dir=temp_dir, messages=[ Message( role=Role.USER, content=( "[EMPTY ASSISTANT RESPONSE]\n" "Respond with that concrete mutation tool call now. Do not return an empty response." 
), ) ], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) persistent_messages: list[str] = [] ephemeral_messages: list[str] = [] context.queue_steering_message_callback = persistent_messages.append context.queue_ephemeral_steering_message_callback = ephemeral_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create a multi-file nginx guide.") dod.implementation_plan = str(implementation_plan) sync_todos_to_definition_of_done( dod, [ { "content": "Create the main index.html file with proper structure", "active_form": "Creating the main index.html file with proper structure", "status": "pending", }, { "content": "Create each chapter file with appropriate content", "active_form": "Creating each chapter file with appropriate content", "status": "pending", }, ], ) tool_call = ToolCall( id="write-index-recovered", name="write", arguments={ "file_path": str(index_path), "content": "\n", }, ) executor = FakeExecutor( [ tool_outcome( tool_call=tool_call, output=f"Successfully wrote 14 bytes to {index_path}", is_error=False, ) ] ) summary = TurnSummary(final_response="") await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert persistent_messages assert ephemeral_messages == [] message = persistent_messages[-1] assert "Next step: create `01-introduction.html`." 
in message assert "Write a compact but real initial version of that file now" not in message @pytest.mark.asyncio async def test_tool_batch_runner_todowrite_uses_concrete_output_language_for_aggregate_chapter_step( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should not run in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run in this scenario") guide_root = temp_dir / "guides" / "nginx" chapters = guide_root / "chapters" chapters.mkdir(parents=True) index_path = guide_root / "index.html" index_path.write_text( "\n".join( [ "", 'Chapter 1: Introduction to Nginx', 'Chapter 2: Installation and Setup', "", ] ) + "\n" ) implementation_plan = temp_dir / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{guide_root}/`", f"- `{chapters}/`", f"- `{index_path}`", "", ] ) ) context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, ) queued_messages: list[str] = [] context.queue_steering_message_callback = queued_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create a multi-file nginx guide.") dod.implementation_plan = str(implementation_plan) dod.touched_files.append(str(index_path)) sync_todos_to_definition_of_done( dod, [ { "content": "Develop the main index.html file with proper structure", "active_form": "Developing the main index.html file with proper structure", "status": "completed", }, { "content": "Create chapter files with content and structure", "active_form": "Creating chapter files with content and structure", "status": "pending", }, ], ) todos = [ { "content": "Develop the main 
index.html file with proper structure", "active_form": "Developing the main index.html file with proper structure", "status": "completed", }, { "content": "Create chapter files with content and structure", "active_form": "Creating chapter files with content and structure", "status": "pending", }, ] tool_call = ToolCall( id="todo-aggregate", name="TodoWrite", arguments={"todos": todos}, ) executor = FakeExecutor( [ tool_outcome( tool_call=tool_call, output="Todos updated", is_error=False, metadata={"new_todos": todos}, ) ] ) summary = TurnSummary(final_response="") await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert queued_messages message = queued_messages[-1] assert "Todo tracking is updated." in message assert "Next step: create `01-introduction.html`." in message assert ( "Continue with the next pending item: `Create chapter files with content and structure`." not in message ) @pytest.mark.asyncio async def test_duplicate_observation_nudge_prioritizes_missing_artifact_over_review( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should be disabled in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run for this scenario") guide_root = temp_dir / "guides" / "nginx" chapters = guide_root / "chapters" chapters.mkdir(parents=True) index_path = guide_root / "index.html" chapter_one = chapters / "01-getting-started.html" chapter_one.write_text("

One

\n") index_path.write_text("One\n") implementation_plan = temp_dir / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{index_path}`", f"- `{chapter_one}`", f"- `{chapters / '06-ssl-configuration.html'}`", "", ] ) ) context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) persistent_messages: list[str] = [] ephemeral_messages: list[str] = [] context.queue_steering_message_callback = persistent_messages.append context.queue_ephemeral_steering_message_callback = ephemeral_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create a multi-file nginx guide.") dod.implementation_plan = str(implementation_plan) sync_todos_to_definition_of_done( dod, [ { "content": "Ensure all files are properly linked and formatted consistently", "active_form": "Working on: Ensure all files are properly linked and formatted consistently", "status": "pending", }, { "content": "Create the final chapter (06-ssl-configuration.html)", "active_form": "Working on: Create the final chapter (06-ssl-configuration.html)", "status": "pending", }, ], ) assert tool_batches_should_prioritize_missing_artifact( dod=dod, next_pending=dod.pending_items[0], missing_artifact=(chapters / "06-ssl-configuration.html", False), project_root=temp_dir, ) tool_call = ToolCall( id="dup-read", name="read", arguments={"file_path": str(index_path)}, ) runner._queue_duplicate_observation_nudge(tool_call, dod=dod) # type: ignore[attr-defined] assert persistent_messages message = persistent_messages[-1] assert "06-ssl-configuration.html" in message assert "Do not switch into review or consistency-check mode" in message assert ( "Continue with the next pending item: `Ensure all files are properly linked and formatted consistently`" not in message ) @pytest.mark.asyncio async def 
test_tool_batch_runner_hands_off_to_verification_once_planned_artifacts_exist( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should be disabled in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run for this scenario") guide_root = temp_dir / "guides" / "nginx" chapters = guide_root / "chapters" chapters.mkdir(parents=True) index_path = guide_root / "index.html" chapter_one = chapters / "01-getting-started.html" chapter_two = chapters / "02-installation.html" index_path.write_text("One\n") chapter_one.write_text("

One

\n") chapter_two.write_text("

Two

\n") implementation_plan = temp_dir / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{chapters}/`", f"- `{index_path}`", f"- `{chapter_one}`", f"- `{chapter_two}`", "", ] ) ) context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) persistent_messages: list[str] = [] ephemeral_messages: list[str] = [] context.queue_steering_message_callback = persistent_messages.append context.queue_ephemeral_steering_message_callback = ephemeral_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create a multi-file nginx guide.") dod.implementation_plan = str(implementation_plan) sync_todos_to_definition_of_done( dod, [ { "content": "Create the guide files", "active_form": "Working on: Create the guide files", "status": "completed", }, { "content": "Ensure all files are properly linked and formatted consistently", "active_form": "Working on: Ensure all files are properly linked and formatted consistently", "status": "pending", }, ], ) tool_call = ToolCall( id="write-final", name="write", arguments={ "file_path": str(chapter_two), "content": "

Two

\n", }, ) executor = FakeExecutor( [ tool_outcome( tool_call=tool_call, output=f"Successfully wrote {chapter_two}", is_error=False, ) ] ) summary = TurnSummary(final_response="") await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert any( "All explicitly planned artifacts now exist on disk." in message for message in persistent_messages ) assert any( "Ensure all files are properly linked and formatted consistently" in message for message in persistent_messages ) assert any( "Finish with a final response once no specific mismatch remains so Loader can verify." in message for message in persistent_messages ) @pytest.mark.asyncio async def test_tool_batch_runner_mutation_handoff_points_at_next_missing_artifact( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should not run in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run in this scenario") guide_root = temp_dir / "guides" / "nginx" chapters = guide_root / "chapters" guide_root.mkdir(parents=True) chapters.mkdir() index_path = guide_root / "index.html" index_path.write_text("\n") chapter_one = chapters / "01-getting-started.html" chapter_two = chapters / "02-installation.html" implementation_plan = temp_dir / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{guide_root}/`", f"- `{index_path}`", f"- `{chapter_one}`", f"- `{chapter_two}`", "", ] ) ) context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), 
assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) persistent_messages: list[str] = [] ephemeral_messages: list[str] = [] context.queue_steering_message_callback = persistent_messages.append context.queue_ephemeral_steering_message_callback = ephemeral_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create a multi-file nginx guide.") dod.implementation_plan = str(implementation_plan) sync_todos_to_definition_of_done( dod, [ { "content": "Create the main index.html file with proper structure", "active_form": "Working on: Create the main index.html file with proper structure", "status": "pending", }, { "content": "Create each chapter file in sequence, following the established pattern", "active_form": "Working on: Create each chapter file in sequence, following the established pattern", "status": "pending", }, { "content": "Ensure all files are properly linked and formatted consistently", "active_form": "Working on: Ensure all files are properly linked and formatted consistently", "status": "pending", }, ], ) tool_call = ToolCall( id="write-index", name="write", arguments={"file_path": str(index_path), "content": "\n"}, ) executor = FakeExecutor( [tool_outcome(tool_call=tool_call, output=f"Successfully wrote {index_path}", is_error=False)] ) summary = TurnSummary(final_response="") await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert persistent_messages assert ephemeral_messages == [] message = persistent_messages[-1] assert "Next step: create `01-getting-started.html`." 
in message assert "Write a compact but real initial version of that file now" not in message assert "refresh `TodoWrite`" not in message assert "Do not reread reference material or spend the next turn on bookkeeping." in message @pytest.mark.asyncio async def test_tool_batch_runner_large_plan_does_not_claim_completion_early( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should not run in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run in this scenario") guide_root = temp_dir / "guides" / "nginx" chapters = guide_root / "chapters" guide_root.mkdir(parents=True) chapters.mkdir() index_path = guide_root / "index.html" index_path.write_text("\n") chapter_paths = [ chapters / "01-getting-started.html", chapters / "02-installation.html", chapters / "03-first-website.html", chapters / "04-configuration-basics.html", chapters / "05-advanced-configurations.html", chapters / "06-performance-tuning.html", chapters / "07-security-best-practices.html", ] for chapter in chapter_paths[:4]: chapter.write_text(f"

{chapter.stem}

\n") chapter_paths[4].write_text("

Advanced configurations

\n") implementation_plan = temp_dir / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{guide_root}/`", f"- `{chapters}/`", f"- `{index_path}`", *[f"- `{path}`" for path in chapter_paths], "", ] ) ) context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) persistent_messages: list[str] = [] ephemeral_messages: list[str] = [] context.queue_steering_message_callback = persistent_messages.append context.queue_ephemeral_steering_message_callback = ephemeral_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create a thorough nginx guide.") dod.implementation_plan = str(implementation_plan) sync_todos_to_definition_of_done( dod, [ { "content": "Create the nginx guide artifacts", "active_form": "Creating nginx guide artifacts", "status": "pending", }, { "content": "Verify all guide files are linked and complete", "active_form": "Verifying guide linkage and completeness", "status": "pending", }, ], ) tool_call = ToolCall( id="write-chapter-05", name="write", arguments={ "file_path": str(chapter_paths[4]), "content": "

Advanced configurations

\n", }, ) executor = FakeExecutor( [ tool_outcome( tool_call=tool_call, output=f"Successfully wrote {chapter_paths[4]}", is_error=False, ) ] ) summary = TurnSummary(final_response="") await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert any( "Next step: create `06-performance-tuning.html`." in message for message in ephemeral_messages ) assert not any( "All explicitly planned artifacts now exist on disk." in message for message in ephemeral_messages ) @pytest.mark.asyncio async def test_tool_batch_runner_uses_compact_missing_artifact_nudge_after_substantial_progress( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should not run in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run in this scenario") guide_root = temp_dir / "guides" / "nginx" chapters = guide_root / "chapters" guide_root.mkdir(parents=True) chapters.mkdir() index_path = guide_root / "index.html" chapter_paths = [ chapters / "01-introduction.html", chapters / "02-installation.html", chapters / "03-configuration.html", chapters / "04-basic-usage.html", chapters / "05-advanced-features.html", ] for path in (index_path, *chapter_paths[:4]): path.write_text("\n") implementation_plan = temp_dir / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{guide_root}/`", f"- `{chapters}/`", f"- `{index_path}`", *[f"- `{path}`" for path in chapter_paths], "", ] ) ) context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), 
assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) persistent_messages: list[str] = [] ephemeral_messages: list[str] = [] context.queue_steering_message_callback = persistent_messages.append context.queue_ephemeral_steering_message_callback = ephemeral_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create a thorough nginx guide.") dod.implementation_plan = str(implementation_plan) dod.touched_files.extend(str(path) for path in (index_path, *chapter_paths[:4])) dod.completed_items.extend( [ "Create the nginx directory structure", "Create the main index.html file with proper structure", ] ) sync_todos_to_definition_of_done( dod, [ { "content": "Create each chapter file with appropriate content", "active_form": "Creating each chapter file with appropriate content", "status": "pending", } ], ) tool_call = ToolCall( id="write-chapter-04", name="write", arguments={ "file_path": str(chapter_paths[3]), "content": "updated\n", }, ) executor = FakeExecutor( [ tool_outcome( tool_call=tool_call, output=f"Successfully wrote {chapter_paths[3]}", is_error=False, ) ] ) summary = TurnSummary(final_response="") await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert ephemeral_messages message = ephemeral_messages[-1] assert "Next step: create `05-advanced-features.html`." in message assert "Do not reread reference material or spend the next turn on bookkeeping." 
in message assert "refresh `TodoWrite`" not in message @pytest.mark.asyncio async def test_tool_batch_runner_todowrite_with_missing_artifact_requeues_exact_resume_step( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should not run in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run in this scenario") guide_root = temp_dir / "guides" / "nginx" chapters = guide_root / "chapters" guide_root.mkdir(parents=True) chapters.mkdir() index_path = guide_root / "index.html" index_path.write_text("\n") chapter_one = chapters / "01-getting-started.html" chapter_two = chapters / "02-installation.html" chapter_one.write_text("

One

\n") implementation_plan = temp_dir / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{guide_root}/`", f"- `{chapters}/`", f"- `{index_path}`", f"- `{chapter_one}`", f"- `{chapter_two}`", "", ] ) ) context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) persistent_messages: list[str] = [] ephemeral_messages: list[str] = [] context.queue_steering_message_callback = persistent_messages.append context.queue_ephemeral_steering_message_callback = ephemeral_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create a multi-file nginx guide.") dod.implementation_plan = str(implementation_plan) sync_todos_to_definition_of_done( dod, [ { "content": "Create 01-getting-started.html", "active_form": "Creating 01-getting-started.html", "status": "completed", }, { "content": "Create 02-installation.html", "active_form": "Creating 02-installation.html", "status": "pending", }, ], ) dod.touched_files.extend([str(index_path), str(chapter_one)]) tool_call = ToolCall( id="todo-only", name="TodoWrite", arguments={ "todos": [ { "content": "Create 01-getting-started.html", "active_form": "Creating 01-getting-started.html", "status": "completed", }, { "content": "Create 02-installation.html", "active_form": "Creating 02-installation.html", "status": "pending", }, ] }, ) executor = FakeExecutor( [ tool_outcome( tool_call=tool_call, output="Todos updated", is_error=False, metadata={ "new_todos": [ { "content": "Create 01-getting-started.html", "active_form": "Creating 01-getting-started.html", "status": "completed", }, { "content": "Create 02-installation.html", "active_form": "Creating 02-installation.html", "status": "pending", }, ] }, ) ] ) summary = TurnSummary(final_response="") await runner.execute_batch( tool_calls=[tool_call], 
tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert persistent_messages message = persistent_messages[-1] assert "Todo tracking is updated. Next step: create `02-installation.html`." in message assert "Prefer one `write(file_path=..., content=...)` call" in message assert "Make your next response the concrete mutation tool call itself." in message assert ephemeral_messages == [] @pytest.mark.asyncio async def test_tool_batch_runner_todowrite_after_artifacts_exist_pushes_verification_handoff( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should not run in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run in this scenario") guide_root = temp_dir / "guides" / "nginx" chapters = guide_root / "chapters" guide_root.mkdir(parents=True) chapters.mkdir() index_path = guide_root / "index.html" chapter_one = chapters / "01-getting-started.html" chapter_two = chapters / "02-installation.html" index_path.write_text("\n") chapter_one.write_text("

One

\n") chapter_two.write_text("

Two

\n") implementation_plan = temp_dir / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{guide_root}/`", f"- `{chapters}/`", f"- `{index_path}`", f"- `{chapter_one}`", f"- `{chapter_two}`", "", ] ) ) context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) queued_messages: list[str] = [] context.queue_steering_message_callback = queued_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create a multi-file nginx guide.") dod.implementation_plan = str(implementation_plan) dod.verification_commands = [f"ls -la {guide_root}"] sync_todos_to_definition_of_done( dod, [ { "content": "First, examine the existing Fortran guide structure to understand the format and content organization", "active_form": "Working on: First, examine the existing Fortran guide structure to understand the format and content organization", "status": "pending", }, { "content": "Verify all guide files are linked and complete", "active_form": "Working on: Verify all guide files are linked and complete", "status": "pending", }, ], project_root=temp_dir, ) tool_call = ToolCall( id="todo-only", name="TodoWrite", arguments={ "todos": [ { "content": "First, examine the existing Fortran guide structure to understand the format and content organization", "active_form": "Working on: First, examine the existing Fortran guide structure to understand the format and content organization", "status": "pending", }, { "content": "Verify all guide files are linked and complete", "active_form": "Working on: Verify all guide files are linked and complete", "status": "pending", }, ] }, ) executor = FakeExecutor( [ tool_outcome( tool_call=tool_call, output="Todos updated", is_error=False, metadata={ "new_todos": [ { "content": "First, examine the existing Fortran guide 
structure to understand the format and content organization", "active_form": "Working on: First, examine the existing Fortran guide structure to understand the format and content organization", "status": "pending", }, { "content": "Verify all guide files are linked and complete", "active_form": "Working on: Verify all guide files are linked and complete", "status": "pending", }, ] }, ) ] ) summary = TurnSummary(final_response="") await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert queued_messages message = queued_messages[-1] assert "Todo tracking is updated. All explicitly planned artifacts now exist on disk." in message assert "Verify all guide files are linked and complete" in message assert ( "Finish with a final response once no specific mismatch remains so Loader can verify." 
in message ) assert "reopen reference materials" in message assert "Fortran guide structure" not in message assert context.workflow_mode == "execute" @pytest.mark.asyncio async def test_tool_batch_runner_todowrite_after_outputs_exist_but_links_missing_still_handoffs_to_verify( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should not run for this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run for this scenario") guide_root = temp_dir / "guides" / "nginx" chapters = guide_root / "chapters" guide_root.mkdir(parents=True) chapters.mkdir() index_path = guide_root / "index.html" chapter_one = chapters / "01-introduction.html" chapter_two = chapters / "02-installation.html" index_path.write_text( "\n".join( [ 'Intro', 'Install', 'Back', "", ] ) ) chapter_one.write_text("\n") chapter_two.write_text("\n") implementation_plan = temp_dir / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{guide_root}/`", f"- `{chapters}/`", f"- `{index_path}`", f"- `{chapter_one}`", f"- `{chapter_two}`", "", ] ) ) context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) queued_messages: list[str] = [] context.queue_steering_message_callback = queued_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create a multi-file nginx guide.") dod.implementation_plan = str(implementation_plan) dod.verification_commands = [f"ls -la {guide_root}"] sync_todos_to_definition_of_done( dod, [ { "content": "Create chapter files following the established pattern", "active_form": "Creating chapter files", "status": 
"in_progress", } ], project_root=temp_dir, ) tool_call = ToolCall( id="todo-post-build", name="TodoWrite", arguments={ "todos": [ { "content": "Create chapter files following the established pattern", "active_form": "Creating chapter files", "status": "in_progress", } ] }, ) executor = FakeExecutor( [ tool_outcome( tool_call=tool_call, output="Todos updated", is_error=False, metadata={ "new_todos": [ { "content": "Create chapter files following the established pattern", "active_form": "Creating chapter files", "status": "in_progress", } ] }, ) ] ) summary = TurnSummary(final_response="") await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert queued_messages message = queued_messages[-1] assert "Todo tracking is updated. All explicitly planned artifacts now exist on disk." in message assert "Finish with a final response now so Loader can run verification automatically." in message assert "Repair or verify the current files instead of expanding the artifact set." 
not in message assert context.workflow_mode == "verify" @pytest.mark.asyncio async def test_tool_batch_runner_todowrite_during_quality_repair_requires_mutation( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should not run for this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run for this scenario") guide_root = temp_dir / "guides" / "nginx" chapters = guide_root / "chapters" chapters.mkdir(parents=True) index_path = guide_root / "index.html" chapter_one = chapters / "01-introduction.html" index_path.write_text("\n") chapter_one.write_text("\n") implementation_plan = temp_dir / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{guide_root}/`", f"- `{chapters}/`", f"- `{index_path}`", f"- `{chapter_one}`", "", ] ) ) context = build_context( temp_dir=temp_dir, messages=[ Message( role=Role.USER, content=( "Repair focus:\n" f"- Improve `{chapter_one}`: thin content (409 text chars, expected at least 1758).\n" f"- Improve `{chapter_one}`: insufficient structured content (6 blocks, expected at least 18).\n" f"- Immediate next step: edit `{chapter_one}`.\n" ), ) ], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) context.set_workflow_mode("verify") queued_messages: list[str] = [] emitted_responses: list[str] = [] context.queue_steering_message_callback = queued_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create a multi-file nginx guide.") dod.implementation_plan = str(implementation_plan) dod.verification_commands = [f"ls -la {guide_root}"] sync_todos_to_definition_of_done( dod, [ { "content": "Expand generated chapters 
to satisfy quality verification", "active_form": "Expanding generated chapters", "status": "in_progress", } ], project_root=temp_dir, ) pending_before_todowrite = list(dod.pending_items) completed_before_todowrite = list(dod.completed_items) tool_call = ToolCall( id="todo-quality", name="TodoWrite", arguments={ "todos": [ { "content": "Expand generated chapters to satisfy quality verification", "active_form": "Expanding generated chapters", "status": "completed", } ] }, ) executor = FakeExecutor( [ tool_outcome( tool_call=tool_call, output="Todos updated", is_error=False, metadata={ "new_todos": [ { "content": "Expand generated chapters to satisfy quality verification", "active_form": "Expanding generated chapters", "status": "completed", } ] }, ) ] ) async def emit(event: AgentEvent) -> None: if event.type == "response": emitted_responses.append(str(event.content)) summary = TurnSummary(final_response="") result = await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert queued_messages message = queued_messages[-1] assert "verification still has an active HTML content-quality repair" in message assert "TodoWrite cannot satisfy that verifier" in message assert f"Immediate next step: edit `{chapter_one.resolve(strict=False)}`" in message assert "thin content" in message assert "Finish with a final response now" not in message assert context.workflow_mode == "execute" assert result.halted is False assert summary.final_response == "" assert not emitted_responses assert dod.pending_items == pending_before_todowrite assert dod.completed_items == completed_before_todowrite def test_todowrite_quality_repair_nudge_uses_exact_anchor_after_stale_context( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, 
context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence should not run for direct nudge test") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run for direct nudge test") guide_root = temp_dir / "guides" / "nginx" chapters = guide_root / "chapters" chapters.mkdir(parents=True) chapter_one = chapters / "05-load-balancing.html" chapter_one.write_text("

Load Balancing

\n") context = build_context( temp_dir=temp_dir, messages=[ Message( role=Role.USER, content=( "Repair focus:\n" f"- Improve `{chapter_one}`: thin content " "(846 text chars, expected at least 1758).\n" f"- Immediate next step: edit `{chapter_one}`.\n" ), ), Message( role=Role.TOOL, content=( "Observation [edit]: Error: Failed to complete the operation " f"after 2 attempts for {chapter_one}. old_string not found in file." ), ), ], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) queued_messages: list[str] = [] context.queue_steering_message_callback = queued_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create a multi-file nginx guide.") runner._queue_todowrite_resume_nudge(dod=dod) assert queued_messages message = queued_messages[-1] assert f"Immediate next step: edit `{chapter_one.resolve(strict=False)}`" in message assert "`edit(file_path=..., old_string=..., new_string=...)`" in message assert "Use this exact current closing-tail anchor as `old_string`" in message assert "```html\n\n```" in message assert "do not call `read`, `patch`, `write`, or TodoWrite again first" in message @pytest.mark.asyncio async def test_tool_batch_runner_preempts_post_build_audit_after_todowrite_verify_handoff( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should not run for this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run for this scenario") guide_root = temp_dir / "guides" / "nginx" chapters = guide_root / "chapters" guide_root.mkdir(parents=True) chapters.mkdir() index_path = guide_root / "index.html" chapter_one = chapters / "01-introduction.html" chapter_two = chapters / 
"02-installation.html" index_path.write_text( "\n".join( [ '
  • Chapter 1: Introduction
  • ', '
  • Chapter 2: Installation
  • ', "", ] ) ) chapter_one.write_text("\n") chapter_two.write_text("\n") implementation_plan = temp_dir / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{guide_root}/`", f"- `{chapters}/`", f"- `{index_path}`", f"- `{chapter_one}`", f"- `{chapter_two}`", "", ] ) ) context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) queued_messages: list[str] = [] context.queue_steering_message_callback = queued_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create a multi-file nginx guide.") dod.implementation_plan = str(implementation_plan) dod.verification_commands = [f"ls -la {guide_root}"] todo_call = ToolCall( id="todo-post-build-preempt", name="TodoWrite", arguments={"todos": []}, ) audit_read = ToolCall( id="read-after-todo", name="read", arguments={"file_path": str(index_path)}, ) executor = FakeExecutor( [ tool_outcome( tool_call=todo_call, output="Todos updated", is_error=False, metadata={"new_todos": []}, ), tool_outcome( tool_call=audit_read, output=index_path.read_text(), is_error=False, ), ] ) summary = TurnSummary(final_response="") result = await runner.execute_batch( tool_calls=[todo_call, audit_read], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert result.continue_after_batch is True assert result.halted is False assert [call.id for call in executor.calls] == ["todo-post-build-preempt"] assert len(summary.tool_result_messages) == 1 assert context.workflow_mode == "verify" assert queued_messages assert "Finish with a final response now so Loader can run verification automatically." 
in queued_messages[-1] @pytest.mark.asyncio async def test_tool_batch_runner_todowrite_complete_directory_plan_does_not_reinfer_first_child( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should not run for this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run for this scenario") reference = temp_dir / "fortran" / "chapters" / "01-introduction.html" reference.parent.mkdir(parents=True) reference.write_text("

    Introduction

    \n") guide_root = temp_dir / "Loader" / "guides" / "nginx" chapters = guide_root / "chapters" guide_root.mkdir(parents=True) chapters.mkdir() index_path = guide_root / "index.html" chapter_one = chapters / "01-introduction.html" chapter_two = chapters / "02-installation.html" chapter_three = chapters / "03-basic-configuration.html" index_path.write_text( "\n".join( [ 'Introduction', 'Installation', 'Configuration', "", ] ) ) chapter_one.write_text("\n") chapter_two.write_text("\n") chapter_three.write_text("\n") implementation_plan = temp_dir / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{guide_root / 'index.html'}`", f"- `{chapters}/`", "", ] ) ) messages = [ Message( role=Role.ASSISTANT, content="I examined the reference guide structure.", tool_calls=[ ToolCall( id="read-reference-child", name="read", arguments={"file_path": str(reference)}, ) ], ) ] context = build_context( temp_dir=temp_dir, messages=messages, safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) queued_messages: list[str] = [] context.queue_steering_message_callback = queued_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create an equally thorough nginx guide.") dod.implementation_plan = str(implementation_plan) dod.verification_commands = [f"ls -la {guide_root}"] todo_call = ToolCall( id="todo-complete-directory-plan", name="TodoWrite", arguments={"todos": []}, ) executor = FakeExecutor( [ tool_outcome( tool_call=todo_call, output="Todos updated", is_error=False, metadata={"new_todos": []}, ) ] ) summary = TurnSummary(final_response="") result = await runner.execute_batch( tool_calls=[todo_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, 
emit_confirmation=None, consecutive_errors=0, ) assert result.halted is True assert result.final_response == ( "Todo tracking is complete; running Loader verification on the generated " "files now." ) assert summary.final_response == result.final_response assert context.workflow_mode == "verify" assert summary.tool_result_messages assert ( "final response should be provided next for Loader verification" in summary.tool_result_messages[-1].content ) assert "01-introduction.html" not in summary.tool_result_messages[-1].content assert "chapter files" not in summary.tool_result_messages[-1].content.lower() assert "fortran guide structure" not in summary.tool_result_messages[-1].content.lower() @pytest.mark.asyncio async def test_tool_batch_runner_preempts_post_build_observation_batch_for_verify_handoff( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should not run for this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run for this scenario") guide_root = temp_dir / "guides" / "nginx" chapters = guide_root / "chapters" guide_root.mkdir(parents=True) chapters.mkdir() index_path = guide_root / "index.html" chapter_one = chapters / "01-introduction.html" chapter_two = chapters / "02-installation.html" chapter_three = chapters / "03-configuration.html" index_path.write_text( "\n".join( [ '
  • Chapter 1: Introduction
  • ', '
  • Chapter 2: Installation
  • ', "", ] ) ) chapter_one.write_text("\n") chapter_two.write_text("\n") chapter_three.write_text("\n") implementation_plan = temp_dir / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{guide_root}/`", f"- `{chapters}/`", f"- `{index_path}`", "", ] ) ) context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) queued_messages: list[str] = [] context.queue_steering_message_callback = queued_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create an equally thorough nginx guide.") dod.implementation_plan = str(implementation_plan) dod.verification_commands = [f"ls -la {guide_root}"] audit_bash = ToolCall( id="bash-post-build-audit", name="bash", arguments={"command": f"ls -la {guide_root}"}, ) audit_read = ToolCall( id="read-index-after-audit", name="read", arguments={"file_path": str(index_path)}, ) executor = FakeExecutor( [ tool_outcome( tool_call=audit_bash, output="total 8\n", is_error=False, ), tool_outcome( tool_call=audit_read, output=index_path.read_text(), is_error=False, ), ] ) summary = TurnSummary(final_response="") result = await runner.execute_batch( tool_calls=[audit_bash, audit_read], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert result.continue_after_batch is True assert [call.id for call in executor.calls] == ["bash-post-build-audit"] assert context.workflow_mode == "verify" assert queued_messages assert "Finish with a final response now so Loader can run verification automatically." 
in queued_messages[-1] @pytest.mark.asyncio async def test_tool_batch_runner_preempts_post_build_observation_batch_during_consistency_review( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should not run for this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run for this scenario") guide_root = temp_dir / "guides" / "nginx" chapters = guide_root / "chapters" guide_root.mkdir(parents=True) chapters.mkdir() index_path = guide_root / "index.html" chapter_one = chapters / "01-introduction.html" chapter_two = chapters / "02-installation.html" chapter_three = chapters / "03-basic-configuration.html" index_path.write_text("\n") chapter_one.write_text("\n") chapter_two.write_text("\n") chapter_three.write_text("\n") implementation_plan = temp_dir / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{guide_root}/`", f"- `{chapters}/`", f"- `{index_path}`", "", ] ) ) context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) queued_messages: list[str] = [] queued_ephemeral: list[str] = [] context.queue_steering_message_callback = queued_messages.append context.queue_ephemeral_steering_message_callback = queued_ephemeral.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create an equally thorough nginx guide.") dod.implementation_plan = str(implementation_plan) dod.verification_commands = [f"ls -la {guide_root}"] sync_todos_to_definition_of_done( dod, [ { "content": "Review the generated guide for consistency and completeness", "active_form": "Reviewing the generated guide for consistency and 
completeness", "status": "pending", } ], project_root=temp_dir, ) audit_read = ToolCall( id="read-index-during-review", name="read", arguments={"file_path": str(index_path)}, ) second_read = ToolCall( id="read-chapter-after-review", name="read", arguments={"file_path": str(chapter_one)}, ) executor = FakeExecutor( [ tool_outcome( tool_call=audit_read, output=index_path.read_text(), is_error=False, ), tool_outcome( tool_call=second_read, output=chapter_one.read_text(), is_error=False, ), ] ) summary = TurnSummary(final_response="") result = await runner.execute_batch( tool_calls=[audit_read, second_read], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert result.continue_after_batch is True assert [call.id for call in executor.calls] == ["read-index-during-review"] queued = queued_ephemeral or queued_messages assert queued assert "All explicitly planned artifacts already exist." in queued[-1] assert "generated files" in queued[-1] @pytest.mark.asyncio async def test_tool_batch_runner_skips_post_build_user_question_during_consistency_review( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should not run for this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run for this scenario") guide_root = temp_dir / "guides" / "nginx" chapters = guide_root / "chapters" guide_root.mkdir(parents=True) chapters.mkdir() index_path = guide_root / "index.html" chapter_one = chapters / "01-introduction.html" chapter_two = chapters / "02-installation.html" index_path.write_text( "\n".join( [ '
  • Chapter 1: Introduction
  • ', '
  • Chapter 2: Installation
  • ', "", ] ) ) chapter_one.write_text("\n") chapter_two.write_text("\n") implementation_plan = temp_dir / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{guide_root}/`", f"- `{chapters}/`", f"- `{index_path}`", f"- `{chapter_one}`", f"- `{chapter_two}`", "", ] ) ) context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) queued_messages: list[str] = [] context.queue_steering_message_callback = queued_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create an equally thorough nginx guide.") dod.implementation_plan = str(implementation_plan) dod.verification_commands = [f"ls -la {guide_root}"] dod.pending_items = ["Ensure all files are properly linked and formatted"] question_call = ToolCall( id="ask-post-build-review", name="AskUserQuestion", arguments={ "question": "Which specific aspects of the reference guide should I copy?", "context": "I already created the output files and want to ensure they match.", }, ) executor = FakeExecutor([]) summary = TurnSummary(final_response="") result = await runner.execute_batch( tool_calls=[question_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert result.continue_after_batch is True assert executor.calls == [] assert queued_messages assert "The remaining work is review/verification of the generated files." in queued_messages[-1] assert "Do not ask the user for more clarification about the reference pattern now." in queued_messages[-1] assert "Finish with a final response now so Loader can run verification automatically." 
@pytest.mark.asyncio
async def test_tool_batch_runner_rewrites_stale_todowrite_summary_from_reconciled_dod(
    temp_dir: Path,
) -> None:
    """When every planned artifact already exists, a TodoWrite carrying only a
    stale discovery item halts the batch with a rewritten, DoD-reconciled
    summary instead of echoing the stale pending todo."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        # Guard: confidence scoring must not be invoked in this scenario.
        raise AssertionError("Confidence scoring should not run for this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        # Guard: action verification must not be invoked in this scenario.
        raise AssertionError("Verification should not run for this scenario")

    # Create the full planned output tree up front so the todo is stale.
    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    guide_root.mkdir(parents=True)
    chapters.mkdir()
    index_path = guide_root / "index.html"
    for name in (
        "01-introduction.html",
        "02-installation.html",
        "03-basic-configuration.html",
        "04-advanced-usage.html",
        "05-troubleshooting.html",
    ):
        (chapters / name).write_text("\n")
    index_path.write_text("\n")
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}/`",
                f"- `{chapters}/`",
                f"- `{index_path}`",
                "",
            ]
        )
    )
    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    dod = create_definition_of_done("Create an equally thorough nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    dod.verification_commands = [f"ls -la {guide_root}"]
    # The only todo is the long-finished discovery step.
    tool_call = ToolCall(
        id="todo-stale-summary",
        name="TodoWrite",
        arguments={
            "todos": [
                {
                    "content": "First, examine the existing fortran guide structure and content to understand the format",
                    "active_form": "Working on: First, examine the existing fortran guide structure and content to understand the format",
                    "status": "pending",
                }
            ]
        },
    )
    executor = FakeExecutor(
        [
            tool_outcome(
                tool_call=tool_call,
                output="Todos updated",
                is_error=False,
                metadata={
                    "new_todos": [
                        {
                            "content": "First, examine the existing fortran guide structure and content to understand the format",
                            "active_form": "Working on: First, examine the existing fortran guide structure and content to understand the format",
                            "status": "pending",
                        }
                    ]
                },
            )
        ]
    )
    summary = TurnSummary(final_response="")
    result = await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=summary,
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )
    # The batch halts and hands control to Loader verification.
    assert result.halted is True
    assert result.final_response == (
        "Todo tracking is complete; running Loader verification on the generated "
        "files now."
    )
    assert summary.final_response == result.final_response
    assert summary.tool_result_messages
    message = summary.tool_result_messages[-1].content
    assert "updated todo list" in message
    assert "final response should be provided next for Loader verification" in message
    # The stale pending item must not leak back into the summary.
    assert "next pending:" not in message
    assert "fortran guide structure" not in message.lower()
@pytest.mark.asyncio
async def test_tool_batch_runner_todowrite_drops_unplanned_expansion_after_outputs_exist(
    temp_dir: Path,
) -> None:
    """A TodoWrite that tries to expand the artifact set (an extra chapter not
    in the plan) after all planned outputs exist steers toward verification and
    omits the unplanned file from the queued message."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        # Guard: confidence scoring must not be invoked in this scenario.
        raise AssertionError("Confidence scoring should not run for this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        # Guard: action verification must not be invoked in this scenario.
        raise AssertionError("Verification should not run for this scenario")

    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    guide_root.mkdir(parents=True)
    chapters.mkdir()
    index_path = guide_root / "index.html"
    chapter_one = chapters / "01-introduction.html"
    chapter_two = chapters / "02-installation.html"
    # NOTE(review): HTML markup in this fixture appears stripped by a
    # formatting pass — confirm the original literal against version control.
    index_path.write_text(
        "\n".join(
            [
                'Intro',
                'Install',
                'Back',
                "",
            ]
        )
    )
    chapter_one.write_text("\n")
    chapter_two.write_text("\n")
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}/`",
                f"- `{chapters}/`",
                f"- `{index_path}`",
                f"- `{chapter_one}`",
                f"- `{chapter_two}`",
                "",
            ]
        )
    )
    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    queued_messages: list[str] = []
    context.queue_steering_message_callback = queued_messages.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    dod.verification_commands = [f"ls -la {guide_root}"]
    # The fourth todo ("08-troubleshooting.html") is not in the plan.
    tool_call = ToolCall(
        id="todo-post-build-expansion",
        name="TodoWrite",
        arguments={
            "todos": [
                {
                    "content": "Create index.html for nginx guide",
                    "activeForm": "Creating index.html",
                    "status": "in_progress",
                },
                {
                    "content": "Create chapter 01-introduction.html",
                    "activeForm": "Creating chapter 01-introduction.html",
                    "status": "completed",
                },
                {
                    "content": "Create chapter 02-installation.html",
                    "activeForm": "Creating chapter 02-installation.html",
                    "status": "completed",
                },
                {
                    "content": "Create chapter 08-troubleshooting.html",
                    "activeForm": "Creating chapter 08-troubleshooting.html",
                    "status": "pending",
                },
            ]
        },
    )
    executor = FakeExecutor(
        [
            tool_outcome(
                tool_call=tool_call,
                output="Todos updated",
                is_error=False,
                metadata={
                    "new_todos": [
                        {
                            "content": "Create index.html for nginx guide",
                            "active_form": "Creating index.html",
                            "status": "in_progress",
                        },
                        {
                            "content": "Create chapter 01-introduction.html",
                            "active_form": "Creating chapter 01-introduction.html",
                            "status": "completed",
                        },
                        {
                            "content": "Create chapter 02-installation.html",
                            "active_form": "Creating chapter 02-installation.html",
                            "status": "completed",
                        },
                        {
                            "content": "Create chapter 08-troubleshooting.html",
                            "active_form": "Creating chapter 08-troubleshooting.html",
                            "status": "pending",
                        },
                    ]
                },
            )
        ]
    )
    summary = TurnSummary(final_response="")
    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=summary,
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )
    assert queued_messages
    message = queued_messages[-1]
    assert "Todo tracking is updated. All explicitly planned artifacts now exist on disk." in message
    assert "Finish with a final response now so Loader can run verification automatically." in message
    # No repair framing and no mention of the unplanned expansion file.
    assert "Repair or verify the current files instead of expanding the artifact set." not in message
    assert "08-troubleshooting.html" not in message
    assert context.workflow_mode == "verify"
chapter 02-installation.html", "active_form": "Creating chapter 02-installation.html", "status": "completed", }, { "content": "Create chapter 08-troubleshooting.html", "active_form": "Creating chapter 08-troubleshooting.html", "status": "pending", }, ] }, ) ] ) summary = TurnSummary(final_response="") await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert queued_messages message = queued_messages[-1] assert "Todo tracking is updated. All explicitly planned artifacts now exist on disk." in message assert "Finish with a final response now so Loader can run verification automatically." in message assert "Repair or verify the current files instead of expanding the artifact set." not in message assert "08-troubleshooting.html" not in message assert context.workflow_mode == "verify" @pytest.mark.asyncio async def test_tool_batch_runner_todowrite_with_existing_output_roots_requeues_next_mutation( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should not run in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run in this scenario") guide_root = temp_dir / "guides" / "nginx" chapters = guide_root / "chapters" guide_root.mkdir(parents=True) chapters.mkdir() index_path = guide_root / "index.html" index_path.write_text( "\n".join( [ "", "", "", 'Introduction', "", "", "", ] ) ) implementation_plan = temp_dir / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{guide_root}/`", f"- `{chapters}/`", f"- `{index_path}`", "", ] ) ) 
context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) queued_messages: list[str] = [] context.queue_steering_message_callback = queued_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create a multi-file nginx guide.") dod.implementation_plan = str(implementation_plan) dod.touched_files.append(str(index_path)) sync_todos_to_definition_of_done( dod, [ { "content": "Examine the existing Fortran guide structure", "active_form": "Examining the existing Fortran guide structure", "status": "completed", }, { "content": "Create the nginx directory structure", "active_form": "Creating the nginx directory structure", "status": "completed", }, { "content": "Write the introduction chapter", "active_form": "Writing the introduction chapter", "status": "pending", }, ], project_root=temp_dir, ) tool_call = ToolCall( id="todo-next-mutation", name="TodoWrite", arguments={ "todos": [ { "content": "Examine the existing Fortran guide structure", "active_form": "Examining the existing Fortran guide structure", "status": "completed", }, { "content": "Create the nginx directory structure", "active_form": "Creating the nginx directory structure", "status": "completed", }, { "content": "Write the introduction chapter", "active_form": "Writing the introduction chapter", "status": "pending", }, ] }, ) executor = FakeExecutor( [ tool_outcome( tool_call=tool_call, output="Todos updated", is_error=False, metadata={ "new_todos": [ { "content": "Examine the existing Fortran guide structure", "active_form": "Examining the existing Fortran guide structure", "status": "completed", }, { "content": "Create the nginx directory structure", "active_form": "Creating the nginx directory structure", "status": "completed", }, { "content": "Write the introduction chapter", "active_form": "Writing the introduction chapter", 
"status": "pending", }, ] }, ) ] ) summary = TurnSummary(final_response="") await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert queued_messages message = queued_messages[-1] assert "Todo tracking is updated. Next step: create `01-introduction.html`." in message assert "Prefer one `write(file_path=..., content=...)` call" in message assert "Make your next response the concrete mutation tool call itself." in message @pytest.mark.asyncio async def test_tool_batch_runner_todowrite_prefers_pending_index_over_empty_output_directory( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should not run in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run in this scenario") guide_root = temp_dir / "Loader" / "guides" / "nginx" chapters = guide_root / "chapters" chapters.mkdir(parents=True) index_path = guide_root / "index.html" implementation_plan = temp_dir / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{chapters}/`", f"- `{index_path}`", "", ] ) ) dod = create_definition_of_done("Create a multi-file nginx guide.") dod.implementation_plan = str(implementation_plan) sync_todos_to_definition_of_done( dod, [ { "content": "Examine the existing Fortran guide structure to understand the format and depth", "active_form": "Examining the existing Fortran guide structure", "status": "completed", }, { "content": "Create the new nginx guide directory structure", "active_form": "Creating the new nginx guide directory structure", 
"status": "completed", }, { "content": "Create a new index.html for the nginx guide", "active_form": "Creating a new index.html for the nginx guide", "status": "pending", }, { "content": "Create the first chapter for the nginx guide", "active_form": "Creating the first chapter for the nginx guide", "status": "pending", }, ], project_root=temp_dir, ) queued_messages: list[str] = [] context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) context.queue_steering_message_callback = queued_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) todos = [ { "content": "Examine the existing Fortran guide structure to understand the format and depth", "active_form": "Examining the existing Fortran guide structure", "status": "completed", }, { "content": "Create the new nginx guide directory structure", "active_form": "Creating the new nginx guide directory structure", "status": "completed", }, { "content": "Create a new index.html for the nginx guide", "active_form": "Creating a new index.html for the nginx guide", "status": "pending", }, { "content": "Create the first chapter for the nginx guide", "active_form": "Creating the first chapter for the nginx guide", "status": "pending", }, ] tool_call = ToolCall( id="todo-index-before-chapter", name="TodoWrite", arguments={"todos": todos}, ) executor = FakeExecutor( [ tool_outcome( tool_call=tool_call, output="Todos updated", is_error=False, metadata={"new_todos": todos}, ) ] ) summary = TurnSummary(final_response="") await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert queued_messages message = queued_messages[-1] assert "Todo tracking is updated. 
@pytest.mark.asyncio
async def test_tool_batch_runner_todowrite_with_declared_child_targets_names_next_missing_file(
    temp_dir: Path,
) -> None:
    """When the index already links child targets, the TodoWrite nudge names
    the next missing child file (`introduction.html`) derived from the
    declared links rather than a generic step."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        # Guard: confidence scoring must not be invoked in this scenario.
        raise AssertionError("Confidence scoring should not run in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        # Guard: action verification must not be invoked in this scenario.
        raise AssertionError("Verification should not run in this scenario")

    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    guide_root.mkdir(parents=True)
    chapters.mkdir()
    index_path = guide_root / "index.html"
    # NOTE(review): HTML markup in this fixture appears stripped by a
    # formatting pass — confirm the original literal against version control.
    index_path.write_text(
        "\n".join(
            [
                "",
                'Introduction',
                'Installation',
                "",
            ]
        )
        + "\n"
    )
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}/`",
                f"- `{chapters}/`",
                f"- `{index_path}`",
                "",
            ]
        )
    )
    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    dod.pending_items = [
        "Write the introduction chapter",
        "Complete the requested work",
    ]
    dod.touched_files.append(str(index_path))
    queued_messages: list[str] = []
    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    context.queue_steering_message_callback = queued_messages.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    tool_call = ToolCall(
        id="todo-1",
        name="TodoWrite",
        arguments={
            "todos": [
                {
                    "content": "Write the introduction chapter",
                    "activeForm": "Writing the introduction chapter",
                    "status": "pending",
                }
            ]
        },
    )
    executor = FakeExecutor(
        [
            tool_outcome(
                tool_call=tool_call,
                output="Todos updated",
                is_error=False,
                metadata={
                    "new_todos": [
                        {
                            "content": "Write the introduction chapter",
                            "active_form": "Writing the introduction chapter",
                            "status": "pending",
                        }
                    ]
                },
            )
        ]
    )
    summary = TurnSummary(final_response="")
    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=summary,
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )
    assert queued_messages
    message = queued_messages[-1]
    assert "Todo tracking is updated. Next step: create `introduction.html`." in message
    assert "Prefer one `write(file_path=..., content=...)` call" in message
    assert "Make your next response the concrete mutation tool call itself." in message
@pytest.mark.asyncio
async def test_tool_batch_runner_todowrite_names_concrete_pending_file_after_artifacts_exist(
    temp_dir: Path,
) -> None:
    """With the index and chapter 1 on disk, a pending "Chapter 2" todo makes
    the TodoWrite nudge name the concrete missing file
    (`02-installation.html`) inferred from the existing artifacts."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        # Guard: confidence scoring must not be invoked in this scenario.
        raise AssertionError("Confidence scoring should not run in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        # Guard: action verification must not be invoked in this scenario.
        raise AssertionError("Verification should not run in this scenario")

    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    guide_root.mkdir(parents=True)
    chapters.mkdir()
    index_path = guide_root / "index.html"
    chapter_one = chapters / "01-introduction.html"
    # NOTE(review): HTML markup in this fixture appears stripped by a
    # formatting pass — confirm the original literal against version control.
    index_path.write_text(
        "\n".join(
            [
                "",
                'Chapter 1: Introduction to NGINX Tool',
                'Chapter 2: Installation and Setup',
                "",
            ]
        )
        + "\n"
    )
    chapter_one.write_text("\n")
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}/`",
                f"- `{chapters}/`",
                f"- `{index_path}`",
                "",
            ]
        )
    )
    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    dod.pending_items = [
        "Creating Chapter 2: Installation and Setup",
        "Complete the requested work",
    ]
    dod.touched_files.extend([str(index_path), str(chapter_one)])
    queued_messages: list[str] = []
    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    context.queue_steering_message_callback = queued_messages.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    tool_call = ToolCall(
        id="todo-1",
        name="TodoWrite",
        arguments={
            "todos": [
                {
                    "content": "Creating Chapter 2: Installation and Setup",
                    "activeForm": "Creating Chapter 2: Installation and Setup",
                    "status": "pending",
                }
            ]
        },
    )
    executor = FakeExecutor(
        [
            tool_outcome(
                tool_call=tool_call,
                output="Todos updated",
                is_error=False,
                metadata={
                    "new_todos": [
                        {
                            "content": "Creating Chapter 2: Installation and Setup",
                            "active_form": "Creating Chapter 2: Installation and Setup",
                            "status": "pending",
                        }
                    ]
                },
            )
        ]
    )
    summary = TurnSummary(final_response="")
    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=summary,
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )
    assert queued_messages
    message = queued_messages[-1]
    assert "Todo tracking is updated. Next step: create `02-installation.html`." in message
    assert "Prefer one `write(file_path=..., content=...)` call" in message
    assert "Make your next response the concrete mutation tool call itself" in message
@pytest.mark.asyncio
async def test_tool_batch_runner_todowrite_uses_observed_sibling_pattern_for_next_file(
    temp_dir: Path,
) -> None:
    """When the session shows a prior `read` of a reference chapter, the
    TodoWrite nudge reuses that observed sibling naming pattern
    (`01-introduction.html`) for the next file to create."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        # Guard: confidence scoring must not be invoked in this scenario.
        raise AssertionError("Confidence scoring should not run in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        # Guard: action verification must not be invoked in this scenario.
        raise AssertionError("Verification should not run in this scenario")

    reference_chapters = temp_dir / "fortran" / "chapters"
    reference_chapters.mkdir(parents=True)
    # NOTE(review): this literal was a small HTML document whose markup
    # appears stripped by a formatting pass — confirm against version control.
    (reference_chapters / "01-introduction.html").write_text("\n\n    Introduction\n\n    \n")
    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    guide_root.mkdir(parents=True)
    chapters.mkdir()
    index_path = guide_root / "index.html"
    index_path.write_text("\n")
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}/`",
                f"- `{chapters}/`",
                f"- `{index_path}`",
                "",
            ]
        )
    )
    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    dod.pending_items = [
        "Write the introduction chapter",
        "Complete the requested work",
    ]
    dod.touched_files.append(str(index_path))
    queued_messages: list[str] = []
    # Seed the session with an assistant turn that read the reference chapter,
    # so the runner can observe the sibling file-name pattern.
    context = build_context(
        temp_dir=temp_dir,
        messages=[
            Message(
                role=Role.ASSISTANT,
                content="",
                tool_calls=[
                    ToolCall(
                        id="read-ref-1",
                        name="read",
                        arguments={"file_path": str(reference_chapters / "01-introduction.html")},
                    )
                ],
            )
        ],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    context.queue_steering_message_callback = queued_messages.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    tool_call = ToolCall(
        id="todo-observed-1",
        name="TodoWrite",
        arguments={
            "todos": [
                {
                    "content": "Write the introduction chapter",
                    "activeForm": "Writing the introduction chapter",
                    "status": "pending",
                }
            ]
        },
    )
    executor = FakeExecutor(
        [
            tool_outcome(
                tool_call=tool_call,
                output="Todos updated",
                is_error=False,
                metadata={
                    "new_todos": [
                        {
                            "content": "Write the introduction chapter",
                            "active_form": "Writing the introduction chapter",
                            "status": "pending",
                        }
                    ]
                },
            )
        ]
    )
    summary = TurnSummary(final_response="")
    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=summary,
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )
    assert queued_messages
    message = queued_messages[-1]
    assert "Todo tracking is updated. Next step: create `01-introduction.html`." in message
    assert "Prefer one `write(file_path=..., content=...)` call" in message
@pytest.mark.asyncio
async def test_tool_batch_runner_bookkeeping_note_with_missing_artifact_requeues_resume_step(
    temp_dir: Path,
) -> None:
    """A working-note tool call while a declared artifact is still missing
    queues a resume nudge that names the missing file and forbids spending the
    next turn on more bookkeeping."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        # Guard: confidence scoring must not be invoked in this scenario.
        raise AssertionError("Confidence scoring should not run in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        # Guard: action verification must not be invoked in this scenario.
        raise AssertionError("Verification should not run in this scenario")

    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    guide_root.mkdir(parents=True)
    chapters.mkdir()
    index_path = guide_root / "index.html"
    chapter_one = chapters / "01-getting-started.html"
    chapter_two = chapters / "02-installation.html"  # declared but never written
    index_path.write_text("\n")
    # NOTE(review): this literal was a small HTML document whose markup
    # appears stripped by a formatting pass — confirm against version control.
    chapter_one.write_text("\n\n    One\n\n    \n")
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}/`",
                f"- `{chapters}/`",
                f"- `{index_path}`",
                f"- `{chapter_one}`",
                f"- `{chapter_two}`",
                "",
            ]
        )
    )
    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    queued_messages: list[str] = []
    context.queue_steering_message_callback = queued_messages.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    sync_todos_to_definition_of_done(
        dod,
        [
            {
                "content": "Create 01-getting-started.html",
                "active_form": "Creating 01-getting-started.html",
                "status": "completed",
            },
            {
                "content": "Create 02-installation.html",
                "active_form": "Creating 02-installation.html",
                "status": "pending",
            },
        ],
        project_root=temp_dir,
    )
    dod.touched_files.extend([str(index_path), str(chapter_one)])
    tool_call = ToolCall(
        id="working-note",
        name="notepad_write_working",
        arguments={"content": "Creating the second chapter file: Installation"},
    )
    executor = FakeExecutor(
        [
            tool_outcome(
                tool_call=tool_call,
                output="Working note recorded",
                is_error=False,
            )
        ]
    )
    summary = TurnSummary(final_response="")
    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=summary,
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )
    assert queued_messages
    message = queued_messages[-1]
    assert "Bookkeeping note is recorded. A declared output artifact is still missing." in message
    assert "Resume by creating `02-installation.html` now." in message
    assert "Make your next response the concrete mutation tool call itself" in message
    assert "refresh `TodoWrite`" in message
    assert "Do not spend the next turn on additional notes, rediscovery, verification, or final confirmation" in message
@pytest.mark.asyncio
async def test_tool_batch_runner_working_note_respects_discovery_first_pending_step(
    temp_dir: Path,
) -> None:
    """When nothing has been built yet and the first pending item is a
    discovery step, a working note queues a continue-with-discovery nudge
    rather than a file-creation resume step."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        # Guard: confidence scoring is disabled for this scenario.
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        # Guard: action verification must not be invoked in this scenario.
        raise AssertionError("Verification should not run in this scenario")

    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{temp_dir / 'guides' / 'nginx' / 'index.html'}`",
                f"- `{temp_dir / 'guides' / 'nginx' / 'chapters'}`",
                "",
            ]
        )
    )
    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    queued_messages: list[str] = []
    context.queue_steering_message_callback = queued_messages.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    # Discovery is deliberately the first pending item.
    dod.pending_items.extend(
        [
            "First, examine the existing fortran guide structure and content to understand the format",
            "Create the nginx directory structure",
            "Develop the main index.html file for the nginx guide",
        ]
    )
    tool_call = ToolCall(
        id="working-note",
        name="notepad_write_working",
        arguments={"content": "Analyzing the fortran guide structure before creating nginx guide"},
    )
    executor = FakeExecutor(
        [
            tool_outcome(
                tool_call=tool_call,
                output="Working note recorded",
                is_error=False,
            )
        ]
    )
    summary = TurnSummary(final_response="")
    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=summary,
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )
    assert queued_messages
    message = queued_messages[-1]
    assert (
        "Continue with the next pending item: `First, examine the existing fortran guide structure and content to understand the format`."
        in message
    )
    assert "one concrete evidence-gathering tool call" in message
    # No premature file-creation resume step while discovery is pending.
    assert "Resume by creating `index.html` now." not in message
@pytest.mark.asyncio
async def test_tool_batch_runner_working_note_prefers_declared_output_gap_over_stale_discovery(
    temp_dir: Path,
) -> None:
    """Once real outputs exist, a working note prioritizes the missing
    declared artifact (`02-installation.html`) over a stale discovery item
    that is still listed as pending."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        # Guard: confidence scoring is disabled for this scenario.
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        # Guard: action verification must not be invoked in this scenario.
        raise AssertionError("Verification should not run in this scenario")

    guide_root = temp_dir / "guides" / "nginx"
    chapters_dir = guide_root / "chapters"
    chapters_dir.mkdir(parents=True)
    index_path = guide_root / "index.html"
    first_chapter = chapters_dir / "01-introduction.html"
    # NOTE(review): HTML markup in these fixtures appears stripped by a
    # formatting pass — confirm the original literals against version control.
    index_path.write_text(
        "\n".join(
            [
                'Introduction',
                'Installation',
                'Configuration',
            ]
        )
    )
    first_chapter.write_text("\n\n    Introduction\n\n    \n")
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root / 'index.html'}`",
                f"- `{chapters_dir}/`",
                "",
            ]
        )
    )
    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    queued_messages: list[str] = []
    context.queue_steering_message_callback = queued_messages.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    # Stale discovery item intentionally still pending alongside real work.
    dod.pending_items.extend(
        [
            "First, examine the existing fortran guide structure and content to understand the format",
            "Create chapter files following the established pattern",
        ]
    )
    dod.touched_files.extend([str(index_path), str(first_chapter)])
    tool_call = ToolCall(
        id="working-note",
        name="notepad_write_working",
        arguments={"content": "Created index and first chapter; next is chapter 2"},
    )
    executor = FakeExecutor(
        [
            tool_outcome(
                tool_call=tool_call,
                output="Working note recorded",
                is_error=False,
            )
        ]
    )
    summary = TurnSummary(final_response="")
    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=summary,
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )
    assert queued_messages
    message = queued_messages[-1]
    assert "Bookkeeping note is recorded. A declared output artifact is still missing." in message
    assert "Resume by creating `02-installation.html` now." in message
    # The stale discovery item must not be re-surfaced.
    assert "Continue with the next pending item: `First, examine the existing fortran guide structure" not in message
@pytest.mark.asyncio
async def test_tool_batch_runner_shallow_glob_does_not_handoff_before_content_read(
    temp_dir: Path,
) -> None:
    """A shallow `glob` of the reference tree (directories only, no content
    read yet) must not trigger any steering message — discovery is still
    legitimately in progress."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        # Guard: confidence scoring is disabled for this scenario.
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        # Guard: action verification must not be invoked in this scenario.
        raise AssertionError("Verification should not run in this scenario")

    fortran_root = temp_dir / "Loader" / "guides" / "fortran"
    chapters_dir = fortran_root / "chapters"
    chapters_dir.mkdir(parents=True)
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{temp_dir / 'Loader' / 'guides' / 'nginx' / 'index.html'}`",
                f"- `{temp_dir / 'Loader' / 'guides' / 'nginx' / 'chapters'}`",
                "",
            ]
        )
    )
    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    queued_messages: list[str] = []
    context.queue_steering_message_callback = queued_messages.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    dod.pending_items.extend(
        [
            "First, examine the existing fortran guide structure and content",
            "Create the nginx directory structure",
            "Develop the main index.html file for nginx guide",
        ]
    )
    tool_call = ToolCall(
        id="glob-1",
        name="glob",
        arguments={"pattern": "**", "path": str(fortran_root)},
    )
    executor = FakeExecutor(
        [
            tool_outcome(
                tool_call=tool_call,
                # The glob result lists directories only — no file content seen.
                output=f"{fortran_root}\n{chapters_dir}",
                is_error=False,
            )
        ]
    )
    summary = TurnSummary(final_response="")
    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=summary,
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )
    # No handoff/steering message before any content has been read.
    assert queued_messages == []
tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert queued_messages == [] @pytest.mark.asyncio async def test_tool_batch_runner_hands_off_noop_toc_edit_when_file_is_already_valid( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should not run in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run in this scenario") prompt = ( "Have a look at ~/Loader/guides/fortran/index.html, then " "~/Loader/guides/fortran/chapters. The table of contents links in " "index.html are inaccurate and the href’s are wrong. Let’s update the " "links and their link texts to be correct." ) chapters = temp_dir / "chapters" chapters.mkdir() (chapters / "01-introduction.html").write_text( "

    Chapter 1: Introduction to Fortran

    \n" ) (chapters / "02-setup.html").write_text( "

    Chapter 2: Setting Up Your Environment

    \n" ) current_block = ( "

    Table of Contents

    \n" ' \n" ) index_path = temp_dir / "index.html" index_path.write_text(current_block) context = build_context( temp_dir=temp_dir, messages=[], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, auto_recover=False, ) context.session.current_task = prompt # type: ignore[attr-defined] queued_messages: list[str] = [] context.queue_steering_message_callback = queued_messages.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) tool_call = ToolCall( id="edit-1", name="edit", arguments={ "file_path": str(index_path), "old_string": current_block, "new_string": current_block, }, ) executor = FakeExecutor( [ tool_outcome( tool_call=tool_call, output=( "[Blocked - old_string and new_string are identical - no change " "would occur] Suggestion: Provide different old and new strings" ), is_error=True, state=ToolExecutionState.BLOCKED, ) ] ) await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=TurnSummary(final_response=""), dod=create_definition_of_done(prompt), executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert queued_messages == [] def test_tool_batch_runner_blocked_noop_edit_nudge_stays_on_active_repair_target( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should be disabled in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run in this scenario") repair_target = temp_dir / "guide" / "chapters" / "04-basic-usage.html" context = build_context( temp_dir=temp_dir, messages=[ Message( role=Role.ASSISTANT, content=( "Repair focus:\n" f"- Fix the broken local reference `05-advanced-topics.html` 
in `{repair_target}`.\n" f"- Immediate next step: edit `{repair_target}`.\n" f"- If the broken reference should remain, create `{temp_dir / 'guide' / 'chapters' / '05-advanced-topics.html'}`; otherwise remove or replace `05-advanced-topics.html`.\n" ), ) ], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, ) queued: list[str] = [] context.queue_steering_message_callback = queued.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Repair a guide page.") runner._queue_blocked_html_edit_nudge( ToolCall( id="edit-1", name="edit", arguments={ "file_path": str(repair_target), "old_string": "same", "new_string": "same", }, ), "[Blocked - old_string and new_string are identical - no change would occur] Suggestion: Provide different old and new strings", dod=dod, ) assert queued assert str(repair_target) in queued[0] assert "no on-disk change" in queued[0] assert "replace the surrounding block" in queued[0] assert "Do not reopen unrelated reference materials" in queued[0] def test_tool_batch_runner_blocked_noop_edit_after_full_build_prefers_verification( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should be disabled in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run in this scenario") guide_root = temp_dir / "guide" chapters = guide_root / "chapters" chapters.mkdir(parents=True) index_path = guide_root / "index.html" chapter_one = chapters / "01-introduction.html" index_path.write_text("\n") chapter_one.write_text("\n") implementation_plan = temp_dir / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{index_path}`", f"- `{chapter_one}`", "", ] ) ) 
context = build_context( temp_dir=temp_dir, messages=[ Message( role=Role.ASSISTANT, content=( "Repair focus:\n" f"- Confirm the final guide state in `{index_path}`.\n" f"- Immediate next step: verify `{index_path}` if no concrete mismatch remains.\n" ), ) ], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, ) queued: list[str] = [] context.queue_steering_message_callback = queued.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create a multi-file guide.") dod.implementation_plan = str(implementation_plan) dod.touched_files.extend([str(index_path), str(chapter_one)]) dod.verification_commands = [f"ls -la {guide_root}"] runner._queue_blocked_html_edit_nudge( ToolCall( id="edit-1", name="edit", arguments={ "file_path": str(index_path), "old_string": "same", "new_string": "same", }, ), "[Blocked - old_string and new_string are identical - no change would occur] Suggestion: Provide different old and new strings", dod=dod, ) assert queued assert "All explicitly planned artifacts already exist." in queued[0] assert "Finish with a final response now so Loader can run verification automatically." 
in queued[0] assert "replace the surrounding block" not in queued[0] def test_tool_batch_runner_blocked_noop_edit_keeps_quality_repair_active_after_full_build( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should be disabled in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run in this scenario") guide_root = temp_dir / "guide" chapters = guide_root / "chapters" chapters.mkdir(parents=True) index_path = guide_root / "index.html" chapter_one = chapters / "01-introduction.html" chapter_two = chapters / "02-installation.html" index_path.write_text("\n") chapter_one.write_text("\n") chapter_two.write_text("\n") implementation_plan = temp_dir / "implementation.md" implementation_plan.write_text( "\n".join( [ "# Implementation Plan", "", "## File Changes", f"- `{index_path}`", f"- `{chapter_one}`", f"- `{chapter_two}`", "", ] ) ) context = build_context( temp_dir=temp_dir, messages=[ Message( role=Role.USER, content=( "Repair focus:\n" f"- Improve `{chapter_two}`: thin content (504 text chars, expected at least 1758).\n" f"- Improve `{chapter_two}`: insufficient structured content (6 blocks, expected at least 18).\n" f"- Immediate next step: edit `{chapter_two}`.\n" ), ) ], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, ) queued: list[str] = [] context.queue_steering_message_callback = queued.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create a multi-file guide.") dod.implementation_plan = str(implementation_plan) dod.touched_files.extend([str(index_path), str(chapter_one), str(chapter_two)]) dod.verification_commands = [f"ls -la {guide_root}"] runner._queue_blocked_html_edit_nudge( ToolCall( id="edit-1", 
name="edit",
            # NOTE(review): the lines above/below complete the ToolCall from the
            # preceding collapsed line (tail of the keeps-quality-repair test).
            arguments={
                "file_path": str(chapter_two),
                "old_string": "same",
                "new_string": "same",
            },
        ),
        "[Blocked - old_string and new_string are identical - no change would occur] Suggestion: Provide different old and new strings",
        dod=dod,
    )
    # The quality-repair nudge must stay on the repair target rather than
    # declaring the build finished.
    assert queued
    assert "active content-quality repair is not complete" in queued[0]
    assert "Repair focus:" in queued[0]
    assert f"Immediate next step: edit `{chapter_two}`" in queued[0]
    assert "thin content" in queued[0]
    assert "TodoWrite cannot satisfy" not in queued[0]
    assert "Finish with a final response now" not in queued[0]


async def _noop_emit(event: AgentEvent) -> None:
    """No-op event sink for tests that do not inspect emitted events."""
    return None


@pytest.mark.asyncio
async def test_tool_batch_runner_marks_verification_planned_after_new_mutation(
    temp_dir: Path,
) -> None:
    """A successful file mutation flips the DoD into the 'planned' verification state.

    After one successful `write`, the runner is expected to plan verification:
    set `last_verification_result` to "planned", populate verification
    commands, queue the evidence-collection item, open attempt #1, and record
    a `verification_planned` entry in the workflow timeline.
    """

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run for this scenario")

    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    # A single mutating tool call: rewrite README.md.
    tool_call = ToolCall(
        id="write-1",
        name="write",
        arguments={"file_path": str(temp_dir / "README.md"), "content": "updated\n"},
    )
    executor = FakeExecutor(
        [tool_outcome(tool_call=tool_call, output="wrote file", is_error=False)]
    )
    summary = TurnSummary(final_response="")
    dod = create_definition_of_done("Update README and verify it still works.")
    events: list[AgentEvent] = []

    async def emit(event: AgentEvent) -> None:
        events.append(event)

    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=emit,
        summary=summary,
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )
    # DoD state: verification planned, first attempt opened.
    assert dod.last_verification_result == "planned"
    assert dod.verification_commands
    assert "Collect verification evidence" in dod.pending_items
    assert dod.active_verification_attempt_id == "verification-attempt-1"
    assert dod.active_verification_attempt_number == 1
    # Timeline: the latest entry documents the planned verification.
    assert summary.workflow_timeline[-1].reason_code == "verification_planned"
    assert summary.workflow_timeline[-1].policy_outcome == "planned"
    assert summary.workflow_timeline[-1].verification_observations[0].status == "planned"
    assert (
        summary.workflow_timeline[-1].verification_observations[0].attempt_id
        == "verification-attempt-1"
    )
    assert (
        summary.workflow_timeline[-1].verification_observations[0].attempt_number == 1
    )


@pytest.mark.asyncio
async def test_tool_batch_runner_does_not_mark_verification_planned_after_setup_only_mkdir(
    temp_dir: Path,
) -> None:
    # NOTE(review): this test continues past the end of this edit's range; the
    # remainder sits on the next collapsed line and is kept there verbatim.
    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    nginx_root = temp_dir / "Loader" / "guides" / "nginx"
    chapters = nginx_root / "chapters"
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{chapters}/`",
                f"- `{nginx_root / 'index.html'}`",
                "",
            ]
        )
    )
    # Setup-only mutation: directory creation, no content artifact written yet.
    tool_call = ToolCall(
        id="mkdir-1",
        name="bash",
        arguments={"command": f"mkdir -p {chapters}"},
    )
    executor = FakeExecutor(
        [tool_outcome(tool_call=tool_call, output="", is_error=False)]
    )
summary = TurnSummary(final_response="")
    # NOTE(review): the lines above/below complete the setup-only-mkdir test
    # whose head sits on the preceding collapsed line.
    dod = create_definition_of_done("Create an equally thorough nginx guide with chapters.")
    dod.implementation_plan = str(implementation_plan)
    events: list[AgentEvent] = []

    async def emit(event: AgentEvent) -> None:
        events.append(event)

    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=emit,
        summary=summary,
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )
    # A setup-only mkdir must NOT trigger verification planning.
    assert dod.last_verification_result is None
    assert "Collect verification evidence" not in dod.pending_items
    assert not any(
        entry.reason_code == "verification_planned" for entry in summary.workflow_timeline
    )


@pytest.mark.asyncio
async def test_tool_batch_runner_does_not_mark_verification_planned_while_chapter_build_pending(
    temp_dir: Path,
) -> None:
    """Writing one planned artifact while others are pending plans no verification.

    The index file is written, but a chapter build item is still pending, so
    the runner must leave `last_verification_result` unset, keep the pending
    item, and record no `verification_planned` timeline entry.
    """

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    nginx_root = temp_dir / "Loader" / "guides" / "nginx"
    chapters = nginx_root / "chapters"
    chapters.mkdir(parents=True)
    index_path = nginx_root / "index.html"
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{nginx_root}/`",
                f"- `{chapters}/`",
                f"- `{index_path}`",
                "",
            ]
        )
    )
    # Only the index file gets written; the chapter files remain unbuilt.
    tool_call = ToolCall(
        id="write-index",
        name="write",
        arguments={"file_path": str(index_path), "content": "\n"},
    )
    executor = FakeExecutor(
        [tool_outcome(tool_call=tool_call, output="wrote file", is_error=False)]
    )
    summary = TurnSummary(final_response="")
    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    dod.pending_items.extend(
        [
            "Develop the main index.html file with proper structure",
            "Create first nginx chapter",
        ]
    )
    events: list[AgentEvent] = []

    async def emit(event: AgentEvent) -> None:
        events.append(event)

    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=emit,
        summary=summary,
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )
    # Verification stays unplanned while the chapter build is still pending.
    assert dod.last_verification_result is None
    assert "Collect verification evidence" not in dod.pending_items
    assert "Create first nginx chapter" in dod.pending_items
    assert not any(
        entry.reason_code == "verification_planned" for entry in summary.workflow_timeline
    )


@pytest.mark.asyncio
async def test_tool_batch_runner_marks_passed_verification_stale_after_new_mutation(
    temp_dir: Path,
) -> None:
    # NOTE(review): this test continues on the next collapsed line (kept there
    # verbatim); the trailing `summary =` below is completed by that line.
    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run for this scenario")

    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    tool_call = ToolCall(
        id="write-1",
        name="write",
        arguments={"file_path": str(temp_dir / "README.md"), "content": "updated\n"},
    )
    executor = FakeExecutor(
        [tool_outcome(tool_call=tool_call, output="wrote file", is_error=False)]
    )
    summary = 
TurnSummary(final_response="") dod = create_definition_of_done("Update README and verify it still works.") dod.verification_commands = ["uv run pytest -q"] dod.last_verification_result = "passed" dod.verification_attempt_counter = 1 dod.active_verification_attempt_id = "verification-attempt-1" dod.active_verification_attempt_number = 1 dod.evidence = [ VerificationEvidence( command="uv run pytest -q", passed=True, stdout="401 passed", kind="test", ) ] dod.completed_items.append("Collect verification evidence") events: list[AgentEvent] = [] async def emit(event: AgentEvent) -> None: events.append(event) await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=emit, summary=summary, dod=dod, executor=executor, # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert dod.last_verification_result == "stale" assert dod.evidence == [] assert "Collect verification evidence" in dod.pending_items assert "Collect verification evidence" not in dod.completed_items assert dod.active_verification_attempt_id == "verification-attempt-2" assert dod.active_verification_attempt_number == 2 assert summary.workflow_timeline[-1].reason_code == "verification_stale" assert summary.workflow_timeline[-1].policy_outcome == "stale" assert summary.workflow_timeline[-1].verification_observations[0].status == "stale" assert ( summary.workflow_timeline[-1].verification_observations[0].attempt_id == "verification-attempt-1" ) assert ( summary.workflow_timeline[-1].verification_observations[0].attempt_number == 1 ) assert ( summary.workflow_timeline[-1].verification_observations[0].supersedes_attempt_id == "verification-attempt-2" ) assert ( summary.workflow_timeline[-1].verification_observations[0].command == "uv run pytest -q" ) def test_tool_batch_runner_blocked_active_repair_nudge_uses_repair_scope(temp_dir: Path) -> None: async def assess_confidence( tool_name: str, 
tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should be disabled in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run in this scenario") repair_target = temp_dir / "guide" / "index.html" context = build_context( temp_dir=temp_dir, messages=[ Message( role=Role.ASSISTANT, content=( "Repair focus:\n" f"- Fix the broken local reference `chapters/01-getting-started.html` in `{repair_target}`.\n" f"- Immediate next step: edit `{repair_target}`.\n" f"- If the broken reference should remain, create `{temp_dir / 'guide' / 'chapters' / '01-getting-started.html'}`; otherwise remove or replace `chapters/01-getting-started.html`.\n" ), ) ], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, ) queued: list[str] = [] context.queue_steering_message_callback = queued.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) runner._queue_blocked_active_repair_nudge( "[Blocked - active repair scope: verification already identified the repair target.]" ) assert queued assert str(repair_target) in queued[0] assert str(temp_dir / "guide" / "chapters" / "01-getting-started.html") in queued[0] assert "Do not reopen unrelated reference materials" in queued[0] def test_tool_batch_runner_blocked_active_repair_mutation_nudge_uses_allowed_paths( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should be disabled in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run in this scenario") repair_target = temp_dir / "guide" / "chapters" / "05-advanced-configurations.html" stylesheet = temp_dir / "guide" / 
"styles.css" context = build_context( temp_dir=temp_dir, messages=[ Message( role=Role.ASSISTANT, content=( "Repair focus:\n" f"- Fix the broken local reference `../styles.css` in `{repair_target}`.\n" f"- Immediate next step: edit `{repair_target}`.\n" f"- If the broken reference should remain, create `{stylesheet}`; otherwise remove or replace `../styles.css`.\n" ), ) ], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, ) queued: list[str] = [] context.queue_steering_message_callback = queued.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) runner._queue_blocked_active_repair_mutation_nudge( "[Blocked - active repair mutation scope: verification already identified the repair target.]" ) assert queued assert str(repair_target) in queued[0] assert str(stylesheet) in queued[0] assert "before widening the change set" in queued[0] def test_tool_batch_runner_duplicate_repair_mutation_restates_verifier_deltas( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should be disabled in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run in this scenario") index_path = temp_dir / "guide" / "index.html" chapter_path = temp_dir / "guide" / "chapters" / "02-installation.html" context = build_context( temp_dir=temp_dir, messages=[ Message( role=Role.USER, content=( "Repair focus:\n" f"- Improve `{index_path}`: insufficient structured content (9 blocks, expected at least 12).\n" f"- Improve `{chapter_path}`: thin content (526 text chars, expected at least 1758).\n" f"- Immediate next step: edit `{index_path}`.\n" "- Update the listed generated artifacts directly; do not recreate the artifact set.\n" ), ) ], safeguards=FakeSafeguards(), 
assess_confidence=assess_confidence, verify_action=verify_action, ) queued: list[str] = [] context.queue_steering_message_callback = queued.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create a multi-file guide.") runner._queue_duplicate_mutation_nudge( # type: ignore[attr-defined] ToolCall( id="dup-write", name="write", arguments={"file_path": str(index_path), "content": "

    same

    "}, ), dod=dod, ) assert queued assert "skipped because it would not change" in queued[0] assert "Do not submit the same content again" in queued[0] assert "insufficient structured content" in queued[0] assert "thin content" in queued[0] assert "make one real edit" in queued[0] @pytest.mark.asyncio async def test_tool_batch_runner_quality_repair_success_hands_to_next_target( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should be disabled in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run in this scenario") chapters = temp_dir / "guide" / "chapters" first = chapters / "01-introduction.html" second = chapters / "02-installation.html" chapters.mkdir(parents=True) first.write_text("

    Intro

    \n") second.write_text("

    Install

    \n") context = build_context( temp_dir=temp_dir, messages=[ Message( role=Role.ASSISTANT, content=( "Repair focus:\n" f"- Improve `{first}`: thin content (400 text chars, expected at least 1758).\n" f"- Improve `{second}`: insufficient structured content (6 blocks, expected at least 18).\n" f"- Immediate next step: edit `{first}` with a substantial expansion or replacement.\n" "- Repair every listed quality target in order before any final answer.\n" ), ) ], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, ) queued: list[str] = [] context.queue_steering_message_callback = queued.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Repair generated HTML guide quality.") tool_call = ToolCall( id="write-intro", name="write", arguments={ "file_path": str(first), "content": "

    Intro

    Substantial expansion.

    \n", }, ) await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=TurnSummary(final_response=""), dod=dod, executor=FakeExecutor( [ tool_outcome( tool_call=tool_call, output=f"Successfully wrote {first}", is_error=False, ) ] ), # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert queued handoff = next(message for message in queued if "next listed quality target" in message) assert str(second.resolve(strict=False)) in handoff assert "Do not rerun verification" in handoff assert "Repair focus:" in handoff assert "insufficient structured content" in handoff assert f"Immediate next step: edit `{second.resolve(strict=False)}`" in handoff assert all("All explicitly planned artifacts now exist" not in message for message in queued) @pytest.mark.asyncio async def test_tool_batch_runner_continues_missing_declared_repairs_before_verify( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should be disabled in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run in this scenario") guide_root = temp_dir / "guide" chapters = guide_root / "chapters" index = guide_root / "index.html" first_missing = chapters / "02-installation.html" second_missing = chapters / "03-configuration.html" chapters.mkdir(parents=True) index.write_text( 'Install\n' 'Configure\n' ) context = build_context( temp_dir=temp_dir, messages=[ Message( role=Role.USER, content=( "Repair focus:\n" f"- Continue the declared output set by creating missing planned artifact `{first_missing}`.\n" f"- Continue the declared output set by creating missing planned artifact `{second_missing}`.\n" f"- Existing file `{index}` 
already references `chapters/02-installation.html` -> `{first_missing}`.\n" f"- Immediate next step: write `{first_missing}`.\n" "- Continue one missing declared output at a time until the declared set exists.\n" ), ) ], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, ) queued: list[str] = [] context.queue_steering_message_callback = queued.append runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir)) dod = create_definition_of_done("Create a multi-file guide.") tool_call = ToolCall( id="write-install", name="write", arguments={ "file_path": str(first_missing), "content": "

    Install

    ", }, ) await runner.execute_batch( tool_calls=[tool_call], tool_source="assistant", pending_tool_calls_seen=set(), emit=_noop_emit, summary=TurnSummary(final_response=""), dod=dod, executor=FakeExecutor( [ tool_outcome( tool_call=tool_call, output=f"Successfully wrote {first_missing}", is_error=False, ) ] ), # type: ignore[arg-type] on_confirmation=None, on_user_question=None, emit_confirmation=None, consecutive_errors=0, ) assert queued handoff = queued[-1] assert "declared artifact set is still incomplete" in handoff assert str(second_missing.resolve(strict=False)) in handoff assert f"Immediate next step: write `{second_missing.resolve(strict=False)}`" in handoff assert "Do not run verification" in handoff assert "Finish with a final response now" not in handoff @pytest.mark.asyncio async def test_tool_batch_runner_hands_off_after_active_repair_support_file_write( temp_dir: Path, ) -> None: async def assess_confidence( tool_name: str, tool_args: dict, context: str, ) -> ConfidenceAssessment: raise AssertionError("Confidence scoring should be disabled in this scenario") async def verify_action( tool_name: str, tool_args: dict, result: str, expected: str = "", ) -> ActionVerification: raise AssertionError("Verification should not run in this scenario") repair_target = temp_dir / "guide" / "index.html" stylesheet = temp_dir / "guide" / "style.css" repair_target.parent.mkdir(parents=True) repair_target.write_text('\n') context = build_context( temp_dir=temp_dir, messages=[ Message( role=Role.ASSISTANT, content=( "Repair focus:\n" f"- Fix the broken local reference `style.css` in `{repair_target}`.\n" f"- Immediate next step: edit `{repair_target}`.\n" f"- If the broken reference should remain, create `{stylesheet}`; otherwise remove or replace `style.css`.\n" ), ) ], safeguards=FakeSafeguards(), assess_confidence=assess_confidence, verify_action=verify_action, ) queued: list[str] = [] context.queue_steering_message_callback = queued.append runner = 
ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    # Tail of a test whose `def` precedes this chunk: a successful `write` that
    # recreates the support file for the active verification repair should queue
    # steering messages pointing back at verification.
    dod = create_definition_of_done("Repair a guide stylesheet link.")
    tool_call = ToolCall(
        id="write-style",
        name="write",
        arguments={
            "file_path": str(stylesheet),
            "content": "body { font-family: sans-serif; }\n",
        },
    )
    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=TurnSummary(final_response=""),
        dod=dod,
        executor=FakeExecutor(
            [
                tool_outcome(
                    tool_call=tool_call,
                    output=f"Successfully wrote {stylesheet}",
                    is_error=False,
                )
            ]
        ),  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )
    assert queued
    assert any("support file for the active verification repair now exists" in message for message in queued)
    assert any("Do not retarget" in message for message in queued)
    assert any("Loader can re-run verification" in message for message in queued)


def test_tool_batch_runner_blocked_late_reference_drift_nudge_points_to_missing_artifact(
    temp_dir: Path,
) -> None:
    """Late-reference-drift nudge names the one planned artifact still missing."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )
    queued: list[str] = []
    context.queue_steering_message_callback = queued.append
    store = DefinitionOfDoneStore(temp_dir)
    dod = create_definition_of_done("Create a multi-file guide from a reference")
    plan_path = temp_dir / "implementation.md"
    # Plan declares four artifacts; only three are created below, so chapter 03
    # is the missing one the nudge must point at.
    plan_path.write_text(
        "# File Changes\n"
        "- `guide/index.html`\n"
        "- `guide/chapters/01-getting-started.html`\n"
        "- `guide/chapters/02-installation.html`\n"
        "- `guide/chapters/03-first-website.html`\n"
    )
    dod.implementation_plan = str(plan_path)
    (temp_dir / "guide" / "chapters").mkdir(parents=True, exist_ok=True)
    (temp_dir / "guide" / "index.html").write_text("index")
    (temp_dir / "guide" / "chapters" / "01-getting-started.html").write_text("one")
    (temp_dir / "guide" / "chapters" / "02-installation.html").write_text("two")
    runner = ToolBatchRunner(context, store)
    runner._queue_blocked_late_reference_drift_nudge(
        "[Blocked - late reference drift: several planned artifacts already exist.]",
        dod=dod,
    )
    assert queued
    assert "03-first-website.html" in queued[0]
    assert "older reference materials" in queued[0]


def test_tool_batch_runner_blocked_completed_artifact_scope_nudge_prefers_verification(
    temp_dir: Path,
) -> None:
    """When every planned artifact exists, the scope nudge steers to verification."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    guide_root = temp_dir / "guide"
    chapters = guide_root / "chapters"
    guide_root.mkdir(parents=True)
    chapters.mkdir()
    index_path = guide_root / "index.html"
    chapter_one = chapters / "01-getting-started.html"
    chapter_two = chapters / "02-installation.html"
    index_path.write_text("index")
    chapter_one.write_text("one")
    chapter_two.write_text("two")
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}`",
                f"- `{chapters}`",
                f"- `{index_path}`",
                f"- `{chapter_one}`",
                f"- `{chapter_two}`",
                "",
            ]
        )
    )
    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )
    queued: list[str] = []
    context.queue_steering_message_callback = queued.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    dod = create_definition_of_done("Create a multi-file guide from a reference")
    dod.implementation_plan = str(implementation_plan)
    dod.verification_commands = [f"ls -la {guide_root}"]
    # A pending verification todo exists, so the nudge should surface it.
    sync_todos_to_definition_of_done(
        dod,
        [
            {
                "content": "Verify all guide files are linked and complete",
                "active_form": "Working on: Verify all guide files are linked and complete",
                "status": "pending",
            }
        ],
        project_root=temp_dir,
    )
    runner._queue_blocked_completed_artifact_scope_nudge(
        "[Blocked - completed artifact set scope: all explicitly planned artifacts already exist.]",
        dod=dod,
    )
    assert queued
    assert context.workflow_mode == "verify"
    assert "All explicitly planned artifacts already exist." in queued[0]
    assert "Verify all guide files are linked and complete" in queued[0]
    assert "Do not reopen earlier reference materials." in queued[0]
    assert "Finish with a final response so Loader can verify" in queued[0]


def test_tool_batch_runner_blocked_post_build_audit_nudge_switches_to_verify(
    temp_dir: Path,
) -> None:
    """A post-build-audit block flips the workflow into verify mode."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    guide_root = temp_dir / "guide"
    chapters = guide_root / "chapters"
    guide_root.mkdir(parents=True)
    chapters.mkdir()
    index_path = guide_root / "index.html"
    chapter_one = chapters / "01-getting-started.html"
    chapter_two = chapters / "02-installation.html"
    index_path.write_text("index")
    chapter_one.write_text("one")
    chapter_two.write_text("two")
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}`",
                f"- `{chapters}`",
                f"- `{index_path}`",
                f"- `{chapter_one}`",
                f"- `{chapter_two}`",
                "",
            ]
        )
    )
    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )
    queued: list[str] = []
    context.queue_steering_message_callback = queued.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    dod = create_definition_of_done("Create a multi-file guide from a reference")
    dod.implementation_plan = str(implementation_plan)
    dod.verification_commands = [f"ls -la {guide_root}"]
    runner._queue_blocked_completed_artifact_scope_nudge(
        "[Blocked - post-build audit loop: all explicitly planned artifacts already exist.]",
        dod=dod,
    )
    assert queued
    assert context.workflow_mode == "verify"
    assert "All explicitly planned artifacts already exist." in queued[0]
    assert "finish with a final response so Loader can verify" in queued[0]


@pytest.mark.asyncio
async def test_tool_batch_runner_does_not_halt_on_repeated_post_build_audit_blocks(
    temp_dir: Path,
) -> None:
    """Three consecutive post-build-audit blocks must not halt the batch."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    guide_root = temp_dir / "guide"
    chapters = guide_root / "chapters"
    guide_root.mkdir(parents=True)
    chapters.mkdir()
    index_path = guide_root / "index.html"
    chapter_one = chapters / "01-getting-started.html"
    chapter_two = chapters / "02-installation.html"
    index_path.write_text("index")
    chapter_one.write_text("one")
    chapter_two.write_text("two")
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{guide_root}`",
                f"- `{chapters}`",
                f"- `{index_path}`",
                f"- `{chapter_one}`",
                f"- `{chapter_two}`",
                "",
            ]
        )
    )
    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )
    queued: list[str] = []
    context.queue_steering_message_callback = queued.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    dod = create_definition_of_done("Create a multi-file guide from a reference")
    dod.implementation_plan = str(implementation_plan)
    dod.verification_commands = [f"ls -la {guide_root}"]
    blocked_message = (
        "[Blocked - post-build audit loop: all explicitly planned artifacts already exist.]"
    )
    tool_calls = [
        ToolCall(
            id=f"audit-{index}",
            name="bash",
            arguments={"command": f"cd {temp_dir} && ls -la guide/chapters/"},
        )
        for index in range(1, 4)
    ]
    # Every queued outcome is the same BLOCKED audit-loop message.
    executor = FakeExecutor(
        [
            tool_outcome(
                tool_call=tool_call,
                output=blocked_message,
                is_error=True,
                state=ToolExecutionState.BLOCKED,
            )
            for tool_call in tool_calls
        ]
    )
    events: list[AgentEvent] = []

    async def emit(event: AgentEvent) -> None:
        events.append(event)

    result = await runner.execute_batch(
        tool_calls=tool_calls,
        tool_source="native",
        pending_tool_calls_seen=set(),
        emit=emit,
        summary=TurnSummary(final_response=""),
        dod=dod,
        executor=executor,
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )
    assert result.halted is False
    assert result.consecutive_errors == 0
    assert context.workflow_mode == "verify"
    assert queued
    assert any("finish with a final response so Loader can verify" in message for message in queued)


def test_tool_batch_runner_blocked_html_declared_target_nudge_uses_closest_declared_target(
    temp_dir: Path,
) -> None:
    """The declared-target nudge surfaces the closest declared local target."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )
    queued: list[str] = []
    context.queue_steering_message_callback = queued.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    runner._queue_blocked_html_declared_target_nudge(
        ToolCall(
            id="write-ch1",
            name="write",
            arguments={"file_path": str(temp_dir / "guide" / "chapters" / "01-introduction.html")},
        ),
        (
            "[Blocked - HTML page introduces new local targets outside the current declared artifact set] "
            "Suggestion: Keep non-root HTML pages within the root-declared local-link set and avoid "
            "introducing new sibling targets that the guide root does not declare, for example fix: 02-setup.html. "
            "Already-declared local targets include: chapters/01-introduction.html, chapters/02-installation.html, "
            "chapters/03-configuration.html. Closest declared local targets include: chapters/02-installation.html"
        ),
    )
    assert queued
    assert str(temp_dir / "guide" / "chapters" / "01-introduction.html") in queued[0]
    assert "`chapters/02-installation.html`" in queued[0]
    assert "same file now" in queued[0]


def test_tool_batch_runner_blocked_html_declared_target_nudge_without_close_match(
    temp_dir: Path,
) -> None:
    """Without a close match the nudge lists the allowed hrefs instead."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )
    queued: list[str] = []
    context.queue_steering_message_callback = queued.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    runner._queue_blocked_html_declared_target_nudge(
        ToolCall(
            id="write-ch1",
            name="write",
            arguments={"file_path": str(temp_dir / "guide" / "chapters" / "introduction.html")},
        ),
        (
            "[Blocked - HTML page introduces new local targets outside the current declared artifact set] "
            "Suggestion: Keep non-root HTML pages within the root-declared local-link set and avoid "
            "introducing new sibling targets that the guide root does not declare; remove or replace "
            "undeclared hrefs like: troubleshooting.html. "
            "Already-declared local targets include: chapters/introduction.html, chapters/installation.html, "
            "chapters/configuration.html. Allowed hrefs from this file include: ../index.html, "
            "installation.html, configuration.html."
        ),
    )
    assert queued
    assert "use only these exact href values" in queued[0]
    assert "`installation.html`" in queued[0]
    assert "`../index.html`" in queued[0]
    assert "closest declared target(s)" not in queued[0]


def test_tool_batch_runner_blocked_html_declared_file_creation_nudge_points_to_root(
    temp_dir: Path,
) -> None:
    """File-creation nudge tells the model to update the guide root first."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )
    queued: list[str] = []
    context.queue_steering_message_callback = queued.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    dod = create_definition_of_done("Create a guide.")
    target = temp_dir / "guide" / "chapters" / "troubleshooting.html"
    runner._queue_blocked_html_declared_file_creation_nudge(
        ToolCall(
            id="write-troubleshooting",
            name="write",
            arguments={"file_path": str(target)},
        ),
        (
            "[Blocked - HTML file creation falls outside the current declared artifact set] "
            "Suggestion: Keep new non-root HTML files within the root-declared artifact set and "
            f"update the guide root `{(temp_dir / 'guide' / 'index.html').resolve(strict=False)}` "
            "before creating undeclared sibling pages, for example: chapters/troubleshooting.html. "
            "Already-declared local targets include: chapters/advanced-topics.html, "
            "chapters/basic-usage.html, chapters/configuration.html"
        ),
        dod=dod,
    )
    assert queued
    assert "update" in queued[0].lower()
    assert str((temp_dir / "guide" / "index.html").resolve(strict=False)) in queued[0]
    assert "`chapters/troubleshooting.html`" in queued[0]
    assert "retry the file creation" in queued[0]


def test_tool_batch_runner_blocked_html_declared_file_creation_after_outputs_exist_prefers_verify(
    temp_dir: Path,
) -> None:
    """Once all planned outputs exist, creation nudges prefer verification."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should not run in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    guide = temp_dir / "guide"
    chapters = guide / "chapters"
    guide.mkdir()
    chapters.mkdir()
    index = guide / "index.html"
    # NOTE(review): the anchor markup in this fixture appears stripped by text
    # extraction (only link text remains); confirm the original literal against
    # the repository before relying on it.
    index.write_text(
        "\n".join(
            [
                'Intro',
                'Install',
                'Back',
                "",
            ]
        )
    )
    (chapters / "01-introduction.html").write_text("\n")
    (chapters / "02-installation.html").write_text("\n")
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{index}`",
                f"- `{chapters / '01-introduction.html'}`",
                f"- `{chapters / '02-installation.html'}`",
                "",
            ]
        )
    )
    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )
    queued: list[str] = []
    context.queue_steering_message_callback = queued.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    dod = create_definition_of_done("Create a guide.")
    dod.implementation_plan = str(implementation_plan)
    dod.verification_commands = [f"ls -la {guide}"]
    dod.touched_files = [str(index), str(chapters / "01-introduction.html"), str(chapters / "02-installation.html")]
    target = guide / "chapters" / "08-advanced-configuration.html"
    runner._queue_blocked_html_declared_file_creation_nudge(
        ToolCall(
            id="write-extra",
            name="write",
            arguments={"file_path": str(target)},
        ),
        (
            "[Blocked - HTML file creation falls outside the current declared artifact set] "
            "Suggestion: Keep new non-root HTML files within the root-declared artifact set and "
            f"update the guide root `{index.resolve(strict=False)}` before creating undeclared sibling pages, "
            "for example: chapters/08-advanced-configuration.html."
        ),
        dod=dod,
    )
    assert queued
    assert "All explicitly planned artifacts already exist on disk." in queued[0]
    assert "Do not expand the output set with `chapters/08-advanced-configuration.html`." in queued[0]
    assert (
        "Finish with a final response now so Loader can run verification automatically."
        in queued[0]
    )
    assert "update the guide root" not in queued[0]


def test_tool_batch_runner_blocked_html_declared_file_creation_prefers_closest_target(
    temp_dir: Path,
) -> None:
    """Creation nudge points at the closest declared target when one exists."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should not run in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )
    queued: list[str] = []
    context.queue_steering_message_callback = queued.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    dod = create_definition_of_done("Create a guide.")
    target = temp_dir / "guide" / "chapters" / "02-basics.html"
    runner._queue_blocked_html_declared_file_creation_nudge(
        ToolCall(
            id="write-basics",
            name="write",
            arguments={"file_path": str(target)},
        ),
        (
            "[Blocked - HTML file creation falls outside the current declared artifact set] "
            "Suggestion: Keep new non-root HTML files within the root-declared artifact set. "
            "Do not create undeclared sibling page `chapters/02-basics.html`; use the closest declared local target instead. "
            "Already-declared local targets include: chapters/01-introduction.html, "
            "chapters/02-installation.html, chapters/03-basic-configuration.html. "
            "Closest declared local targets include: chapters/02-installation.html"
        ),
        dod=dod,
    )
    assert queued
    assert "Do not create `chapters/02-basics.html`." in queued[0]
    assert "closest declared target instead: `chapters/02-installation.html`" in queued[0]
    assert "Already-declared local targets include:" in queued[0]
    assert "update the guide root" not in queued[0]


@pytest.mark.asyncio
async def test_tool_batch_runner_blocked_html_quality_guidance_does_not_halt(
    temp_dir: Path,
) -> None:
    """Repeated placeholder-content blocks queue guidance without halting."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should not run in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    target = temp_dir / "guide" / "chapters" / "06-security.html"
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{target}`",
                "",
            ]
        )
    )
    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )
    queued: list[str] = []
    context.queue_steering_message_callback = queued.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    dod = create_definition_of_done("Create a guide chapter.")
    dod.implementation_plan = str(implementation_plan)
    tool_calls = [
        ToolCall(
            id=f"write-quality-{index}",
            name="write",
            arguments={"file_path": str(target), "content": ""},
        )
        for index in range(3)
    ]
    blocked_message = (
        "[Blocked - HTML content contains placeholder or stub text] "
        "Suggestion: Replace placeholder phrases with concrete user-facing content "
        "before writing the HTML artifact. Placeholder phrase(s): generic core "
        "concepts section, generic practical workflow section. Include specific "
        "explanations, examples, commands, or structured prose instead."
    )
    executor = FakeExecutor(
        [
            tool_outcome(
                tool_call=tool_call,
                output=blocked_message,
                is_error=True,
                state=ToolExecutionState.BLOCKED,
            )
            for tool_call in tool_calls
        ]
    )
    events: list[AgentEvent] = []

    async def emit(event: AgentEvent) -> None:
        events.append(event)

    result = await runner.execute_batch(
        tool_calls=tool_calls,
        tool_source="native",
        pending_tool_calls_seen=set(),
        emit=emit,
        summary=TurnSummary(final_response=""),
        dod=dod,
        executor=executor,
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )
    assert result.halted is False
    assert result.consecutive_errors == 0
    assert queued
    assert str(target) in queued[-1]
    assert "Retry that same target" in queued[-1]
    assert "Do not reuse placeholder pattern(s)" in queued[-1]
    assert "generic core concepts section" in queued[-1]
    assert "not a scaffold or outline" in queued[-1]


@pytest.mark.asyncio
async def test_tool_batch_runner_blocked_html_structure_guidance_does_not_halt(
    temp_dir: Path,
) -> None:
    """Repeated invalid-structure blocks queue guidance without halting."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should not run in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    target = temp_dir / "guide" / "chapters" / "08-monitoring.html"
    target.parent.mkdir(parents=True)
    # NOTE(review): the HTML tag markup in this fixture (and in the blocked
    # message / assertions below) appears stripped by text extraction; confirm
    # the original literals against the repository.
    target.write_text(
        "\n".join(
            [
                "",
                '',
                "",
                "

    Monitoring

    ",
                "

    Existing content.

    ",
                "",
                "",
                "",
            ]
        )
    )
    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )
    queued: list[str] = []
    context.queue_steering_message_callback = queued.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    dod = create_definition_of_done("Expand a guide chapter.")
    tool_calls = [
        ToolCall(
            id=f"patch-structure-{index}",
            name="patch",
            arguments={"file_path": str(target), "patch": "@@ malformed"},
        )
        for index in range(3)
    ]
    blocked_message = (
        "[Blocked - HTML document structure would be invalid] Suggestion: "
        "expected exactly one closing tag (found 2); expected exactly "
        "one closing tag (found 2). Keep the existing closing document "
        "tail intact."
    )
    executor = FakeExecutor(
        [
            tool_outcome(
                tool_call=tool_call,
                output=blocked_message,
                is_error=True,
                state=ToolExecutionState.BLOCKED,
            )
            for tool_call in tool_calls
        ]
    )
    events: list[AgentEvent] = []

    async def emit(event: AgentEvent) -> None:
        events.append(event)

    result = await runner.execute_batch(
        tool_calls=tool_calls,
        tool_source="native",
        pending_tool_calls_seen=set(),
        emit=emit,
        summary=TurnSummary(final_response=""),
        dod=dod,
        executor=executor,
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )
    assert result.halted is False
    assert result.consecutive_errors == 0
    assert queued
    assert str(target) in queued[-1]
    assert "blocked before it changed the file" in queued[-1]
    assert "Do not assume the on-disk file is malformed" in queued[-1]
    assert "```html\n\n\n```" in queued[-1]
    assert "do not add a second `` or ``" in queued[-1]


def test_tool_batch_runner_blocked_html_missing_target_after_outputs_exist_prefers_verify(
    temp_dir: Path,
) -> None:
    """Missing-link-target nudge prefers verification once outputs exist."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should not run in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    guide = temp_dir / "guide"
    chapters = guide / "chapters"
    guide.mkdir()
    chapters.mkdir()
    index = guide / "index.html"
    # NOTE(review): anchor markup appears stripped by text extraction here as
    # well; confirm the original literal against the repository.
    index.write_text(
        "\n".join(
            [
                'Intro',
                'Install',
                'Back',
                "",
            ]
        )
    )
    (chapters / "01-introduction.html").write_text("\n")
    (chapters / "02-installation.html").write_text("\n")
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{index}`",
                f"- `{chapters / '01-introduction.html'}`",
                f"- `{chapters / '02-installation.html'}`",
                "",
            ]
        )
    )
    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )
    queued: list[str] = []
    context.queue_steering_message_callback = queued.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    dod = create_definition_of_done("Create a guide.")
    dod.implementation_plan = str(implementation_plan)
    dod.verification_commands = [f"ls -la {guide}"]
    dod.touched_files = [str(index), str(chapters / "01-introduction.html"), str(chapters / "02-installation.html")]
    runner._queue_blocked_html_missing_target_nudge(
        ToolCall(
            id="edit-root",
            name="edit",
            arguments={"file_path": str(index)},
        ),
        (
            "[Blocked - Edited HTML links point to files that do not exist] "
            "Suggestion: Use only existing local targets for href values and avoid introducing missing links. "
            "Broken href(s): chapters/08-advanced-configuration.html. "
            "Replace them with an existing local target or remove the broken link."
        ),
        dod=dod,
    )
    assert queued
    assert "All explicitly planned artifacts already exist on disk." in queued[0]
    assert f"Stay on `{index}`." in queued[0]
    assert "Do not introduce new local-link targets beyond the current output set." in queued[0]
    assert "Repair the existing generated files instead of expanding the guide." in queued[0]
    assert "Replace broken hrefs with existing local targets or remove the broken link." in queued[0]


def test_tool_batch_runner_blocked_html_asset_nudge_retries_same_file(
    temp_dir: Path,
) -> None:
    """Asset nudge asks for a retry of the same file with the bad href fixed."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should not run in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )
    queued: list[str] = []
    context.queue_steering_message_callback = queued.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    target = temp_dir / "guide" / "chapters" / "03-configuration.html"
    runner._queue_blocked_html_asset_nudge(
        ToolCall(
            id="write-config",
            name="write",
            arguments={"file_path": str(target)},
        ),
        (
            "[Blocked - HTML local asset references do not exist] Suggestion: "
            "Use only existing local assets for non-HTML href values. "
            "Missing local asset href(s): ../styles.css. Remove the asset link, "
            "create the referenced asset first, inline the styling/content, or point "
            "the href at an existing local file."
        ),
    )
    assert queued
    assert str(target) in queued[0]
    assert "was not created or updated" in queued[0]
    assert "Remove or replace `../styles.css`." in queued[0]
    # NOTE(review): the tag name in this assertion appears stripped by text
    # extraction (empty backticks); confirm against the repository.
    assert "Do not resend the same `` tag" in queued[0]
    assert "do not claim completion" in queued[0]


def test_tool_batch_runner_repeated_blocked_html_asset_nudge_forces_href_removal(
    temp_dir: Path,
) -> None:
    """A second identical asset block escalates to demanding href removal."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should not run in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    blocked_event = (
        "[Blocked - HTML local asset references do not exist] Suggestion: "
        "Use only existing local assets for non-HTML href values. "
        "Missing local asset href(s): ../style.css. Remove the asset link, "
        "create the referenced asset first, inline the styling/content, or point "
        "the href at an existing local file."
    )
    # The session already holds one blocked event; appending a second makes the
    # runner see this block as the second occurrence ("blocked 2 times").
    context = build_context(
        temp_dir=temp_dir,
        messages=[Message(role=Role.TOOL, content=blocked_event)],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
    )
    context.session.append(Message(role=Role.TOOL, content=blocked_event))
    queued: list[str] = []
    context.queue_steering_message_callback = queued.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    target = temp_dir / "guide" / "chapters" / "05-troubleshooting.html"
    runner._queue_blocked_html_asset_nudge(
        ToolCall(
            id="write-troubleshooting",
            name="write",
            arguments={"file_path": str(target)},
        ),
        blocked_event,
    )
    assert queued
    assert "blocked 2 times" in queued[0]
    assert "`../style.css`" in queued[0]
    assert "line removed" in queued[0]
    assert "Do not resend another" in queued[0]


@pytest.mark.asyncio
async def test_tool_batch_runner_blocked_empty_file_path_nudges_concrete_next_artifact(
    temp_dir: Path,
) -> None:
    """An empty `file_path` block nudges toward the next planned artifact."""

    async def assess_confidence(
        tool_name: str,
        tool_args: dict,
        context: str,
    ) -> ConfidenceAssessment:
        raise AssertionError("Confidence scoring should be disabled in this scenario")

    async def verify_action(
        tool_name: str,
        tool_args: dict,
        result: str,
        expected: str = "",
    ) -> ActionVerification:
        raise AssertionError("Verification should not run in this scenario")

    guide_root = temp_dir / "guides" / "nginx"
    chapters = guide_root / "chapters"
    chapters.mkdir(parents=True)
    index_path = guide_root / "index.html"
    chapter_one = chapters / "01-introduction.html"
    chapter_two = chapters / "02-installation.html"
    index_path.write_text("\n")
    # NOTE(review): the HTML markup in this fixture appears stripped by text
    # extraction; confirm the original literal against the repository.
    chapter_one.write_text("

    Intro

    \n")
    implementation_plan = temp_dir / "implementation.md"
    implementation_plan.write_text(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- `{index_path}`",
                f"- `{chapter_one}`",
                f"- `{chapter_two}`",
                "",
            ]
        )
    )
    context = build_context(
        temp_dir=temp_dir,
        messages=[],
        safeguards=FakeSafeguards(),
        assess_confidence=assess_confidence,
        verify_action=verify_action,
        auto_recover=False,
    )
    queued: list[str] = []
    context.queue_steering_message_callback = queued.append
    runner = ToolBatchRunner(context, DefinitionOfDoneStore(temp_dir))
    tool_call = ToolCall(
        id="write-2",
        name="write",
        arguments={"file_path": "", "content": "\n"},
    )
    blocked_message = "[Blocked - Empty file path] Suggestion: Provide a valid file path"
    executor = FakeExecutor(
        [
            ToolExecutionOutcome(
                tool_call=tool_call,
                state=ToolExecutionState.BLOCKED,
                message=Message.tool_result_message(
                    tool_call_id=tool_call.id,
                    display_content=blocked_message,
                    result_content=blocked_message,
                    is_error=True,
                ),
                event_content=blocked_message,
                is_error=True,
                result_output=blocked_message,
            )
        ]
    )
    dod = create_definition_of_done("Create a multi-file nginx guide.")
    dod.implementation_plan = str(implementation_plan)
    # Index and chapter one are done; chapter two is the concrete next artifact.
    dod.touched_files.extend([str(index_path), str(chapter_one)])
    dod.pending_items.append("Creating Chapter 2: Installation and Setup")
    await runner.execute_batch(
        tool_calls=[tool_call],
        tool_source="assistant",
        pending_tool_calls_seen=set(),
        emit=_noop_emit,
        summary=TurnSummary(final_response=""),
        dod=dod,
        executor=executor,  # type: ignore[arg-type]
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=None,
        consecutive_errors=0,
    )
    assert queued
    assert "did not provide a valid `file_path`" in queued[0]
    assert "Resume by creating `02-installation.html` now." in queued[0]
    assert (
        f"Prefer one `write` call for `{display_runtime_path(chapter_two)}` instead of more rereads."
        in queued[0]
    )
    assert context.recovery_context is not None
    assert context.recovery_context.attempts[-1].error == blocked_message