Add routed workflow artifacts to runtime
- SHA
de5f6787b869038db1ff2cb7442d0c428869538c- Parents
-
8713208 - Tree
e9e3480
de5f678
de5f6787b869038db1ff2cb7442d0c428869538c8713208
e9e3480| Status | File | + | - |
|---|---|---|---|
| M |
src/loader/agent/loop.py
|
30 | 46 |
| M |
src/loader/agent/prompts.py
|
65 | 18 |
| M |
src/loader/runtime/conversation.py
|
506 | 0 |
| M |
src/loader/runtime/dod.py
|
10 | 0 |
| M |
src/loader/runtime/events.py
|
4 | 0 |
| A |
src/loader/runtime/workflow.py
|
637 | 0 |
| A |
tests/test_workflow.py
|
177 | 0 |
| A |
tests/test_workflow_runtime.py
|
278 | 0 |
| M |
tests/test_workflow_runtime_tools.py
|
6 | 1 |
src/loader/agent/loop.pymodified@@ -13,6 +13,7 @@ from ..runtime.conversation import ConversationRuntime | ||
| 13 | 13 | from ..runtime.events import AgentEvent, TurnSummary |
| 14 | 14 | from ..runtime.permissions import PermissionMode, build_permission_policy |
| 15 | 15 | from ..runtime.session import ConversationSession |
| 16 | +from ..runtime.workflow import WorkflowMode | |
| 16 | 17 | from ..tools.base import ToolRegistry, create_default_registry |
| 17 | 18 | from .planner import ( |
| 18 | 19 | PLANNING_PROMPT, |
@@ -89,6 +90,7 @@ class AgentConfig: | ||
| 89 | 90 | max_recovery_attempts: int = 2 # Reduced from 3 |
| 90 | 91 | verification_retry_budget: int = 3 # Retry budget for verify/fix loop |
| 91 | 92 | permission_mode: PermissionMode = PermissionMode.WORKSPACE_WRITE |
| 93 | + workflow_mode_override: str | None = None | |
| 92 | 94 | stream: bool = True # Stream LLM responses for real-time output |
| 93 | 95 | |
| 94 | 96 | # Reasoning stages configuration |
@@ -126,6 +128,7 @@ class Agent: | ||
| 126 | 128 | messages=self.messages, |
| 127 | 129 | ) |
| 128 | 130 | self._system_message: Message | None = None |
| 131 | + self.workflow_mode = WorkflowMode.EXECUTE.value | |
| 129 | 132 | self._use_react: bool | None = None |
| 130 | 133 | self.capability_profile = resolve_backend_capability_profile(self.backend) |
| 131 | 134 | self.last_turn_summary: TurnSummary | None = None |
@@ -204,6 +207,7 @@ class Agent: | ||
| 204 | 207 | tools=tool_schemas, |
| 205 | 208 | use_react=self.use_react, |
| 206 | 209 | project_context=self.project_context, |
| 210 | + workflow_mode=self.workflow_mode, | |
| 207 | 211 | ) |
| 208 | 212 | self._system_message = Message( |
| 209 | 213 | role=Role.SYSTEM, |
@@ -211,6 +215,14 @@ class Agent: | ||
| 211 | 215 | ) |
| 212 | 216 | return self._system_message |
| 213 | 217 | |
| 218 | + def set_workflow_mode(self, workflow_mode: str) -> None: | |
| 219 | + """Update the active workflow mode used by the system prompt.""" | |
| 220 | + | |
| 221 | + if workflow_mode == self.workflow_mode: | |
| 222 | + return | |
| 223 | + self.workflow_mode = workflow_mode | |
| 224 | + self._system_message = None | |
| 225 | + | |
| 214 | 226 | def _build_messages(self) -> list[Message]: |
| 215 | 227 | """Build the full message list for the LLM.""" |
| 216 | 228 | return self.session.build_request_messages() |
@@ -553,52 +565,6 @@ class Agent: | ||
| 553 | 565 | else: |
| 554 | 566 | return f"Task partially completed. {decomposition.to_prompt()}" |
| 555 | 567 | |
| 556 | - # Check if we should use planning | |
| 557 | - should_use_plan = use_plan | |
| 558 | - if should_use_plan is None and self.config.auto_plan: | |
| 559 | - await emit(AgentEvent(type="thinking")) | |
| 560 | - should_use_plan = await self._should_plan(user_message) | |
| 561 | - | |
| 562 | - # If planning, create and execute plan | |
| 563 | - if should_use_plan: | |
| 564 | - plan = await self._create_plan(user_message) | |
| 565 | - if plan.steps: | |
| 566 | - await emit(AgentEvent(type="plan", content=plan.to_prompt())) | |
| 567 | - | |
| 568 | - # Execute each step | |
| 569 | - while not plan.is_complete(): | |
| 570 | - step = plan.next_step() | |
| 571 | - if not step: | |
| 572 | - break | |
| 573 | - | |
| 574 | - await emit(AgentEvent( | |
| 575 | - type="step", | |
| 576 | - step_info=f"{plan.progress_str()} {step.description}", | |
| 577 | - )) | |
| 578 | - | |
| 579 | - # Run the step | |
| 580 | - step_prompt = format_step_prompt(plan, step) | |
| 581 | - await self._run_inner( | |
| 582 | - step_prompt, | |
| 583 | - emit, | |
| 584 | - on_confirmation, | |
| 585 | - on_user_question=on_user_question, | |
| 586 | - original_task=self._current_task, | |
| 587 | - ) | |
| 588 | - | |
| 589 | - plan.complete_current() | |
| 590 | - | |
| 591 | - # Final summary | |
| 592 | - self.messages.append(Message(role=Role.USER, content=user_message)) | |
| 593 | - summary_prompt = f"I've completed the plan. Summarize what was done:\n{plan.to_prompt()}" | |
| 594 | - return await self._run_inner( | |
| 595 | - summary_prompt, | |
| 596 | - emit, | |
| 597 | - on_confirmation, | |
| 598 | - on_user_question=on_user_question, | |
| 599 | - original_task=self._current_task, | |
| 600 | - ) | |
| 601 | - | |
| 602 | 568 | # No planning or decomposition - run directly |
| 603 | 569 | self.messages.append(Message(role=Role.USER, content=user_message)) |
| 604 | 570 | return await self._run_inner( |
@@ -606,6 +572,7 @@ class Agent: | ||
| 606 | 572 | emit, |
| 607 | 573 | on_confirmation, |
| 608 | 574 | on_user_question=on_user_question, |
| 575 | + requested_mode=self._requested_workflow_mode(use_plan), | |
| 609 | 576 | original_task=self._current_task, |
| 610 | 577 | ) |
| 611 | 578 | |
@@ -615,6 +582,7 @@ class Agent: | ||
| 615 | 582 | emit: Callable[[AgentEvent], Awaitable[None]], |
| 616 | 583 | on_confirmation: Callable[[str, str, str], Awaitable[bool]] | None = None, |
| 617 | 584 | on_user_question: Callable[[str, list[str] | None], Awaitable[str]] | None = None, |
| 585 | + requested_mode: str | None = None, | |
| 618 | 586 | original_task: str | None = None, |
| 619 | 587 | ) -> str: |
| 620 | 588 | """Inner execution loop without planning.""" |
@@ -625,10 +593,24 @@ class Agent: | ||
| 625 | 593 | emit, |
| 626 | 594 | on_confirmation=on_confirmation, |
| 627 | 595 | on_user_question=on_user_question, |
| 596 | + requested_mode=requested_mode, | |
| 628 | 597 | original_task=original_task, |
| 629 | 598 | ) |
| 630 | 599 | return self.last_turn_summary.final_response |
| 631 | 600 | |
| 601 | + def _requested_workflow_mode(self, use_plan: bool | None) -> str | None: | |
| 602 | + """Resolve the explicit workflow-mode override for the current turn.""" | |
| 603 | + | |
| 604 | + if use_plan is True: | |
| 605 | + return WorkflowMode.PLAN.value | |
| 606 | + if use_plan is False: | |
| 607 | + return WorkflowMode.EXECUTE.value | |
| 608 | + if self.config.workflow_mode_override: | |
| 609 | + return self.config.workflow_mode_override | |
| 610 | + if self.config.auto_plan: | |
| 611 | + return WorkflowMode.PLAN.value | |
| 612 | + return None | |
| 613 | + | |
| 632 | 614 | async def run_streaming( |
| 633 | 615 | self, |
| 634 | 616 | user_message: str, |
@@ -1001,4 +983,6 @@ class Agent: | ||
| 1001 | 983 | self._recovery_context = None |
| 1002 | 984 | self._current_task = None |
| 1003 | 985 | self.last_turn_summary = None |
| 986 | + self.workflow_mode = WorkflowMode.EXECUTE.value | |
| 987 | + self._system_message = None | |
| 1004 | 988 | self.safeguards.reset() # Reset all runtime safeguards |
src/loader/agent/prompts.pymodified@@ -1,7 +1,7 @@ | ||
| 1 | 1 | """Prompt templates for the agent.""" |
| 2 | 2 | |
| 3 | 3 | import os |
| 4 | -from typing import Any, TYPE_CHECKING | |
| 4 | +from typing import TYPE_CHECKING, Any | |
| 5 | 5 | |
| 6 | 6 | if TYPE_CHECKING: |
| 7 | 7 | from ..context.project import ProjectContext |
@@ -145,17 +145,43 @@ def format_tool_descriptions(tools: list[dict[str, Any]]) -> str: | ||
| 145 | 145 | return "\n\n".join(lines) |
| 146 | 146 | |
| 147 | 147 | |
| 148 | +MODE_GUIDANCE = { | |
| 149 | + "clarify": """ | |
| 150 | +## Clarify Mode | |
| 151 | +- Ask exactly one focused question with `AskUserQuestion` | |
| 152 | +- Clarify intent, outcome, scope, or boundaries before proposing solutions | |
| 153 | +- Do not start coding or writing patch plans yet | |
| 154 | +- Keep the question high-leverage and brief | |
| 155 | +""", | |
| 156 | + "plan": """ | |
| 157 | +## Plan Mode | |
| 158 | +- Produce persistent implementation and verification planning artifacts | |
| 159 | +- Do not start writing code in this mode | |
| 160 | +- Be explicit about file touchpoints, order of work, risks, acceptance criteria, and verification commands | |
| 161 | +- Prefer concrete, repository-grounded plans over generic checklists | |
| 162 | +""", | |
| 163 | + "execute": """ | |
| 164 | +## Execute Mode | |
| 165 | +- Use tools directly to perform the task | |
| 166 | +- Read relevant files before editing them | |
| 167 | +- Keep `TodoWrite` current for multi-step work when progress tracking matters | |
| 168 | +- Concise reporting is fine, and numbered lists are allowed when they communicate plan or evidence clearly | |
| 169 | +""", | |
| 170 | + "verify": """ | |
| 171 | +## Verify Mode | |
| 172 | +- Run the planned verification commands and capture evidence | |
| 173 | +- Do not declare the task complete while any verification step is failing | |
| 174 | +- Report concrete pass/fail evidence rather than vague confidence | |
| 175 | +""", | |
| 176 | +} | |
| 177 | + | |
| 178 | + | |
| 148 | 179 | SYSTEM_PROMPT = """You are Loader, an AI coding agent. |
| 149 | 180 | |
| 150 | 181 | Current directory: {cwd} |
| 151 | 182 | |
| 152 | -## Tools | |
| 153 | -- bash: Run shell commands | |
| 154 | -- write: Create files | |
| 155 | -- read: Read files | |
| 156 | -- edit: Modify files | |
| 157 | -- glob: Find files | |
| 158 | -- grep: Search in files | |
| 183 | +## Tools Available | |
| 184 | +{tool_descriptions} | |
| 159 | 185 | |
| 160 | 186 | ## How to Use Tools |
| 161 | 187 | Output a tool call in this format: |
@@ -166,12 +192,19 @@ Output a tool call in this format: | ||
| 166 | 192 | [write: file_path="hello.py", content="print('hello')"] |
| 167 | 193 | [read: file_path="config.json"] |
| 168 | 194 | [edit: file_path="app.py", old_string="old", new_string="new"] |
| 195 | +[TodoWrite: todos=[{{content="Run tests", active_form="Running tests", status="in_progress"}}]] | |
| 196 | +[AskUserQuestion: question="Which path matters more?", options=["Speed", "Correctness"]] | |
| 197 | + | |
| 198 | +## Active Workflow Mode | |
| 199 | +{workflow_mode} | |
| 200 | + | |
| 201 | +{mode_guidance} | |
| 169 | 202 | |
| 170 | 203 | ## Rules |
| 171 | -1. Use tools immediately - don't explain first | |
| 172 | -2. No code blocks (```) - use the write tool instead | |
| 173 | -3. No numbered steps - just do the task | |
| 174 | -4. Read files before editing them | |
| 204 | +1. Follow the active workflow mode rather than improvising a different one | |
| 205 | +2. Use tools or concise prose directly instead of narrating fake tool use | |
| 206 | +3. Use the write tool for files rather than pasting long code blocks | |
| 207 | +4. Keep responses grounded in repository evidence and verification output | |
| 175 | 208 | """ |
| 176 | 209 | |
| 177 | 210 | |
@@ -200,11 +233,16 @@ Current directory: {cwd} | ||
| 200 | 233 | {{"name": "read", "arguments": {{"file_path": "config.json"}}}} |
| 201 | 234 | </tool_call> |
| 202 | 235 | |
| 236 | +## Active Workflow Mode | |
| 237 | +{workflow_mode} | |
| 238 | + | |
| 239 | +{mode_guidance} | |
| 240 | + | |
| 203 | 241 | ## Rules |
| 204 | -1. Use tools immediately - don't explain first | |
| 205 | -2. No code blocks - use the write tool instead | |
| 206 | -3. No numbered steps - just do the task | |
| 207 | -4. Read files before editing them | |
| 242 | +1. Follow the active workflow mode rather than improvising a different one | |
| 243 | +2. Use tools or concise prose directly instead of narrating fake tool use | |
| 244 | +3. Use the write tool for files rather than pasting long code blocks | |
| 245 | +4. Keep responses grounded in repository evidence and verification output | |
| 208 | 246 | """ |
| 209 | 247 | |
| 210 | 248 | |
@@ -212,6 +250,7 @@ def build_system_prompt( | ||
| 212 | 250 | tools: list[dict[str, Any]], |
| 213 | 251 | use_react: bool = False, |
| 214 | 252 | project_context: "str | ProjectContext | None" = None, |
| 253 | + workflow_mode: str = "execute", | |
| 215 | 254 | ) -> str: |
| 216 | 255 | """Build the system prompt with tool descriptions. |
| 217 | 256 | |
@@ -224,15 +263,23 @@ def build_system_prompt( | ||
| 224 | 263 | Formatted system prompt |
| 225 | 264 | """ |
| 226 | 265 | cwd = os.getcwd() |
| 266 | + tool_descriptions = format_tool_descriptions(tools) | |
| 267 | + mode_guidance = MODE_GUIDANCE.get(workflow_mode, MODE_GUIDANCE["execute"]) | |
| 227 | 268 | |
| 228 | 269 | if use_react: |
| 229 | - tool_descriptions = format_tool_descriptions(tools) | |
| 230 | 270 | prompt = REACT_SYSTEM_PROMPT.format( |
| 231 | 271 | cwd=cwd, |
| 232 | 272 | tool_descriptions=tool_descriptions, |
| 273 | + workflow_mode=workflow_mode, | |
| 274 | + mode_guidance=mode_guidance, | |
| 233 | 275 | ) |
| 234 | 276 | else: |
| 235 | - prompt = SYSTEM_PROMPT.format(cwd=cwd) | |
| 277 | + prompt = SYSTEM_PROMPT.format( | |
| 278 | + cwd=cwd, | |
| 279 | + tool_descriptions=tool_descriptions, | |
| 280 | + workflow_mode=workflow_mode, | |
| 281 | + mode_guidance=mode_guidance, | |
| 282 | + ) | |
| 236 | 283 | |
| 237 | 284 | # Add project context if available |
| 238 | 285 | if project_context: |
src/loader/runtime/conversation.pymodified@@ -2,8 +2,10 @@ | ||
| 2 | 2 | |
| 3 | 3 | from __future__ import annotations |
| 4 | 4 | |
| 5 | +import re | |
| 5 | 6 | from collections.abc import Awaitable, Callable |
| 6 | 7 | from dataclasses import dataclass, field |
| 8 | +from pathlib import Path | |
| 7 | 9 | from typing import Any |
| 8 | 10 | |
| 9 | 11 | from ..agent.parsing import parse_tool_calls |
@@ -30,6 +32,17 @@ from .events import AgentEvent, TurnSummary | ||
| 30 | 32 | from .executor import ToolExecutionState, ToolExecutor |
| 31 | 33 | from .hooks import build_default_tool_hooks |
| 32 | 34 | from .tracing import RuntimeTracer |
| 35 | +from .workflow import ( | |
| 36 | + ClarifyBrief, | |
| 37 | + ModeRouter, | |
| 38 | + PlanningArtifacts, | |
| 39 | + VERIFICATION_SEPARATOR, | |
| 40 | + WorkflowArtifactStore, | |
| 41 | + WorkflowMode, | |
| 42 | + build_execute_bridge, | |
| 43 | + extract_verification_commands_from_markdown, | |
| 44 | + sync_todos_to_definition_of_done, | |
| 45 | +) | |
| 33 | 46 | |
| 34 | 47 | EventSink = Callable[[AgentEvent], Awaitable[None]] |
| 35 | 48 | ConfirmationHandler = Callable[[str, str, str], Awaitable[bool]] | None |
@@ -63,6 +76,8 @@ class ConversationRuntime: | ||
| 63 | 76 | self.tracer = RuntimeTracer() |
| 64 | 77 | self.executor: ToolExecutor | None = None |
| 65 | 78 | self.dod_store = DefinitionOfDoneStore(agent.project_root) |
| 79 | + self.router = ModeRouter() | |
| 80 | + self.artifact_store = WorkflowArtifactStore(agent.project_root) | |
| 66 | 81 | |
| 67 | 82 | async def run_turn( |
| 68 | 83 | self, |
@@ -70,6 +85,7 @@ class ConversationRuntime: | ||
| 70 | 85 | emit: EventSink, |
| 71 | 86 | on_confirmation: ConfirmationHandler = None, |
| 72 | 87 | on_user_question: UserQuestionHandler = None, |
| 88 | + requested_mode: str | None = None, | |
| 73 | 89 | original_task: str | None = None, |
| 74 | 90 | ) -> TurnSummary: |
| 75 | 91 | """Run one task turn and return a structured summary.""" |
@@ -110,6 +126,16 @@ class ConversationRuntime: | ||
| 110 | 126 | summary.definition_of_done = dod |
| 111 | 127 | await self._emit_dod_status(emit, dod) |
| 112 | 128 | |
| 129 | + task = await self._prepare_workflow( | |
| 130 | + task=task, | |
| 131 | + dod=dod, | |
| 132 | + emit=emit, | |
| 133 | + summary=summary, | |
| 134 | + on_confirmation=on_confirmation, | |
| 135 | + on_user_question=on_user_question, | |
| 136 | + requested_mode=requested_mode, | |
| 137 | + ) | |
| 138 | + | |
| 113 | 139 | while iterations < self.agent.config.max_iterations: |
| 114 | 140 | iterations += 1 |
| 115 | 141 | summary.iterations = iterations |
@@ -315,6 +341,13 @@ class ConversationRuntime: | ||
| 315 | 341 | |
| 316 | 342 | if outcome.state == ToolExecutionState.EXECUTED and not outcome.is_error: |
| 317 | 343 | record_successful_tool_call(dod, tool_call) |
| 344 | + if ( | |
| 345 | + tool_call.name == "TodoWrite" | |
| 346 | + and outcome.registry_result is not None | |
| 347 | + ): | |
| 348 | + new_todos = outcome.registry_result.metadata.get("new_todos", []) | |
| 349 | + if isinstance(new_todos, list): | |
| 350 | + sync_todos_to_definition_of_done(dod, new_todos) | |
| 318 | 351 | self.dod_store.save(dod) |
| 319 | 352 | self.agent._recovery_context = None |
| 320 | 353 | is_loop, loop_description = self.agent.safeguards.detect_loop() |
@@ -730,6 +763,433 @@ class ConversationRuntime: | ||
| 730 | 763 | is_error=True, |
| 731 | 764 | ) |
| 732 | 765 | |
| 766 | + async def _prepare_workflow( | |
| 767 | + self, | |
| 768 | + *, | |
| 769 | + task: str, | |
| 770 | + dod: DefinitionOfDone, | |
| 771 | + emit: EventSink, | |
| 772 | + summary: TurnSummary, | |
| 773 | + on_confirmation: ConfirmationHandler, | |
| 774 | + on_user_question: UserQuestionHandler, | |
| 775 | + requested_mode: str | None, | |
| 776 | + ) -> str: | |
| 777 | + requested = WorkflowMode.from_str(requested_mode) | |
| 778 | + decision = self.router.route( | |
| 779 | + task, | |
| 780 | + requested_mode=requested, | |
| 781 | + has_brief=self._artifact_exists(dod.clarify_brief), | |
| 782 | + has_plan=self._artifact_exists(dod.implementation_plan) | |
| 783 | + and self._artifact_exists(dod.verification_plan), | |
| 784 | + ) | |
| 785 | + await self._set_workflow_mode( | |
| 786 | + decision.mode, | |
| 787 | + dod=dod, | |
| 788 | + emit=emit, | |
| 789 | + summary=summary, | |
| 790 | + reason=decision.reason, | |
| 791 | + ) | |
| 792 | + | |
| 793 | + if decision.mode == WorkflowMode.CLARIFY: | |
| 794 | + await self._run_clarify_mode( | |
| 795 | + task=task, | |
| 796 | + dod=dod, | |
| 797 | + emit=emit, | |
| 798 | + summary=summary, | |
| 799 | + on_user_question=on_user_question, | |
| 800 | + ) | |
| 801 | + decision = self.router.route( | |
| 802 | + task, | |
| 803 | + has_brief=self._artifact_exists(dod.clarify_brief), | |
| 804 | + has_plan=self._artifact_exists(dod.implementation_plan) | |
| 805 | + and self._artifact_exists(dod.verification_plan), | |
| 806 | + allow_clarify=False, | |
| 807 | + ) | |
| 808 | + await self._set_workflow_mode( | |
| 809 | + decision.mode, | |
| 810 | + dod=dod, | |
| 811 | + emit=emit, | |
| 812 | + summary=summary, | |
| 813 | + reason=f"clarify handoff: {decision.reason}", | |
| 814 | + ) | |
| 815 | + | |
| 816 | + if decision.mode == WorkflowMode.PLAN: | |
| 817 | + await self._run_plan_mode( | |
| 818 | + task=task, | |
| 819 | + dod=dod, | |
| 820 | + emit=emit, | |
| 821 | + summary=summary, | |
| 822 | + on_confirmation=on_confirmation, | |
| 823 | + on_user_question=on_user_question, | |
| 824 | + ) | |
| 825 | + await self._set_workflow_mode( | |
| 826 | + WorkflowMode.EXECUTE, | |
| 827 | + dod=dod, | |
| 828 | + emit=emit, | |
| 829 | + summary=summary, | |
| 830 | + reason="plan artifacts created; switching to execute", | |
| 831 | + ) | |
| 832 | + | |
| 833 | + bridge = build_execute_bridge( | |
| 834 | + Path(dod.clarify_brief) if dod.clarify_brief else None, | |
| 835 | + Path(dod.implementation_plan) if dod.implementation_plan else None, | |
| 836 | + Path(dod.verification_plan) if dod.verification_plan else None, | |
| 837 | + ) | |
| 838 | + if bridge and not any( | |
| 839 | + message.role == Role.USER and "[WORKFLOW BRIDGE]" in message.content | |
| 840 | + for message in self.agent.messages[-4:] | |
| 841 | + ): | |
| 842 | + self.agent.session.append( | |
| 843 | + Message( | |
| 844 | + role=Role.USER, | |
| 845 | + content=( | |
| 846 | + "[WORKFLOW BRIDGE]\n" | |
| 847 | + f"{bridge}\n\n" | |
| 848 | + "Honor these artifacts while you execute the task. " | |
| 849 | + "Keep TodoWrite current when the work spans multiple steps." | |
| 850 | + ), | |
| 851 | + ) | |
| 852 | + ) | |
| 853 | + return task | |
| 854 | + | |
| 855 | + async def _set_workflow_mode( | |
| 856 | + self, | |
| 857 | + mode: WorkflowMode, | |
| 858 | + *, | |
| 859 | + dod: DefinitionOfDone, | |
| 860 | + emit: EventSink, | |
| 861 | + summary: TurnSummary, | |
| 862 | + reason: str, | |
| 863 | + ) -> None: | |
| 864 | + self.agent.set_workflow_mode(mode.value) | |
| 865 | + dod.current_mode = mode.value | |
| 866 | + if not dod.mode_history or dod.mode_history[-1] != mode.value: | |
| 867 | + dod.mode_history.append(mode.value) | |
| 868 | + summary.workflow_mode = mode.value | |
| 869 | + summary.definition_of_done = dod | |
| 870 | + self.dod_store.save(dod) | |
| 871 | + await emit( | |
| 872 | + AgentEvent( | |
| 873 | + type="workflow_mode", | |
| 874 | + content=f"Workflow: {mode.value} ({reason})", | |
| 875 | + workflow_mode=mode.value, | |
| 876 | + definition_of_done=dod, | |
| 877 | + ) | |
| 878 | + ) | |
| 879 | + | |
| 880 | + async def _emit_artifact( | |
| 881 | + self, | |
| 882 | + *, | |
| 883 | + emit: EventSink, | |
| 884 | + kind: str, | |
| 885 | + path: Path, | |
| 886 | + preview: str, | |
| 887 | + ) -> None: | |
| 888 | + await emit( | |
| 889 | + AgentEvent( | |
| 890 | + type="artifact", | |
| 891 | + content=preview, | |
| 892 | + artifact_kind=kind, | |
| 893 | + artifact_path=str(path), | |
| 894 | + ) | |
| 895 | + ) | |
| 896 | + | |
| 897 | + async def _complete_in_mode( | |
| 898 | + self, | |
| 899 | + *, | |
| 900 | + prompt: str, | |
| 901 | + tools: list[dict[str, Any]] | None, | |
| 902 | + max_tokens: int, | |
| 903 | + temperature: float = 0.2, | |
| 904 | + ): | |
| 905 | + return await self.agent.backend.complete( | |
| 906 | + messages=self.agent.session.build_request_messages() | |
| 907 | + + [Message(role=Role.USER, content=prompt)], | |
| 908 | + tools=tools, | |
| 909 | + temperature=temperature, | |
| 910 | + max_tokens=max_tokens, | |
| 911 | + ) | |
| 912 | + | |
| 913 | + async def _run_clarify_mode( | |
| 914 | + self, | |
| 915 | + *, | |
| 916 | + task: str, | |
| 917 | + dod: DefinitionOfDone, | |
| 918 | + emit: EventSink, | |
| 919 | + summary: TurnSummary, | |
| 920 | + on_user_question: UserQuestionHandler, | |
| 921 | + ) -> None: | |
| 922 | + ask_tool = self.agent.registry.get("AskUserQuestion") | |
| 923 | + assert ask_tool is not None | |
| 924 | + prompt = ( | |
| 925 | + "Clarify the task before planning or implementation.\n" | |
| 926 | + "Ask exactly one focused question with AskUserQuestion.\n" | |
| 927 | + "Target missing outcome, scope, or decision-boundary information.\n" | |
| 928 | + "Do not propose solutions yet.\n\n" | |
| 929 | + f"Task: {task}" | |
| 930 | + ) | |
| 931 | + response = await self._complete_in_mode( | |
| 932 | + prompt=prompt, | |
| 933 | + tools=[ask_tool.to_schema()], | |
| 934 | + max_tokens=300, | |
| 935 | + ) | |
| 936 | + tool_call = next( | |
| 937 | + ( | |
| 938 | + tool | |
| 939 | + for tool in response.tool_calls | |
| 940 | + if tool.name == "AskUserQuestion" | |
| 941 | + ), | |
| 942 | + None, | |
| 943 | + ) | |
| 944 | + if tool_call is None: | |
| 945 | + tool_call = ToolCall( | |
| 946 | + id="clarify-question-1", | |
| 947 | + name="AskUserQuestion", | |
| 948 | + arguments={ | |
| 949 | + "question": self._fallback_clarify_question(task, response.content), | |
| 950 | + }, | |
| 951 | + ) | |
| 952 | + | |
| 953 | + assistant_message = Message( | |
| 954 | + role=Role.ASSISTANT, | |
| 955 | + content=response.content or tool_call.arguments.get("question", ""), | |
| 956 | + tool_calls=[tool_call], | |
| 957 | + ) | |
| 958 | + self.agent.session.append(assistant_message) | |
| 959 | + summary.assistant_messages.append(assistant_message) | |
| 960 | + | |
| 961 | + await emit( | |
| 962 | + AgentEvent( | |
| 963 | + type="tool_call", | |
| 964 | + tool_name=tool_call.name, | |
| 965 | + tool_args=tool_call.arguments, | |
| 966 | + phase="clarify", | |
| 967 | + ) | |
| 968 | + ) | |
| 969 | + assert self.executor is not None | |
| 970 | + outcome = await self.executor.execute_tool_call( | |
| 971 | + tool_call, | |
| 972 | + on_confirmation=None, | |
| 973 | + on_user_question=on_user_question, | |
| 974 | + emit_confirmation=None, | |
| 975 | + source="clarify", | |
| 976 | + skip_duplicate_check=True, | |
| 977 | + record_action=False, | |
| 978 | + skip_confirmation=True, | |
| 979 | + ) | |
| 980 | + await emit( | |
| 981 | + AgentEvent( | |
| 982 | + type="tool_result", | |
| 983 | + content=outcome.event_content, | |
| 984 | + tool_name=tool_call.name, | |
| 985 | + is_error=outcome.is_error, | |
| 986 | + phase="clarify", | |
| 987 | + ) | |
| 988 | + ) | |
| 989 | + self.agent.session.append(outcome.message) | |
| 990 | + summary.tool_result_messages.append(outcome.message) | |
| 991 | + | |
| 992 | + question = str(tool_call.arguments.get("question", "")).strip() | |
| 993 | + answer = "" | |
| 994 | + if outcome.registry_result is not None: | |
| 995 | + answer = str(outcome.registry_result.metadata.get("answer", "")).strip() | |
| 996 | + | |
| 997 | + brief_prompt = ( | |
| 998 | + "Write a concise task brief in markdown using these exact sections:\n" | |
| 999 | + "## Task Statement\n" | |
| 1000 | + "## Desired Outcome\n" | |
| 1001 | + "## In Scope\n" | |
| 1002 | + "## Non Goals\n" | |
| 1003 | + "## Decision Boundaries\n" | |
| 1004 | + "## Constraints\n" | |
| 1005 | + "## Likely Touchpoints\n" | |
| 1006 | + "## Assumptions\n" | |
| 1007 | + "## Acceptance Criteria\n\n" | |
| 1008 | + "Use short bullet lists when helpful. Do not start implementing.\n\n" | |
| 1009 | + f"Task: {task}\n" | |
| 1010 | + f"Question: {question}\n" | |
| 1011 | + f"Answer: {answer or 'No answer provided.'}" | |
| 1012 | + ) | |
| 1013 | + brief_response = await self._complete_in_mode( | |
| 1014 | + prompt=brief_prompt, | |
| 1015 | + tools=None, | |
| 1016 | + max_tokens=900, | |
| 1017 | + temperature=0.1, | |
| 1018 | + ) | |
| 1019 | + brief = ( | |
| 1020 | + ClarifyBrief.from_markdown( | |
| 1021 | + brief_response.content, | |
| 1022 | + task_statement=task, | |
| 1023 | + question=question, | |
| 1024 | + answer=answer, | |
| 1025 | + ) | |
| 1026 | + if brief_response.content.strip() | |
| 1027 | + else ClarifyBrief.fallback( | |
| 1028 | + task_statement=task, | |
| 1029 | + question=question, | |
| 1030 | + answer=answer, | |
| 1031 | + ) | |
| 1032 | + ) | |
| 1033 | + brief_path = self.artifact_store.write_brief(task, brief) | |
| 1034 | + dod.clarify_brief = str(brief_path) | |
| 1035 | + dod.acceptance_criteria = list(dict.fromkeys(brief.acceptance_criteria)) | |
| 1036 | + self.dod_store.save(dod) | |
| 1037 | + await self._emit_artifact( | |
| 1038 | + emit=emit, | |
| 1039 | + kind="clarify_brief", | |
| 1040 | + path=brief_path, | |
| 1041 | + preview=( | |
| 1042 | + f"Clarify brief: {brief_path}\n" | |
| 1043 | + f"Outcome: {brief.desired_outcome[0]}" | |
| 1044 | + ), | |
| 1045 | + ) | |
| 1046 | + | |
| 1047 | + async def _run_plan_mode( | |
| 1048 | + self, | |
| 1049 | + *, | |
| 1050 | + task: str, | |
| 1051 | + dod: DefinitionOfDone, | |
| 1052 | + emit: EventSink, | |
| 1053 | + summary: TurnSummary, | |
| 1054 | + on_confirmation: ConfirmationHandler, | |
| 1055 | + on_user_question: UserQuestionHandler, | |
| 1056 | + ) -> None: | |
| 1057 | + prompt = ( | |
| 1058 | + "Produce two markdown planning artifacts separated by the exact line " | |
| 1059 | + f"`{VERIFICATION_SEPARATOR}`.\n\n" | |
| 1060 | + "Before the separator, write an Implementation Plan with these sections:\n" | |
| 1061 | + "## File Changes\n" | |
| 1062 | + "## Execution Order\n" | |
| 1063 | + "## Risks\n\n" | |
| 1064 | + "After the separator, write a Verification Plan with these sections:\n" | |
| 1065 | + "## Acceptance Criteria\n" | |
| 1066 | + "## Verification Commands\n" | |
| 1067 | + "## Notes\n\n" | |
| 1068 | + "Do not start writing code.\n\n" | |
| 1069 | + f"Task: {task}" | |
| 1070 | + ) | |
| 1071 | + response = await self._complete_in_mode( | |
| 1072 | + prompt=prompt, | |
| 1073 | + tools=None, | |
| 1074 | + max_tokens=1400, | |
| 1075 | + temperature=0.2, | |
| 1076 | + ) | |
| 1077 | + artifacts = ( | |
| 1078 | + PlanningArtifacts.from_model_output( | |
| 1079 | + response.content, | |
| 1080 | + task_statement=task, | |
| 1081 | + ) | |
| 1082 | + if response.content.strip() | |
| 1083 | + else PlanningArtifacts.fallback(task_statement=task) | |
| 1084 | + ) | |
| 1085 | + implementation_path, verification_path = self.artifact_store.write_plan( | |
| 1086 | + task, | |
| 1087 | + artifacts, | |
| 1088 | + ) | |
| 1089 | + dod.implementation_plan = str(implementation_path) | |
| 1090 | + dod.verification_plan = str(verification_path) | |
| 1091 | + dod.acceptance_criteria = list( | |
| 1092 | + dict.fromkeys(dod.acceptance_criteria + artifacts.acceptance_criteria) | |
| 1093 | + ) | |
| 1094 | + if artifacts.verification_commands: | |
| 1095 | + dod.verification_commands = artifacts.verification_commands | |
| 1096 | + self.dod_store.save(dod) | |
| 1097 | + await self._emit_artifact( | |
| 1098 | + emit=emit, | |
| 1099 | + kind="implementation_plan", | |
| 1100 | + path=implementation_path, | |
| 1101 | + preview=( | |
| 1102 | + f"Implementation plan: {implementation_path}\n" | |
| 1103 | + f"Steps: {len(artifacts.implementation_steps)}" | |
| 1104 | + ), | |
| 1105 | + ) | |
| 1106 | + await self._emit_artifact( | |
| 1107 | + emit=emit, | |
| 1108 | + kind="verification_plan", | |
| 1109 | + path=verification_path, | |
| 1110 | + preview=( | |
| 1111 | + f"Verification plan: {verification_path}\n" | |
| 1112 | + f"Commands: {len(artifacts.verification_commands)}" | |
| 1113 | + ), | |
| 1114 | + ) | |
| 1115 | + await self._seed_todos_from_plan( | |
| 1116 | + artifacts=artifacts, | |
| 1117 | + dod=dod, | |
| 1118 | + emit=emit, | |
| 1119 | + ) | |
| 1120 | + | |
| 1121 | + async def _seed_todos_from_plan( | |
| 1122 | + self, | |
| 1123 | + *, | |
| 1124 | + artifacts: PlanningArtifacts, | |
| 1125 | + dod: DefinitionOfDone, | |
| 1126 | + emit: EventSink, | |
| 1127 | + ) -> None: | |
| 1128 | + if not artifacts.implementation_steps: | |
| 1129 | + return | |
| 1130 | + | |
| 1131 | + todos = [ | |
| 1132 | + { | |
| 1133 | + "content": step, | |
| 1134 | + "active_form": f"Working on: {step}", | |
| 1135 | + "status": "pending", | |
| 1136 | + } | |
| 1137 | + for step in artifacts.implementation_steps[:8] | |
| 1138 | + ] | |
| 1139 | + tool_call = ToolCall( | |
| 1140 | + id="plan-todos-1", | |
| 1141 | + name="TodoWrite", | |
| 1142 | + arguments={"todos": todos}, | |
| 1143 | + ) | |
| 1144 | + await emit( | |
| 1145 | + AgentEvent( | |
| 1146 | + type="tool_call", | |
| 1147 | + tool_name=tool_call.name, | |
| 1148 | + tool_args=tool_call.arguments, | |
| 1149 | + phase="plan", | |
| 1150 | + ) | |
| 1151 | + ) | |
| 1152 | + assert self.executor is not None | |
| 1153 | + outcome = await self.executor.execute_tool_call( | |
| 1154 | + tool_call, | |
| 1155 | + on_confirmation=None, | |
| 1156 | + on_user_question=None, | |
| 1157 | + emit_confirmation=None, | |
| 1158 | + source="plan", | |
| 1159 | + skip_duplicate_check=True, | |
| 1160 | + record_action=False, | |
| 1161 | + skip_confirmation=True, | |
| 1162 | + ) | |
| 1163 | + await emit( | |
| 1164 | + AgentEvent( | |
| 1165 | + type="tool_result", | |
| 1166 | + content=outcome.event_content, | |
| 1167 | + tool_name=tool_call.name, | |
| 1168 | + is_error=outcome.is_error, | |
| 1169 | + phase="plan", | |
| 1170 | + ) | |
| 1171 | + ) | |
| 1172 | + if outcome.registry_result is not None: | |
| 1173 | + new_todos = outcome.registry_result.metadata.get("new_todos", []) | |
| 1174 | + if isinstance(new_todos, list): | |
| 1175 | + sync_todos_to_definition_of_done(dod, new_todos) | |
| 1176 | + self.dod_store.save(dod) | |
| 1177 | + | |
| 1178 | + @staticmethod | |
| 1179 | + def _artifact_exists(path_str: str | None) -> bool: | |
| 1180 | + return bool(path_str and Path(path_str).exists()) | |
| 1181 | + | |
| 1182 | + @staticmethod | |
| 1183 | + def _fallback_clarify_question(task: str, response_content: str) -> str: | |
| 1184 | + match = re.search(r"([A-Z][^?]+\?)", response_content) | |
| 1185 | + if match: | |
| 1186 | + return match.group(1).strip() | |
| 1187 | + return ( | |
| 1188 | + "What outcome matters most here, and what should stay out of scope?" | |
| 1189 | + if task.strip() | |
| 1190 | + else "What outcome matters most?" | |
| 1191 | + ) | |
| 1192 | + | |
| 733 | 1193 | async def _run_definition_of_done_gate( |
| 734 | 1194 | self, |
| 735 | 1195 | *, |
@@ -743,8 +1203,31 @@ class ConversationRuntime: | ||
| 743 | 1203 | dod.pending_items.remove(implementation_item) |
| 744 | 1204 | dod.completed_items.append(implementation_item) |
| 745 | 1205 | |
| 1206 | + tracked_pending_items = [ | |
| 1207 | + item | |
| 1208 | + for item in dod.pending_items | |
| 1209 | + if item != "Collect verification evidence" | |
| 1210 | + ] | |
| 1211 | + | |
| 746 | 1212 | mutating_paths = [path for path in dod.touched_files if path] |
| 747 | 1213 | requires_verification = bool(mutating_paths or dod.mutating_actions) |
| 1214 | + if tracked_pending_items and not requires_verification: | |
| 1215 | + pending_text = "\n".join(f"- {item}" for item in tracked_pending_items) | |
| 1216 | + self.dod_store.save(dod) | |
| 1217 | + await self._emit_dod_status(emit, dod) | |
| 1218 | + self.agent.session.append( | |
| 1219 | + Message( | |
| 1220 | + role=Role.USER, | |
| 1221 | + content=( | |
| 1222 | + "[PENDING WORK REMAINS]\n" | |
| 1223 | + "The tracked work items are not complete yet:\n" | |
| 1224 | + f"{pending_text}\n\n" | |
| 1225 | + "Continue the task, and update TodoWrite as you make progress." | |
| 1226 | + ), | |
| 1227 | + ) | |
| 1228 | + ) | |
| 1229 | + return CompletionGateResult(should_continue=True, final_response="") | |
| 1230 | + | |
| 748 | 1231 | if not requires_verification: |
| 749 | 1232 | dod.status = "done" |
| 750 | 1233 | dod.last_verification_result = "skipped" |
@@ -761,6 +1244,11 @@ class ConversationRuntime: | ||
| 761 | 1244 | if verify_item not in dod.pending_items and verify_item not in dod.completed_items: |
| 762 | 1245 | dod.pending_items.append(verify_item) |
| 763 | 1246 | |
| 1247 | + if not dod.verification_commands and dod.verification_plan and Path(dod.verification_plan).exists(): | |
| 1248 | + dod.verification_commands = extract_verification_commands_from_markdown( | |
| 1249 | + Path(dod.verification_plan).read_text() | |
| 1250 | + ) | |
| 1251 | + | |
| 764 | 1252 | if not dod.verification_commands: |
| 765 | 1253 | dod.verification_commands = derive_verification_commands( |
| 766 | 1254 | dod, |
@@ -768,6 +1256,13 @@ class ConversationRuntime: | ||
| 768 | 1256 | task_statement=dod.task_statement, |
| 769 | 1257 | ) |
| 770 | 1258 | |
| 1259 | + await self._set_workflow_mode( | |
| 1260 | + WorkflowMode.VERIFY, | |
| 1261 | + dod=dod, | |
| 1262 | + emit=emit, | |
| 1263 | + summary=summary, | |
| 1264 | + reason="definition-of-done gate requires verification", | |
| 1265 | + ) | |
| 771 | 1266 | verification_passed = await self._verify_definition_of_done( |
| 772 | 1267 | dod=dod, |
| 773 | 1268 | emit=emit, |
@@ -778,6 +1273,10 @@ class ConversationRuntime: | ||
| 778 | 1273 | dod.pending_items.remove(verify_item) |
| 779 | 1274 | if verify_item not in dod.completed_items: |
| 780 | 1275 | dod.completed_items.append(verify_item) |
| 1276 | + for pending in list(dod.pending_items): | |
| 1277 | + if pending not in dod.completed_items: | |
| 1278 | + dod.completed_items.append(pending) | |
| 1279 | + dod.pending_items = [] | |
| 781 | 1280 | dod.status = "done" |
| 782 | 1281 | dod.last_verification_result = "passed" |
| 783 | 1282 | dod.confidence = "high" |
@@ -817,6 +1316,13 @@ class ConversationRuntime: | ||
| 817 | 1316 | dod.confidence = "medium" |
| 818 | 1317 | self.dod_store.save(dod) |
| 819 | 1318 | await self._emit_dod_status(emit, dod) |
| 1319 | + await self._set_workflow_mode( | |
| 1320 | + WorkflowMode.EXECUTE, | |
| 1321 | + dod=dod, | |
| 1322 | + emit=emit, | |
| 1323 | + summary=summary, | |
| 1324 | + reason="verification failed; returning to execute for fixes", | |
| 1325 | + ) | |
| 820 | 1326 | failure_prompt = ( |
| 821 | 1327 | "[DEFINITION OF DONE CHECK FAILED]\n" |
| 822 | 1328 | f"Task: {dod.task_statement}\n" |
src/loader/runtime/dod.pymodified@@ -53,6 +53,11 @@ class DefinitionOfDone: | ||
| 53 | 53 | line_changes: int = 0 |
| 54 | 54 | storage_path: str | None = None |
| 55 | 55 | last_verification_result: str | None = None |
| 56 | + current_mode: str = "execute" | |
| 57 | + mode_history: list[str] = field(default_factory=list) | |
| 58 | + clarify_brief: str | None = None | |
| 59 | + implementation_plan: str | None = None | |
| 60 | + verification_plan: str | None = None | |
| 56 | 61 | |
| 57 | 62 | def to_dict(self) -> dict[str, Any]: |
| 58 | 63 | """Serialize the DoD state for persistence.""" |
@@ -83,6 +88,11 @@ class DefinitionOfDone: | ||
| 83 | 88 | line_changes=int(data.get("line_changes", 0)), |
| 84 | 89 | storage_path=data.get("storage_path"), |
| 85 | 90 | last_verification_result=data.get("last_verification_result"), |
| 91 | + current_mode=data.get("current_mode", "execute"), | |
| 92 | + mode_history=list(data.get("mode_history", [])), | |
| 93 | + clarify_brief=data.get("clarify_brief"), | |
| 94 | + implementation_plan=data.get("implementation_plan"), | |
| 95 | + verification_plan=data.get("verification_plan"), | |
| 86 | 96 | ) |
| 87 | 97 | |
| 88 | 98 | |
src/loader/runtime/events.pymodified@@ -38,6 +38,9 @@ class AgentEvent: | ||
| 38 | 38 | dod_status: str | None = None |
| 39 | 39 | pending_items_count: int | None = None |
| 40 | 40 | last_verification_result: str | None = None |
| 41 | + workflow_mode: str | None = None | |
| 42 | + artifact_kind: str | None = None | |
| 43 | + artifact_path: str | None = None | |
| 41 | 44 | |
| 42 | 45 | decomposition: TaskDecomposition | None = None |
| 43 | 46 | subtask: Subtask | None = None |
@@ -63,3 +66,4 @@ class TurnSummary: | ||
| 63 | 66 | usage: dict[str, int] = field(default_factory=dict) |
| 64 | 67 | trace: list[RuntimeTraceEvent] = field(default_factory=list) |
| 65 | 68 | definition_of_done: DefinitionOfDone | None = None |
| 69 | + workflow_mode: str | None = None | |
src/loader/runtime/workflow.pyadded@@ -0,0 +1,637 @@ | ||
| 1 | +"""Workflow routing and artifact persistence for Loader runtime modes.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +import re | |
| 6 | +from dataclasses import dataclass, field | |
| 7 | +from datetime import UTC, datetime | |
| 8 | +from enum import StrEnum | |
| 9 | +from pathlib import Path | |
| 10 | + | |
| 11 | +from .dod import slugify | |
| 12 | + | |
# Sentinel the planner model emits to split one response into the
# implementation plan (before) and the verification plan (after).
VERIFICATION_SEPARATOR = "<<<VERIFICATION>>>"

# Maps normalized markdown heading text (lowercased, non-alphanumerics
# collapsed to single spaces) to the canonical section keys used by the
# brief/plan parsers. Headings not listed here fall back to the
# normalized text with spaces replaced by underscores.
_SECTION_ALIASES = {
    "task statement": "task_statement",
    "desired outcome": "desired_outcome",
    "in scope": "in_scope",
    "out of scope": "non_goals",
    "out of scope non goals": "non_goals",
    "out of scope or non goals": "non_goals",
    "non goals": "non_goals",
    "non-goals": "non_goals",
    "decision boundaries": "decision_boundaries",
    "constraints": "constraints",
    "likely touchpoints": "likely_touchpoints",
    "assumptions": "assumptions",
    "acceptance criteria": "acceptance_criteria",
    "file changes": "file_changes",
    "execution order": "execution_order",
    "risks": "risks",
    "verification commands": "verification_commands",
    "commands": "verification_commands",
    "notes": "notes",
}
| 36 | + | |
| 37 | + | |
| 38 | +class WorkflowMode(StrEnum): | |
| 39 | + """High-level runtime modes for one Loader task turn.""" | |
| 40 | + | |
| 41 | + CLARIFY = "clarify" | |
| 42 | + PLAN = "plan" | |
| 43 | + EXECUTE = "execute" | |
| 44 | + VERIFY = "verify" | |
| 45 | + | |
| 46 | + @classmethod | |
| 47 | + def from_str(cls, value: str | None) -> WorkflowMode | None: | |
| 48 | + if value is None: | |
| 49 | + return None | |
| 50 | + normalized = value.strip().lower() | |
| 51 | + for mode in cls: | |
| 52 | + if mode.value == normalized: | |
| 53 | + return mode | |
| 54 | + raise ValueError(f"Unknown workflow mode: {value}") | |
| 55 | + | |
| 56 | + | |
@dataclass(slots=True)
class ModeDecision:
    """Router output for the entry point of a task turn."""

    # Mode the turn should start in (clarify / plan / execute / verify).
    mode: WorkflowMode
    # Short human-readable justification for the chosen mode.
    reason: str
    # Heuristic scores in [0, 1]; both stay 0.0 when the route was forced
    # (explicit request or existing plan artifacts) and no scoring ran.
    ambiguity_score: float = 0.0
    complexity_score: float = 0.0
| 65 | + | |
| 66 | + | |
@dataclass(slots=True)
class ClarifyBrief:
    """Execution-ready brief created from one clarify round.

    Captures the clarified scope of a task (outcome, boundaries,
    constraints) plus the single clarify question/answer pair, and
    renders to and from the ``# Task Brief`` markdown format.
    """

    task_statement: str
    desired_outcome: list[str] = field(default_factory=list)
    in_scope: list[str] = field(default_factory=list)
    non_goals: list[str] = field(default_factory=list)
    decision_boundaries: list[str] = field(default_factory=list)
    constraints: list[str] = field(default_factory=list)
    likely_touchpoints: list[str] = field(default_factory=list)
    assumptions: list[str] = field(default_factory=list)
    acceptance_criteria: list[str] = field(default_factory=list)
    question: str | None = None
    answer: str | None = None

    @classmethod
    def from_markdown(
        cls,
        markdown: str,
        *,
        task_statement: str,
        question: str | None = None,
        answer: str | None = None,
    ) -> ClarifyBrief:
        """Parse a brief from its markdown sections.

        Sections absent from *markdown* come back empty and are then
        backfilled by :meth:`fill_defaults`, so the returned brief is
        always fully populated.
        """
        sections = _parse_markdown_sections(markdown)
        brief = cls(
            # Prefer the document's own Task Statement section when present.
            task_statement=_first_item(sections.get("task_statement")) or task_statement,
            desired_outcome=sections.get("desired_outcome", []),
            in_scope=sections.get("in_scope", []),
            non_goals=sections.get("non_goals", []),
            decision_boundaries=sections.get("decision_boundaries", []),
            constraints=sections.get("constraints", []),
            likely_touchpoints=sections.get("likely_touchpoints", []),
            assumptions=sections.get("assumptions", []),
            acceptance_criteria=sections.get("acceptance_criteria", []),
            question=question,
            answer=answer,
        )
        brief.fill_defaults()
        return brief

    @classmethod
    def fallback(
        cls,
        *,
        task_statement: str,
        question: str,
        answer: str,
    ) -> ClarifyBrief:
        """Build a minimal brief straight from one question/answer pair.

        Used when no structured markdown brief could be produced; every
        section gets a conservative placeholder derived from the answer.
        """
        brief = cls(
            task_statement=task_statement,
            desired_outcome=[answer or "Clarify the intended outcome before implementation."],
            in_scope=[task_statement],
            non_goals=["Anything not confirmed in the clarification answer."],
            decision_boundaries=["Escalate if the clarified scope changes materially."],
            constraints=["Honor the clarified answer and existing repository conventions."],
            likely_touchpoints=["Determine the concrete files during execution."],
            assumptions=[f"Clarification answer: {answer or 'No answer provided.'}"],
            question=question,
            answer=answer,
        )
        brief.fill_defaults()
        return brief

    def fill_defaults(self) -> None:
        """Backfill any empty section with a conservative default.

        Guarantees downstream consumers never see an empty brief section.
        """
        if not self.desired_outcome:
            self.desired_outcome = [self.task_statement]
        if not self.in_scope:
            self.in_scope = [self.task_statement]
        if not self.non_goals:
            self.non_goals = ["Do not expand beyond the clarified task statement."]
        if not self.decision_boundaries:
            self.decision_boundaries = [
                "Escalate for destructive or preference-dependent changes.",
            ]
        if not self.constraints:
            self.constraints = ["Preserve the existing codebase conventions and tests."]
        if not self.likely_touchpoints:
            self.likely_touchpoints = ["Identify exact files during planning or execution."]
        if not self.assumptions:
            self.assumptions = ["Unspecified details stay unchanged unless evidence says otherwise."]
        if not self.acceptance_criteria:
            # Seed criteria from the outcome plus the first two in-scope
            # items; dict.fromkeys dedups while preserving order.
            self.acceptance_criteria = list(
                dict.fromkeys(self.desired_outcome + self.in_scope[:2])
            )

    def to_markdown(self) -> str:
        """Render the brief as a ``# Task Brief`` markdown document."""
        lines = [
            "# Task Brief",
            "",
            f"Generated: {datetime.now(UTC).strftime('%Y-%m-%d %H:%M:%SZ')}",
            "",
            "## Task Statement",
            self.task_statement,
            "",
        ]
        lines.extend(_render_section("Desired Outcome", self.desired_outcome))
        lines.extend(_render_section("In Scope", self.in_scope))
        lines.extend(_render_section("Non Goals", self.non_goals))
        lines.extend(_render_section("Decision Boundaries", self.decision_boundaries))
        lines.extend(_render_section("Constraints", self.constraints))
        lines.extend(_render_section("Likely Touchpoints", self.likely_touchpoints))
        lines.extend(_render_section("Assumptions", self.assumptions))
        lines.extend(_render_section("Acceptance Criteria", self.acceptance_criteria))
        # Question/answer sections are only emitted when present.
        if self.question:
            lines.extend(_render_section("Clarify Question", [self.question]))
        if self.answer:
            lines.extend(_render_section("Clarify Answer", [self.answer]))
        return "\n".join(lines).rstrip() + "\n"
| 177 | + | |
| 178 | + | |
@dataclass(slots=True)
class PlanningArtifacts:
    """Persistent planning artifacts created before execution."""

    # Full markdown documents persisted under `.loader/plans/<slug>/`.
    implementation_markdown: str
    verification_markdown: str
    # Structured fields extracted from the markdown sections.
    verification_commands: list[str]
    acceptance_criteria: list[str]
    implementation_steps: list[str]

    @classmethod
    def from_model_output(
        cls,
        model_output: str,
        *,
        task_statement: str,
    ) -> PlanningArtifacts:
        """Parse a planner response into plan artifacts.

        The output is split on ``VERIFICATION_SEPARATOR`` into the two
        plan documents; steps, commands, and acceptance criteria are
        pulled from the respective markdown sections, with *task_statement*
        as a last-resort placeholder for missing sections.
        """
        implementation_markdown, verification_markdown = _split_plan_output(model_output)
        implementation_sections = _parse_markdown_sections(implementation_markdown)
        verification_sections = _parse_markdown_sections(verification_markdown)

        # Prefer the explicit Execution Order; fall back to File Changes.
        implementation_steps = (
            implementation_sections.get("execution_order", [])
            or implementation_sections.get("file_changes", [])
        )
        if not implementation_steps:
            implementation_steps = [task_statement]

        verification_commands = _extract_commands(
            verification_sections.get("verification_commands", [])
        )
        # Acceptance criteria may live in either plan document.
        acceptance_criteria = (
            verification_sections.get("acceptance_criteria", [])
            or implementation_sections.get("acceptance_criteria", [])
        )
        if not acceptance_criteria:
            acceptance_criteria = [task_statement]

        return cls(
            implementation_markdown=_ensure_heading(
                implementation_markdown,
                "# Implementation Plan",
            ),
            verification_markdown=_ensure_heading(
                verification_markdown,
                "# Verification Plan",
            ),
            verification_commands=verification_commands,
            acceptance_criteria=acceptance_criteria,
            implementation_steps=implementation_steps,
        )

    @classmethod
    def fallback(
        cls,
        *,
        task_statement: str,
    ) -> PlanningArtifacts:
        """Build placeholder plans when the model produced no usable output."""
        implementation_markdown = "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- Determine concrete files needed for: {task_statement}",
                "",
                "## Execution Order",
                f"1. Inspect the codebase areas relevant to: {task_statement}",
                "2. Apply the minimum required changes.",
                "3. Re-run the most relevant verification commands.",
                "",
                "## Risks",
                "- Unknown repository conventions may require one discovery pass first.",
                "",
            ]
        )
        verification_markdown = "\n".join(
            [
                "# Verification Plan",
                "",
                "## Acceptance Criteria",
                f"- {task_statement}",
                "",
                "## Verification Commands",
                "- echo \"add verification command\"",
                "",
                "## Notes",
                "- Replace the placeholder verification command with a project-specific check.",
                "",
            ]
        )
        return cls(
            implementation_markdown=implementation_markdown,
            verification_markdown=verification_markdown,
            # Placeholder command keeps the verify phase runnable.
            verification_commands=["echo \"add verification command\""],
            acceptance_criteria=[task_statement],
            implementation_steps=[
                f"Inspect the codebase areas relevant to: {task_statement}",
                "Apply the minimum required changes.",
                "Re-run the most relevant verification commands.",
            ],
        )
| 280 | + | |
| 281 | + | |
class WorkflowArtifactStore:
    """Persist briefs and plans under `.loader/`."""

    def __init__(self, project_root: Path) -> None:
        self.project_root = project_root
        self.loader_root = project_root / ".loader"
        self.briefs_root = self.loader_root / "briefs"
        self.plans_root = self.loader_root / "plans"

    def write_brief(self, task_statement: str, brief: ClarifyBrief) -> Path:
        """Write *brief* as a timestamped markdown file and return its path."""
        target = self.briefs_root / f"{_timestamp()}-{slugify(task_statement)}.md"
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(brief.to_markdown())
        return target

    def write_plan(
        self,
        task_statement: str,
        artifacts: PlanningArtifacts,
    ) -> tuple[Path, Path]:
        """Write both plan documents; return (implementation, verification) paths."""
        folder = self.plans_root / f"{_timestamp()}-{slugify(task_statement)}"
        folder.mkdir(parents=True, exist_ok=True)
        impl_path = folder / "implementation.md"
        verify_path = folder / "verification.md"
        impl_path.write_text(artifacts.implementation_markdown.rstrip() + "\n")
        verify_path.write_text(artifacts.verification_markdown.rstrip() + "\n")
        return impl_path, verify_path
| 309 | + | |
| 310 | + | |
class ModeRouter:
    """Simple heuristic router for clarify/plan/execute entry modes."""

    clarify_threshold = 0.55
    plan_threshold = 0.45

    def route(
        self,
        task: str,
        *,
        requested_mode: WorkflowMode | None = None,
        has_brief: bool = False,
        has_plan: bool = False,
        allow_clarify: bool = True,
    ) -> ModeDecision:
        """Pick the entry mode for *task*.

        Explicit requests and existing plan artifacts short-circuit the
        heuristics; otherwise ambiguity and complexity scores decide.
        """
        if requested_mode is not None:
            return ModeDecision(
                mode=requested_mode,
                reason=f"explicit {requested_mode.value} request",
            )
        if has_plan:
            return ModeDecision(
                mode=WorkflowMode.EXECUTE,
                reason="reusing existing plan artifacts",
            )

        ambiguity = self._ambiguity_score(task)
        complexity = self._complexity_score(task)

        def scored(mode: WorkflowMode, reason: str) -> ModeDecision:
            # Attach both heuristic scores to any non-forced decision.
            return ModeDecision(
                mode=mode,
                reason=reason,
                ambiguity_score=ambiguity,
                complexity_score=complexity,
            )

        if allow_clarify and not has_brief and ambiguity >= self.clarify_threshold:
            return scored(WorkflowMode.CLARIFY, "prompt is broad or missing boundaries")
        if complexity >= self.plan_threshold:
            return scored(
                WorkflowMode.PLAN,
                "task looks complex enough to benefit from a persisted plan",
            )
        return scored(
            WorkflowMode.EXECUTE,
            "task appears concrete enough for direct execution",
        )

    def _ambiguity_score(self, task: str) -> float:
        """Score [0, 1]: how underspecified the prompt looks."""
        lowered = task.lower()
        word_count = len(re.findall(r"\w+", lowered))

        # User explicitly asked to be interviewed / not to assume.
        explicit_clarify = (
            "--clarify" in lowered
            or "don't assume" in lowered
            or "do not assume" in lowered
            or "not sure" in lowered
            or "figure out" in lowered
            or "interview me" in lowered
            or "ask me" in lowered
            or lowered.startswith("clarify ")
        )
        vague_phrases = (
            "something",
            "somehow",
            "better",
            "improve",
            "fix this",
            "make it",
            "more like",
            "feels more like",
        )
        action_verbs = ("build", "add", "improve", "refactor", "implement")

        total = 0.0
        if explicit_clarify:
            total += 0.65
        if any(phrase in lowered for phrase in vague_phrases):
            total += 0.2
        if not _has_concrete_anchor(task):
            total += 0.2
        # Very short action prompts usually omit the needed boundaries.
        if word_count <= 12 and any(verb in lowered for verb in action_verbs):
            total += 0.15
        return min(total, 1.0)

    def _complexity_score(self, task: str) -> float:
        """Score [0, 1]: how much the task would benefit from planning."""
        lowered = task.lower()
        word_count = len(re.findall(r"\w+", lowered))
        heavy_phrases = (
            "refactor",
            "architecture",
            "migrate",
            "persistent",
            "workflow",
            "deep dive",
            "report",
            "implementation plan",
            "verification plan",
        )

        total = 0.0
        if word_count >= 18:
            total += 0.2
        if word_count >= 30:
            total += 0.15
        if any(phrase in lowered for phrase in heavy_phrases):
            total += 0.3
        # Multiple conjunctions/commas suggest a multi-part request.
        if lowered.count(" and ") >= 2 or lowered.count(",") >= 2:
            total += 0.15
        if _has_concrete_anchor(task):
            total += 0.1
        return min(total, 1.0)
| 440 | + | |
| 441 | + | |
def load_brief(path: Path) -> ClarifyBrief:
    """Load a clarify brief from disk, using the filename stem as fallback task."""
    markdown = path.read_text()
    return ClarifyBrief.from_markdown(markdown, task_statement=path.stem)
| 446 | + | |
| 447 | + | |
def load_planning_artifacts(
    implementation_path: Path,
    verification_path: Path,
    *,
    task_statement: str,
) -> PlanningArtifacts:
    """Load persisted planning artifacts from disk.

    Rebuilds the original single-document form by re-joining the two
    files around ``VERIFICATION_SEPARATOR`` and re-parsing it.
    """
    combined = "\n\n".join(
        (
            implementation_path.read_text().rstrip(),
            VERIFICATION_SEPARATOR,
            verification_path.read_text().rstrip(),
        )
    )
    return PlanningArtifacts.from_model_output(combined, task_statement=task_statement)
| 464 | + | |
| 465 | + | |
def sync_todos_to_definition_of_done(
    dod,
    todos: list[dict[str, str]],
) -> None:
    """Reflect todo state into DoD pending/completed items.

    Pending items use a todo's ``active_form`` label while it is in
    progress (falling back to ``content`` when absent), completed items
    always use ``content``, and the two runtime-managed sentinel items
    are preserved in whichever list they already occupy.
    """
    sentinels = {"Complete the requested work", "Collect verification evidence"}
    special_pending = [item for item in dod.pending_items if item in sentinels]
    special_completed = [item for item in dod.completed_items if item in sentinels]

    pending: list[str] = []
    completed: list[str] = []
    for todo in todos:
        status = str(todo.get("status", "")).strip().lower()
        content = str(todo.get("content", "")).strip()
        if status == "in_progress":
            # Fall back to `content` when `active_form` is missing/empty;
            # previously str(None) produced a literal "None" pending item.
            label = str(todo.get("active_form") or content).strip()
        else:
            label = content
        if not label:
            continue
        if status == "completed":
            completed.append(content or label)
        else:
            pending.append(label)

    # Dedup while preserving order; sentinel items stay at the tail.
    dod.pending_items = list(dict.fromkeys(pending + special_pending))
    dod.completed_items = list(dict.fromkeys(completed + special_completed))
| 495 | + | |
| 496 | + | |
def extract_verification_commands_from_markdown(markdown: str) -> list[str]:
    """Extract verification commands from a verification-plan markdown document.

    Returns the entries listed under the "Verification Commands" (or
    "Commands") heading, with surrounding backticks stripped.
    """

    sections = _parse_markdown_sections(markdown)
    return _extract_commands(sections.get("verification_commands", []))
| 502 | + | |
| 503 | + | |
def build_execute_bridge(
    brief_path: Path | None,
    implementation_path: Path | None,
    verification_path: Path | None,
) -> str | None:
    """Build a compact execution bridge message from persisted artifacts.

    Missing or nonexistent paths are skipped; returns None when no
    artifact is available.
    """
    artifact_sources = (
        (brief_path, "Use the clarify brief below as the requirements source of truth.\n\n"),
        (implementation_path, "Use the implementation plan below to sequence the work.\n\n"),
        (verification_path, "Use the verification plan below to determine done-ness.\n\n"),
    )
    sections = [
        preamble + source.read_text().strip()
        for source, preamble in artifact_sources
        if source and source.exists()
    ]
    return "\n\n".join(sections) if sections else None
| 530 | + | |
| 531 | + | |
def _split_plan_output(model_output: str) -> tuple[str, str]:
    """Split model output into (implementation, verification) markdown."""
    before, separator, after = model_output.partition(VERIFICATION_SEPARATOR)
    if not separator:
        # No separator: everything is implementation, verification is empty.
        return model_output.strip(), ""
    return before.strip(), after.strip()
| 537 | + | |
| 538 | + | |
| 539 | +def _ensure_heading(markdown: str, heading: str) -> str: | |
| 540 | + stripped = markdown.strip() | |
| 541 | + if not stripped: | |
| 542 | + return heading + "\n" | |
| 543 | + if stripped.startswith("#"): | |
| 544 | + return stripped + "\n" | |
| 545 | + return f"{heading}\n\n{stripped}\n" | |
| 546 | + | |
| 547 | + | |
| 548 | +def _timestamp() -> str: | |
| 549 | + return datetime.now(UTC).strftime("%Y%m%dT%H%M%SZ") | |
| 550 | + | |
| 551 | + | |
def _normalize_heading(text: str) -> str:
    """Normalize a markdown heading to its canonical section key."""
    # Collapse runs of non-alphanumerics into single spaces, trimmed.
    cleaned = " ".join(re.findall(r"[a-z0-9]+", text.lower()))
    return _SECTION_ALIASES.get(cleaned, cleaned.replace(" ", "_"))
| 555 | + | |
| 556 | + | |
def _parse_markdown_sections(markdown: str) -> dict[str, list[str]]:
    """Group markdown body lines under normalized `##`-level heading keys."""
    raw_sections: dict[str, list[str]] = {}
    active: list[str] | None = None
    for raw_line in markdown.splitlines():
        heading_match = re.match(r"^##+\s+(.+?)\s*$", raw_line.strip())
        if heading_match:
            key = _normalize_heading(heading_match.group(1))
            # Repeated headings continue appending to the same section.
            active = raw_sections.setdefault(key, [])
        elif active is not None:
            # Lines before the first heading are dropped.
            active.append(raw_line.rstrip())
    return {key: _extract_items(lines) for key, lines in raw_sections.items()}
| 573 | + | |
| 574 | + | |
| 575 | +def _extract_items(lines: list[str]) -> list[str]: | |
| 576 | + items: list[str] = [] | |
| 577 | + paragraph_buffer: list[str] = [] | |
| 578 | + for line in lines: | |
| 579 | + stripped = line.strip() | |
| 580 | + if not stripped: | |
| 581 | + if paragraph_buffer: | |
| 582 | + items.append(" ".join(paragraph_buffer).strip()) | |
| 583 | + paragraph_buffer.clear() | |
| 584 | + continue | |
| 585 | + | |
| 586 | + bullet = re.match(r"^(?:[-*]|\d+\.)\s+(.+)$", stripped) | |
| 587 | + if bullet: | |
| 588 | + if paragraph_buffer: | |
| 589 | + items.append(" ".join(paragraph_buffer).strip()) | |
| 590 | + paragraph_buffer.clear() | |
| 591 | + items.append(bullet.group(1).strip()) | |
| 592 | + continue | |
| 593 | + paragraph_buffer.append(stripped) | |
| 594 | + if paragraph_buffer: | |
| 595 | + items.append(" ".join(paragraph_buffer).strip()) | |
| 596 | + return [item for item in items if item] | |
| 597 | + | |
| 598 | + | |
| 599 | +def _render_section(title: str, items: list[str]) -> list[str]: | |
| 600 | + lines = [f"## {title}"] | |
| 601 | + if items: | |
| 602 | + lines.extend(f"- {item}" for item in items) | |
| 603 | + else: | |
| 604 | + lines.append("- None recorded.") | |
| 605 | + lines.append("") | |
| 606 | + return lines | |
| 607 | + | |
| 608 | + | |
| 609 | +def _first_item(items: list[str] | None) -> str | None: | |
| 610 | + if not items: | |
| 611 | + return None | |
| 612 | + return items[0] | |
| 613 | + | |
| 614 | + | |
| 615 | +def _extract_commands(items: list[str]) -> list[str]: | |
| 616 | + commands: list[str] = [] | |
| 617 | + for item in items: | |
| 618 | + match = re.match(r"^`(.+)`$", item) | |
| 619 | + commands.append((match.group(1) if match else item).strip()) | |
| 620 | + return [command for command in commands if command] | |
| 621 | + | |
| 622 | + | |
| 623 | +def _has_concrete_anchor(task: str) -> bool: | |
| 624 | + return any( | |
| 625 | + re.search(pattern, task) | |
| 626 | + for pattern in ( | |
| 627 | + r"[./][\w./-]+", # file path | |
| 628 | + r"#\d+", # issue/pr number | |
| 629 | + r"\b[a-z]+[A-Z][A-Za-z0-9_]+\b", # camelCase | |
| 630 | + r"\b[A-Z][a-z0-9]+[A-Z][A-Za-z0-9_]+\b", # PascalCase symbol | |
| 631 | + r"\b[a-z0-9]+_[a-z0-9_]+\b", # snake_case | |
| 632 | + r"```", # code block | |
| 633 | + r"\bpytest\b|\bnpm test\b|\bcargo test\b|\bmypy\b|\bruff\b", | |
| 634 | + r"\bacceptance criteria\b", | |
| 635 | + r"\bTypeError\b|\bAssertionError\b|\bTraceback\b", | |
| 636 | + ) | |
| 637 | + ) | |
tests/test_workflow.pyadded@@ -0,0 +1,177 @@ | ||
| 1 | +"""Tests for Sprint 04 workflow routing and artifact persistence.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +from pathlib import Path | |
| 6 | + | |
| 7 | +from loader.runtime.dod import DefinitionOfDoneStore, create_definition_of_done | |
| 8 | +from loader.runtime.workflow import ( | |
| 9 | + ClarifyBrief, | |
| 10 | + ModeRouter, | |
| 11 | + PlanningArtifacts, | |
| 12 | + WorkflowArtifactStore, | |
| 13 | + WorkflowMode, | |
| 14 | + build_execute_bridge, | |
| 15 | + extract_verification_commands_from_markdown, | |
| 16 | + sync_todos_to_definition_of_done, | |
| 17 | +) | |
| 18 | + | |
| 19 | + | |
def test_mode_router_routes_ambiguous_prompt_to_clarify() -> None:
    """A vague prompt with no concrete anchor should land in clarify mode."""
    prompt = "Improve Loader so it feels more like claw-code."
    router = ModeRouter()

    decision = router.route(prompt)

    assert decision.ambiguity_score >= router.clarify_threshold
    assert decision.mode == WorkflowMode.CLARIFY
| 27 | + | |
| 28 | + | |
def test_mode_router_routes_complex_prompt_to_plan() -> None:
    """A multi-deliverable prompt should score complex enough for plan mode."""
    prompt = (
        "Implement a persistent workflow mode router with clarify artifacts, "
        "planning artifacts, and verification-plan wiring in the runtime."
    )
    router = ModeRouter()

    decision = router.route(prompt)

    assert decision.complexity_score >= router.plan_threshold
    assert decision.mode == WorkflowMode.PLAN
| 39 | + | |
| 40 | + | |
def test_mode_router_routes_simple_prompt_to_execute() -> None:
    """A small, concrete request should go straight to execute mode."""
    decision = ModeRouter().route("Read pyproject.toml and tell me the package name.")

    assert decision.mode == WorkflowMode.EXECUTE
| 47 | + | |
| 48 | + | |
def test_clarify_brief_round_trips_and_seeds_acceptance_criteria() -> None:
    """Serialising a fallback brief to markdown and parsing it back keeps
    the task statement and seeds acceptance criteria from the answer."""
    original = ClarifyBrief.fallback(
        task_statement="Clarify the authentication change.",
        question="What outcome matters most?",
        answer="Add login without touching the signup flow.",
    )
    rendered = original.to_markdown()

    parsed = ClarifyBrief.from_markdown(
        rendered,
        task_statement=original.task_statement,
        question=original.question,
        answer=original.answer,
    )

    assert parsed.task_statement == original.task_statement
    assert parsed.non_goals
    assert "Add login" in parsed.acceptance_criteria[0]
| 66 | + | |
| 67 | + | |
def test_planning_artifacts_round_trip_and_extract_commands() -> None:
    """Parse a combined plan/verification model output and check that
    steps, acceptance criteria, and backticked commands are all extracted."""
    # "<<<VERIFICATION>>>" is the sentinel splitting the implementation
    # plan from the verification plan in a single model response.
    artifacts = PlanningArtifacts.from_model_output(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## Execution Order",
                "1. Inspect auth files.",
                "2. Implement the change.",
                "",
                "## Risks",
                "- Regression in signup.",
                "",
                "<<<VERIFICATION>>>",
                "",
                "# Verification Plan",
                "",
                "## Acceptance Criteria",
                "- Login works without changing signup.",
                "",
                "## Verification Commands",
                "- `uv run pytest tests/test_auth.py -q`",
                "- `uv run mypy src/loader`",
            ]
        ),
        task_statement="Clarify and implement the auth change.",
    )

    assert artifacts.implementation_steps[:2] == [
        "Inspect auth files.",
        "Implement the change.",
    ]
    assert artifacts.acceptance_criteria == ["Login works without changing signup."]
    assert artifacts.verification_commands == [
        "uv run pytest tests/test_auth.py -q",
        "uv run mypy src/loader",
    ]
    # The standalone extractor must agree with the parsed artifact's commands.
    assert extract_verification_commands_from_markdown(artifacts.verification_markdown) == [
        "uv run pytest tests/test_auth.py -q",
        "uv run mypy src/loader",
    ]
| 109 | + | |
| 110 | + | |
def test_workflow_artifact_store_and_bridge_round_trip(tmp_path: Path) -> None:
    """Persist a brief plus both plans and check the execute bridge
    includes all three artifact sections."""
    store = WorkflowArtifactStore(tmp_path)
    brief = ClarifyBrief.fallback(
        task_statement="Clarify the runtime changes.",
        question="What matters most?",
        answer="Close the tool-use gap first.",
    )
    plan = PlanningArtifacts.fallback(task_statement=brief.task_statement)

    brief_path = store.write_brief(brief.task_statement, brief)
    impl_path, verify_path = store.write_plan(brief.task_statement, plan)
    bridge = build_execute_bridge(brief_path, impl_path, verify_path)

    for written in (brief_path, impl_path, verify_path):
        assert written.exists()
    assert bridge is not None
    for heading in ("Task Brief", "Implementation Plan", "Verification Plan"):
        assert heading in bridge
| 134 | + | |
| 135 | + | |
def test_definition_of_done_round_trip_preserves_workflow_links(tmp_path: Path) -> None:
    """Saving and reloading a DoD keeps mode state and artifact links."""
    loader_dir = tmp_path / ".loader"
    original = create_definition_of_done("Implement Loader workflow routing.")
    original.current_mode = "plan"
    original.mode_history = ["clarify", "plan"]
    original.clarify_brief = str(loader_dir / "briefs" / "brief.md")
    original.implementation_plan = str(loader_dir / "plans" / "impl.md")
    original.verification_plan = str(loader_dir / "plans" / "verify.md")

    store = DefinitionOfDoneStore(tmp_path)
    restored = store.load(store.save(original))

    assert restored.current_mode == "plan"
    assert restored.mode_history == ["clarify", "plan"]
    for link in ("clarify_brief", "implementation_plan", "verification_plan"):
        assert getattr(restored, link) == getattr(original, link)
| 153 | + | |
| 154 | + | |
def test_sync_todos_to_definition_of_done_preserves_runtime_items() -> None:
    """Syncing todos records in-progress/completed entries without
    dropping items the runtime already placed on the DoD."""
    dod = create_definition_of_done("Implement Loader workflow routing.")
    dod.pending_items.append("Collect verification evidence")
    todos = [
        {
            "content": "Write router",
            "active_form": "Writing router",
            "status": "in_progress",
        },
        {
            "content": "Update tests",
            "active_form": "Updating tests",
            "status": "completed",
        },
    ]

    sync_todos_to_definition_of_done(dod, todos)

    assert "Writing router" in dod.pending_items
    assert "Collect verification evidence" in dod.pending_items
    assert "Update tests" in dod.completed_items
tests/test_workflow_runtime.pyadded@@ -0,0 +1,278 @@ | ||
| 1 | +"""Runtime integration coverage for Sprint 04 workflow routing.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +from pathlib import Path | |
| 6 | + | |
| 7 | +import pytest | |
| 8 | + | |
| 9 | +from loader.agent.loop import AgentConfig | |
| 10 | +from loader.llm.base import CompletionResponse, ToolCall | |
| 11 | +from tests.helpers.runtime_harness import ScriptedBackend, run_scenario | |
| 12 | + | |
| 13 | + | |
def non_streaming_config() -> AgentConfig:
    """Build the deterministic (non-streaming) config these tests share."""

    return AgentConfig(
        auto_context=False,
        stream=False,
        max_iterations=8,
    )
| 18 | + | |
| 19 | + | |
def workflow_modes(run) -> list[str]:
    """Collect the non-empty workflow modes emitted by *run*, in order."""

    modes: list[str] = []
    for event in run.events:
        if event.type == "workflow_mode" and event.workflow_mode:
            modes.append(event.workflow_mode)
    return modes
| 28 | + | |
| 29 | + | |
def artifact_kinds(run) -> list[str]:
    """Collect the non-empty artifact kinds emitted by *run*, in order."""

    kinds: list[str] = []
    for event in run.events:
        if event.type == "artifact" and event.artifact_kind:
            kinds.append(event.artifact_kind)
    return kinds
| 38 | + | |
| 39 | + | |
@pytest.mark.asyncio
async def test_ambiguous_prompt_routes_to_clarify_and_persists_brief(
    temp_dir: Path,
) -> None:
    """An ambiguous prompt should enter clarify mode, ask one question,
    persist the clarify brief, then fall through to execute."""
    # Scripted turns: (1) ask a clarifying question, (2) emit the
    # structured brief markdown, (3) acknowledge and finish.
    backend = ScriptedBackend(
        completions=[
            CompletionResponse(
                content="I need one clarification before I proceed.",
                tool_calls=[
                    ToolCall(
                        id="ask-1",
                        name="AskUserQuestion",
                        arguments={
                            "question": "What should stay out of scope for this Loader improvement?",
                        },
                    )
                ],
            ),
            CompletionResponse(
                content="\n".join(
                    [
                        "## Task Statement",
                        "Improve Loader so it feels more like claw-code.",
                        "",
                        "## Desired Outcome",
                        "- Make Loader more reliable without broad redesign.",
                        "",
                        "## In Scope",
                        "- Tighten the runtime workflow around the user-facing goal.",
                        "",
                        "## Non Goals",
                        "- Rebuild unrelated subsystems.",
                        "",
                        "## Decision Boundaries",
                        "- Escalate before changing unrelated UX patterns.",
                        "",
                        "## Constraints",
                        "- Stay within the current repository.",
                        "",
                        "## Likely Touchpoints",
                        "- Runtime entry points and prompt behavior.",
                        "",
                        "## Assumptions",
                        "- The user wants a narrow runtime-quality improvement.",
                        "",
                        "## Acceptance Criteria",
                        "- The improvement stays focused on runtime behavior.",
                    ]
                )
            ),
            CompletionResponse(content="I have the brief and can move forward."),
        ]
    )

    async def answer(question: str, options: list[str] | None) -> str:
        # Stand-in for the human: sanity-check the question, then reply.
        assert "out of scope" in question.lower()
        assert options is None
        return "Do not redesign the whole interface."

    run = await run_scenario(
        "Improve Loader so it feels more like claw-code.",
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
        on_user_question=answer,
    )

    dod = run.agent.last_turn_summary.definition_of_done
    assert dod is not None
    # Clarify must come first, then execute; the brief is the only artifact.
    assert workflow_modes(run)[:2] == ["clarify", "execute"]
    assert artifact_kinds(run) == ["clarify_brief"]
    assert dod.clarify_brief is not None
    assert Path(dod.clarify_brief).exists()
    assert "runtime behavior" in dod.acceptance_criteria[0].lower()
    # The clarify-mode section must be injected into the first system prompt.
    assert "## Clarify Mode" in backend.invocations[0].messages[0].content
| 115 | + | |
| 116 | + | |
@pytest.mark.asyncio
async def test_complex_prompt_routes_to_plan_and_uses_verification_artifact(
    temp_dir: Path,
) -> None:
    """A complex prompt should route to plan mode, persist both plan
    artifacts, and run the parsed verification commands afterwards."""
    target = temp_dir / "planned.txt"
    # Scripted turns: (1) combined implementation/verification plan,
    # (2) the write that implements it, (3) the closing summary.
    backend = ScriptedBackend(
        completions=[
            CompletionResponse(
                content="\n".join(
                    [
                        "# Implementation Plan",
                        "",
                        "## File Changes",
                        f"- Create {target.name} in the workspace root.",
                        "",
                        "## Execution Order",
                        f"1. Write {target.name}.",
                        "2. Confirm the file exists.",
                        "",
                        "## Risks",
                        "- Writing the wrong file path.",
                        "",
                        "<<<VERIFICATION>>>",
                        "",
                        "# Verification Plan",
                        "",
                        "## Acceptance Criteria",
                        f"- {target.name} exists in the workspace root.",
                        "",
                        "## Verification Commands",
                        f"- `test -f {target}`",
                        "",
                        "## Notes",
                        "- Use a deterministic file existence check.",
                    ]
                )
            ),
            CompletionResponse(
                content="I'll create the file now.",
                tool_calls=[
                    ToolCall(
                        id="write-1",
                        name="write",
                        arguments={
                            "file_path": str(target),
                            "content": "planned output\n",
                        },
                    )
                ],
            ),
            CompletionResponse(content="The file is in place."),
        ]
    )

    run = await run_scenario(
        "Implement a persistent workflow mode router with clarify artifacts, "
        "planning artifacts, and verification-plan wiring in the runtime.",
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
    )

    dod = run.agent.last_turn_summary.definition_of_done
    assert dod is not None
    assert workflow_modes(run)[:3] == ["plan", "execute", "verify"]
    assert artifact_kinds(run) == ["implementation_plan", "verification_plan"]
    assert dod.implementation_plan is not None
    assert dod.verification_plan is not None
    assert Path(dod.implementation_plan).exists()
    assert Path(dod.verification_plan).exists()
    # Commands come from the backticked bullet in the verification plan.
    assert dod.verification_commands == [f"test -f {target}"]
    assert "## Plan Mode" in backend.invocations[0].messages[0].content
    # The verification phase must actually run the planned command.
    verify_calls = [
        event
        for event in run.events
        if event.type == "tool_call" and event.phase == "verification"
    ]
    assert [event.tool_args["command"] for event in verify_calls] == [f"test -f {target}"]
| 195 | + | |
| 196 | + | |
@pytest.mark.asyncio
async def test_verify_failure_returns_to_execute_without_retriggering_plan(
    temp_dir: Path,
) -> None:
    """When verification fails, the runtime should return to execute for a
    fix (not re-plan or clarify) and then verify again."""
    target = temp_dir / "retry.txt"
    # Scripted turns: plan, a first write whose "draft" content fails the
    # grep check, a summary, the corrective "fixed" write, final summary.
    backend = ScriptedBackend(
        completions=[
            CompletionResponse(
                content="\n".join(
                    [
                        "# Implementation Plan",
                        "",
                        "## File Changes",
                        f"- Create {target.name}.",
                        "",
                        "## Execution Order",
                        f"1. Write {target.name}.",
                        "2. Fix it if verification fails.",
                        "",
                        "## Risks",
                        "- Initial content may be wrong.",
                        "",
                        "<<<VERIFICATION>>>",
                        "",
                        "# Verification Plan",
                        "",
                        "## Acceptance Criteria",
                        "- The file contains the word fixed.",
                        "",
                        "## Verification Commands",
                        f"- `grep -q fixed {target}`",
                        "",
                        "## Notes",
                        "- Retry if the first write misses the target string.",
                    ]
                )
            ),
            CompletionResponse(
                content="I'll write the first draft.",
                tool_calls=[
                    ToolCall(
                        id="write-1",
                        name="write",
                        arguments={
                            "file_path": str(target),
                            "content": "draft output\n",
                        },
                    )
                ],
            ),
            CompletionResponse(content="First draft is written."),
            CompletionResponse(
                content="I'll correct the file.",
                tool_calls=[
                    ToolCall(
                        id="write-2",
                        name="write",
                        arguments={
                            "file_path": str(target),
                            "content": "fixed output\n",
                        },
                    )
                ],
            ),
            CompletionResponse(content="The file now contains the fixed output."),
        ]
    )

    run = await run_scenario(
        "Implement a persistent workflow mode router with clarify artifacts, "
        "planning artifacts, and verification-plan wiring in the runtime.",
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
    )

    modes = workflow_modes(run)
    # Exactly one planning pass; the retry loops only through execute/verify.
    assert modes.count("plan") == 1
    assert modes.count("clarify") == 0
    assert modes.count("execute") >= 2
    assert modes.count("verify") >= 2
    assert "fixed output" in target.read_text()
tests/test_workflow_runtime_tools.pymodified@@ -12,7 +12,12 @@ from tests.helpers.runtime_harness import ScriptedBackend, run_scenario | ||
def non_streaming_config() -> AgentConfig:
    """Shared deterministic config for runtime tool tests."""

    return AgentConfig(
        auto_context=False,
        stream=False,
        max_iterations=4,
        # NOTE(review): pinning execute mode presumably keeps the workflow
        # router from entering clarify/plan in these tool-focused tests —
        # confirm against the router's override handling.
        workflow_mode_override="execute",
    )
| 16 | 21 | |
| 17 | 22 | |
| 18 | 23 | async def _answer(question: str, options: list[str] | None) -> str: |