tenseleyflow/loader / de5f678

Browse files

Add routed workflow artifacts to runtime

Authored by espadonne
SHA
de5f6787b869038db1ff2cb7442d0c428869538c
Parents
8713208
Tree
e9e3480

9 changed files

Status | File | Added | Removed
M src/loader/agent/loop.py 30 46
M src/loader/agent/prompts.py 65 18
M src/loader/runtime/conversation.py 506 0
M src/loader/runtime/dod.py 10 0
M src/loader/runtime/events.py 4 0
A src/loader/runtime/workflow.py 637 0
A tests/test_workflow.py 177 0
A tests/test_workflow_runtime.py 278 0
M tests/test_workflow_runtime_tools.py 6 1
src/loader/agent/loop.pymodified
@@ -13,6 +13,7 @@ from ..runtime.conversation import ConversationRuntime
1313
 from ..runtime.events import AgentEvent, TurnSummary
1414
 from ..runtime.permissions import PermissionMode, build_permission_policy
1515
 from ..runtime.session import ConversationSession
16
+from ..runtime.workflow import WorkflowMode
1617
 from ..tools.base import ToolRegistry, create_default_registry
1718
 from .planner import (
1819
     PLANNING_PROMPT,
@@ -89,6 +90,7 @@ class AgentConfig:
8990
     max_recovery_attempts: int = 2  # Reduced from 3
9091
     verification_retry_budget: int = 3  # Retry budget for verify/fix loop
9192
     permission_mode: PermissionMode = PermissionMode.WORKSPACE_WRITE
93
+    workflow_mode_override: str | None = None
9294
     stream: bool = True  # Stream LLM responses for real-time output
9395
 
9496
     # Reasoning stages configuration
@@ -126,6 +128,7 @@ class Agent:
126128
             messages=self.messages,
127129
         )
128130
         self._system_message: Message | None = None
131
+        self.workflow_mode = WorkflowMode.EXECUTE.value
129132
         self._use_react: bool | None = None
130133
         self.capability_profile = resolve_backend_capability_profile(self.backend)
131134
         self.last_turn_summary: TurnSummary | None = None
@@ -204,6 +207,7 @@ class Agent:
204207
                 tools=tool_schemas,
205208
                 use_react=self.use_react,
206209
                 project_context=self.project_context,
210
+                workflow_mode=self.workflow_mode,
207211
             )
208212
             self._system_message = Message(
209213
                 role=Role.SYSTEM,
@@ -211,6 +215,14 @@ class Agent:
211215
             )
212216
         return self._system_message
213217
 
218
+    def set_workflow_mode(self, workflow_mode: str) -> None:
219
+        """Update the active workflow mode used by the system prompt."""
220
+
221
+        if workflow_mode == self.workflow_mode:
222
+            return
223
+        self.workflow_mode = workflow_mode
224
+        self._system_message = None
225
+
214226
     def _build_messages(self) -> list[Message]:
215227
         """Build the full message list for the LLM."""
216228
         return self.session.build_request_messages()
@@ -553,52 +565,6 @@ class Agent:
553565
                 else:
554566
                     return f"Task partially completed. {decomposition.to_prompt()}"
555567
 
556
-        # Check if we should use planning
557
-        should_use_plan = use_plan
558
-        if should_use_plan is None and self.config.auto_plan:
559
-            await emit(AgentEvent(type="thinking"))
560
-            should_use_plan = await self._should_plan(user_message)
561
-
562
-        # If planning, create and execute plan
563
-        if should_use_plan:
564
-            plan = await self._create_plan(user_message)
565
-            if plan.steps:
566
-                await emit(AgentEvent(type="plan", content=plan.to_prompt()))
567
-
568
-                # Execute each step
569
-                while not plan.is_complete():
570
-                    step = plan.next_step()
571
-                    if not step:
572
-                        break
573
-
574
-                    await emit(AgentEvent(
575
-                        type="step",
576
-                        step_info=f"{plan.progress_str()} {step.description}",
577
-                    ))
578
-
579
-                    # Run the step
580
-                    step_prompt = format_step_prompt(plan, step)
581
-                    await self._run_inner(
582
-                        step_prompt,
583
-                        emit,
584
-                        on_confirmation,
585
-                        on_user_question=on_user_question,
586
-                        original_task=self._current_task,
587
-                    )
588
-
589
-                    plan.complete_current()
590
-
591
-                # Final summary
592
-                self.messages.append(Message(role=Role.USER, content=user_message))
593
-                summary_prompt = f"I've completed the plan. Summarize what was done:\n{plan.to_prompt()}"
594
-                return await self._run_inner(
595
-                    summary_prompt,
596
-                    emit,
597
-                    on_confirmation,
598
-                    on_user_question=on_user_question,
599
-                    original_task=self._current_task,
600
-                )
601
-
602568
         # No planning or decomposition - run directly
603569
         self.messages.append(Message(role=Role.USER, content=user_message))
604570
         return await self._run_inner(
@@ -606,6 +572,7 @@ class Agent:
606572
             emit,
607573
             on_confirmation,
608574
             on_user_question=on_user_question,
575
+            requested_mode=self._requested_workflow_mode(use_plan),
609576
             original_task=self._current_task,
610577
         )
611578
 
@@ -615,6 +582,7 @@ class Agent:
615582
         emit: Callable[[AgentEvent], Awaitable[None]],
616583
         on_confirmation: Callable[[str, str, str], Awaitable[bool]] | None = None,
617584
         on_user_question: Callable[[str, list[str] | None], Awaitable[str]] | None = None,
585
+        requested_mode: str | None = None,
618586
         original_task: str | None = None,
619587
     ) -> str:
620588
         """Inner execution loop without planning."""
@@ -625,10 +593,24 @@ class Agent:
625593
             emit,
626594
             on_confirmation=on_confirmation,
627595
             on_user_question=on_user_question,
596
+            requested_mode=requested_mode,
628597
             original_task=original_task,
629598
         )
630599
         return self.last_turn_summary.final_response
631600
 
601
+    def _requested_workflow_mode(self, use_plan: bool | None) -> str | None:
602
+        """Resolve the explicit workflow-mode override for the current turn."""
603
+
604
+        if use_plan is True:
605
+            return WorkflowMode.PLAN.value
606
+        if use_plan is False:
607
+            return WorkflowMode.EXECUTE.value
608
+        if self.config.workflow_mode_override:
609
+            return self.config.workflow_mode_override
610
+        if self.config.auto_plan:
611
+            return WorkflowMode.PLAN.value
612
+        return None
613
+
632614
     async def run_streaming(
633615
         self,
634616
         user_message: str,
@@ -1001,4 +983,6 @@ class Agent:
1001983
         self._recovery_context = None
1002984
         self._current_task = None
1003985
         self.last_turn_summary = None
986
+        self.workflow_mode = WorkflowMode.EXECUTE.value
987
+        self._system_message = None
1004988
         self.safeguards.reset()  # Reset all runtime safeguards
src/loader/agent/prompts.pymodified
@@ -1,7 +1,7 @@
11
 """Prompt templates for the agent."""
22
 
33
 import os
4
-from typing import Any, TYPE_CHECKING
4
+from typing import TYPE_CHECKING, Any
55
 
66
 if TYPE_CHECKING:
77
     from ..context.project import ProjectContext
@@ -145,17 +145,43 @@ def format_tool_descriptions(tools: list[dict[str, Any]]) -> str:
145145
     return "\n\n".join(lines)
146146
 
147147
 
148
+MODE_GUIDANCE = {
149
+    "clarify": """
150
+## Clarify Mode
151
+- Ask exactly one focused question with `AskUserQuestion`
152
+- Clarify intent, outcome, scope, or boundaries before proposing solutions
153
+- Do not start coding or writing patch plans yet
154
+- Keep the question high-leverage and brief
155
+""",
156
+    "plan": """
157
+## Plan Mode
158
+- Produce persistent implementation and verification planning artifacts
159
+- Do not start writing code in this mode
160
+- Be explicit about file touchpoints, order of work, risks, acceptance criteria, and verification commands
161
+- Prefer concrete, repository-grounded plans over generic checklists
162
+""",
163
+    "execute": """
164
+## Execute Mode
165
+- Use tools directly to perform the task
166
+- Read relevant files before editing them
167
+- Keep `TodoWrite` current for multi-step work when progress tracking matters
168
+- Concise reporting is fine, and numbered lists are allowed when they communicate plan or evidence clearly
169
+""",
170
+    "verify": """
171
+## Verify Mode
172
+- Run the planned verification commands and capture evidence
173
+- Do not declare the task complete while any verification step is failing
174
+- Report concrete pass/fail evidence rather than vague confidence
175
+""",
176
+}
177
+
178
+
148179
 SYSTEM_PROMPT = """You are Loader, an AI coding agent.
149180
 
150181
 Current directory: {cwd}
151182
 
152
-## Tools
153
-- bash: Run shell commands
154
-- write: Create files
155
-- read: Read files
156
-- edit: Modify files
157
-- glob: Find files
158
-- grep: Search in files
183
+## Tools Available
184
+{tool_descriptions}
159185
 
160186
 ## How to Use Tools
161187
 Output a tool call in this format:
@@ -166,12 +192,19 @@ Output a tool call in this format:
166192
 [write: file_path="hello.py", content="print('hello')"]
167193
 [read: file_path="config.json"]
168194
 [edit: file_path="app.py", old_string="old", new_string="new"]
195
+[TodoWrite: todos=[{{content="Run tests", active_form="Running tests", status="in_progress"}}]]
196
+[AskUserQuestion: question="Which path matters more?", options=["Speed", "Correctness"]]
197
+
198
+## Active Workflow Mode
199
+{workflow_mode}
200
+
201
+{mode_guidance}
169202
 
170203
 ## Rules
171
-1. Use tools immediately - don't explain first
172
-2. No code blocks (```) - use the write tool instead
173
-3. No numbered steps - just do the task
174
-4. Read files before editing them
204
+1. Follow the active workflow mode rather than improvising a different one
205
+2. Use tools or concise prose directly instead of narrating fake tool use
206
+3. Use the write tool for files rather than pasting long code blocks
207
+4. Keep responses grounded in repository evidence and verification output
175208
 """
176209
 
177210
 
@@ -200,11 +233,16 @@ Current directory: {cwd}
200233
 {{"name": "read", "arguments": {{"file_path": "config.json"}}}}
201234
 </tool_call>
202235
 
236
+## Active Workflow Mode
237
+{workflow_mode}
238
+
239
+{mode_guidance}
240
+
203241
 ## Rules
204
-1. Use tools immediately - don't explain first
205
-2. No code blocks - use the write tool instead
206
-3. No numbered steps - just do the task
207
-4. Read files before editing them
242
+1. Follow the active workflow mode rather than improvising a different one
243
+2. Use tools or concise prose directly instead of narrating fake tool use
244
+3. Use the write tool for files rather than pasting long code blocks
245
+4. Keep responses grounded in repository evidence and verification output
208246
 """
209247
 
210248
 
@@ -212,6 +250,7 @@ def build_system_prompt(
212250
     tools: list[dict[str, Any]],
213251
     use_react: bool = False,
214252
     project_context: "str | ProjectContext | None" = None,
253
+    workflow_mode: str = "execute",
215254
 ) -> str:
216255
     """Build the system prompt with tool descriptions.
217256
 
@@ -224,15 +263,23 @@ def build_system_prompt(
224263
         Formatted system prompt
225264
     """
226265
     cwd = os.getcwd()
266
+    tool_descriptions = format_tool_descriptions(tools)
267
+    mode_guidance = MODE_GUIDANCE.get(workflow_mode, MODE_GUIDANCE["execute"])
227268
 
228269
     if use_react:
229
-        tool_descriptions = format_tool_descriptions(tools)
230270
         prompt = REACT_SYSTEM_PROMPT.format(
231271
             cwd=cwd,
232272
             tool_descriptions=tool_descriptions,
273
+            workflow_mode=workflow_mode,
274
+            mode_guidance=mode_guidance,
233275
         )
234276
     else:
235
-        prompt = SYSTEM_PROMPT.format(cwd=cwd)
277
+        prompt = SYSTEM_PROMPT.format(
278
+            cwd=cwd,
279
+            tool_descriptions=tool_descriptions,
280
+            workflow_mode=workflow_mode,
281
+            mode_guidance=mode_guidance,
282
+        )
236283
 
237284
     # Add project context if available
238285
     if project_context:
src/loader/runtime/conversation.pymodified
@@ -2,8 +2,10 @@
22
 
33
 from __future__ import annotations
44
 
5
+import re
56
 from collections.abc import Awaitable, Callable
67
 from dataclasses import dataclass, field
8
+from pathlib import Path
79
 from typing import Any
810
 
911
 from ..agent.parsing import parse_tool_calls
@@ -30,6 +32,17 @@ from .events import AgentEvent, TurnSummary
3032
 from .executor import ToolExecutionState, ToolExecutor
3133
 from .hooks import build_default_tool_hooks
3234
 from .tracing import RuntimeTracer
35
+from .workflow import (
36
+    ClarifyBrief,
37
+    ModeRouter,
38
+    PlanningArtifacts,
39
+    VERIFICATION_SEPARATOR,
40
+    WorkflowArtifactStore,
41
+    WorkflowMode,
42
+    build_execute_bridge,
43
+    extract_verification_commands_from_markdown,
44
+    sync_todos_to_definition_of_done,
45
+)
3346
 
3447
 EventSink = Callable[[AgentEvent], Awaitable[None]]
3548
 ConfirmationHandler = Callable[[str, str, str], Awaitable[bool]] | None
@@ -63,6 +76,8 @@ class ConversationRuntime:
6376
         self.tracer = RuntimeTracer()
6477
         self.executor: ToolExecutor | None = None
6578
         self.dod_store = DefinitionOfDoneStore(agent.project_root)
79
+        self.router = ModeRouter()
80
+        self.artifact_store = WorkflowArtifactStore(agent.project_root)
6681
 
6782
     async def run_turn(
6883
         self,
@@ -70,6 +85,7 @@ class ConversationRuntime:
7085
         emit: EventSink,
7186
         on_confirmation: ConfirmationHandler = None,
7287
         on_user_question: UserQuestionHandler = None,
88
+        requested_mode: str | None = None,
7389
         original_task: str | None = None,
7490
     ) -> TurnSummary:
7591
         """Run one task turn and return a structured summary."""
@@ -110,6 +126,16 @@ class ConversationRuntime:
110126
         summary.definition_of_done = dod
111127
         await self._emit_dod_status(emit, dod)
112128
 
129
+        task = await self._prepare_workflow(
130
+            task=task,
131
+            dod=dod,
132
+            emit=emit,
133
+            summary=summary,
134
+            on_confirmation=on_confirmation,
135
+            on_user_question=on_user_question,
136
+            requested_mode=requested_mode,
137
+        )
138
+
113139
         while iterations < self.agent.config.max_iterations:
114140
             iterations += 1
115141
             summary.iterations = iterations
@@ -315,6 +341,13 @@ class ConversationRuntime:
315341
 
316342
                     if outcome.state == ToolExecutionState.EXECUTED and not outcome.is_error:
317343
                         record_successful_tool_call(dod, tool_call)
344
+                        if (
345
+                            tool_call.name == "TodoWrite"
346
+                            and outcome.registry_result is not None
347
+                        ):
348
+                            new_todos = outcome.registry_result.metadata.get("new_todos", [])
349
+                            if isinstance(new_todos, list):
350
+                                sync_todos_to_definition_of_done(dod, new_todos)
318351
                         self.dod_store.save(dod)
319352
                         self.agent._recovery_context = None
320353
                         is_loop, loop_description = self.agent.safeguards.detect_loop()
@@ -730,6 +763,433 @@ class ConversationRuntime:
730763
             is_error=True,
731764
         )
732765
 
766
+    async def _prepare_workflow(
767
+        self,
768
+        *,
769
+        task: str,
770
+        dod: DefinitionOfDone,
771
+        emit: EventSink,
772
+        summary: TurnSummary,
773
+        on_confirmation: ConfirmationHandler,
774
+        on_user_question: UserQuestionHandler,
775
+        requested_mode: str | None,
776
+    ) -> str:
777
+        requested = WorkflowMode.from_str(requested_mode)
778
+        decision = self.router.route(
779
+            task,
780
+            requested_mode=requested,
781
+            has_brief=self._artifact_exists(dod.clarify_brief),
782
+            has_plan=self._artifact_exists(dod.implementation_plan)
783
+            and self._artifact_exists(dod.verification_plan),
784
+        )
785
+        await self._set_workflow_mode(
786
+            decision.mode,
787
+            dod=dod,
788
+            emit=emit,
789
+            summary=summary,
790
+            reason=decision.reason,
791
+        )
792
+
793
+        if decision.mode == WorkflowMode.CLARIFY:
794
+            await self._run_clarify_mode(
795
+                task=task,
796
+                dod=dod,
797
+                emit=emit,
798
+                summary=summary,
799
+                on_user_question=on_user_question,
800
+            )
801
+            decision = self.router.route(
802
+                task,
803
+                has_brief=self._artifact_exists(dod.clarify_brief),
804
+                has_plan=self._artifact_exists(dod.implementation_plan)
805
+                and self._artifact_exists(dod.verification_plan),
806
+                allow_clarify=False,
807
+            )
808
+            await self._set_workflow_mode(
809
+                decision.mode,
810
+                dod=dod,
811
+                emit=emit,
812
+                summary=summary,
813
+                reason=f"clarify handoff: {decision.reason}",
814
+            )
815
+
816
+        if decision.mode == WorkflowMode.PLAN:
817
+            await self._run_plan_mode(
818
+                task=task,
819
+                dod=dod,
820
+                emit=emit,
821
+                summary=summary,
822
+                on_confirmation=on_confirmation,
823
+                on_user_question=on_user_question,
824
+            )
825
+            await self._set_workflow_mode(
826
+                WorkflowMode.EXECUTE,
827
+                dod=dod,
828
+                emit=emit,
829
+                summary=summary,
830
+                reason="plan artifacts created; switching to execute",
831
+            )
832
+
833
+        bridge = build_execute_bridge(
834
+            Path(dod.clarify_brief) if dod.clarify_brief else None,
835
+            Path(dod.implementation_plan) if dod.implementation_plan else None,
836
+            Path(dod.verification_plan) if dod.verification_plan else None,
837
+        )
838
+        if bridge and not any(
839
+            message.role == Role.USER and "[WORKFLOW BRIDGE]" in message.content
840
+            for message in self.agent.messages[-4:]
841
+        ):
842
+            self.agent.session.append(
843
+                Message(
844
+                    role=Role.USER,
845
+                    content=(
846
+                        "[WORKFLOW BRIDGE]\n"
847
+                        f"{bridge}\n\n"
848
+                        "Honor these artifacts while you execute the task. "
849
+                        "Keep TodoWrite current when the work spans multiple steps."
850
+                    ),
851
+                )
852
+            )
853
+        return task
854
+
855
+    async def _set_workflow_mode(
856
+        self,
857
+        mode: WorkflowMode,
858
+        *,
859
+        dod: DefinitionOfDone,
860
+        emit: EventSink,
861
+        summary: TurnSummary,
862
+        reason: str,
863
+    ) -> None:
864
+        self.agent.set_workflow_mode(mode.value)
865
+        dod.current_mode = mode.value
866
+        if not dod.mode_history or dod.mode_history[-1] != mode.value:
867
+            dod.mode_history.append(mode.value)
868
+        summary.workflow_mode = mode.value
869
+        summary.definition_of_done = dod
870
+        self.dod_store.save(dod)
871
+        await emit(
872
+            AgentEvent(
873
+                type="workflow_mode",
874
+                content=f"Workflow: {mode.value} ({reason})",
875
+                workflow_mode=mode.value,
876
+                definition_of_done=dod,
877
+            )
878
+        )
879
+
880
+    async def _emit_artifact(
881
+        self,
882
+        *,
883
+        emit: EventSink,
884
+        kind: str,
885
+        path: Path,
886
+        preview: str,
887
+    ) -> None:
888
+        await emit(
889
+            AgentEvent(
890
+                type="artifact",
891
+                content=preview,
892
+                artifact_kind=kind,
893
+                artifact_path=str(path),
894
+            )
895
+        )
896
+
897
+    async def _complete_in_mode(
898
+        self,
899
+        *,
900
+        prompt: str,
901
+        tools: list[dict[str, Any]] | None,
902
+        max_tokens: int,
903
+        temperature: float = 0.2,
904
+    ):
905
+        return await self.agent.backend.complete(
906
+            messages=self.agent.session.build_request_messages()
907
+            + [Message(role=Role.USER, content=prompt)],
908
+            tools=tools,
909
+            temperature=temperature,
910
+            max_tokens=max_tokens,
911
+        )
912
+
913
+    async def _run_clarify_mode(
914
+        self,
915
+        *,
916
+        task: str,
917
+        dod: DefinitionOfDone,
918
+        emit: EventSink,
919
+        summary: TurnSummary,
920
+        on_user_question: UserQuestionHandler,
921
+    ) -> None:
922
+        ask_tool = self.agent.registry.get("AskUserQuestion")
923
+        assert ask_tool is not None
924
+        prompt = (
925
+            "Clarify the task before planning or implementation.\n"
926
+            "Ask exactly one focused question with AskUserQuestion.\n"
927
+            "Target missing outcome, scope, or decision-boundary information.\n"
928
+            "Do not propose solutions yet.\n\n"
929
+            f"Task: {task}"
930
+        )
931
+        response = await self._complete_in_mode(
932
+            prompt=prompt,
933
+            tools=[ask_tool.to_schema()],
934
+            max_tokens=300,
935
+        )
936
+        tool_call = next(
937
+            (
938
+                tool
939
+                for tool in response.tool_calls
940
+                if tool.name == "AskUserQuestion"
941
+            ),
942
+            None,
943
+        )
944
+        if tool_call is None:
945
+            tool_call = ToolCall(
946
+                id="clarify-question-1",
947
+                name="AskUserQuestion",
948
+                arguments={
949
+                    "question": self._fallback_clarify_question(task, response.content),
950
+                },
951
+            )
952
+
953
+        assistant_message = Message(
954
+            role=Role.ASSISTANT,
955
+            content=response.content or tool_call.arguments.get("question", ""),
956
+            tool_calls=[tool_call],
957
+        )
958
+        self.agent.session.append(assistant_message)
959
+        summary.assistant_messages.append(assistant_message)
960
+
961
+        await emit(
962
+            AgentEvent(
963
+                type="tool_call",
964
+                tool_name=tool_call.name,
965
+                tool_args=tool_call.arguments,
966
+                phase="clarify",
967
+            )
968
+        )
969
+        assert self.executor is not None
970
+        outcome = await self.executor.execute_tool_call(
971
+            tool_call,
972
+            on_confirmation=None,
973
+            on_user_question=on_user_question,
974
+            emit_confirmation=None,
975
+            source="clarify",
976
+            skip_duplicate_check=True,
977
+            record_action=False,
978
+            skip_confirmation=True,
979
+        )
980
+        await emit(
981
+            AgentEvent(
982
+                type="tool_result",
983
+                content=outcome.event_content,
984
+                tool_name=tool_call.name,
985
+                is_error=outcome.is_error,
986
+                phase="clarify",
987
+            )
988
+        )
989
+        self.agent.session.append(outcome.message)
990
+        summary.tool_result_messages.append(outcome.message)
991
+
992
+        question = str(tool_call.arguments.get("question", "")).strip()
993
+        answer = ""
994
+        if outcome.registry_result is not None:
995
+            answer = str(outcome.registry_result.metadata.get("answer", "")).strip()
996
+
997
+        brief_prompt = (
998
+            "Write a concise task brief in markdown using these exact sections:\n"
999
+            "## Task Statement\n"
1000
+            "## Desired Outcome\n"
1001
+            "## In Scope\n"
1002
+            "## Non Goals\n"
1003
+            "## Decision Boundaries\n"
1004
+            "## Constraints\n"
1005
+            "## Likely Touchpoints\n"
1006
+            "## Assumptions\n"
1007
+            "## Acceptance Criteria\n\n"
1008
+            "Use short bullet lists when helpful. Do not start implementing.\n\n"
1009
+            f"Task: {task}\n"
1010
+            f"Question: {question}\n"
1011
+            f"Answer: {answer or 'No answer provided.'}"
1012
+        )
1013
+        brief_response = await self._complete_in_mode(
1014
+            prompt=brief_prompt,
1015
+            tools=None,
1016
+            max_tokens=900,
1017
+            temperature=0.1,
1018
+        )
1019
+        brief = (
1020
+            ClarifyBrief.from_markdown(
1021
+                brief_response.content,
1022
+                task_statement=task,
1023
+                question=question,
1024
+                answer=answer,
1025
+            )
1026
+            if brief_response.content.strip()
1027
+            else ClarifyBrief.fallback(
1028
+                task_statement=task,
1029
+                question=question,
1030
+                answer=answer,
1031
+            )
1032
+        )
1033
+        brief_path = self.artifact_store.write_brief(task, brief)
1034
+        dod.clarify_brief = str(brief_path)
1035
+        dod.acceptance_criteria = list(dict.fromkeys(brief.acceptance_criteria))
1036
+        self.dod_store.save(dod)
1037
+        await self._emit_artifact(
1038
+            emit=emit,
1039
+            kind="clarify_brief",
1040
+            path=brief_path,
1041
+            preview=(
1042
+                f"Clarify brief: {brief_path}\n"
1043
+                f"Outcome: {brief.desired_outcome[0]}"
1044
+            ),
1045
+        )
1046
+
1047
+    async def _run_plan_mode(
1048
+        self,
1049
+        *,
1050
+        task: str,
1051
+        dod: DefinitionOfDone,
1052
+        emit: EventSink,
1053
+        summary: TurnSummary,
1054
+        on_confirmation: ConfirmationHandler,
1055
+        on_user_question: UserQuestionHandler,
1056
+    ) -> None:
1057
+        prompt = (
1058
+            "Produce two markdown planning artifacts separated by the exact line "
1059
+            f"`{VERIFICATION_SEPARATOR}`.\n\n"
1060
+            "Before the separator, write an Implementation Plan with these sections:\n"
1061
+            "## File Changes\n"
1062
+            "## Execution Order\n"
1063
+            "## Risks\n\n"
1064
+            "After the separator, write a Verification Plan with these sections:\n"
1065
+            "## Acceptance Criteria\n"
1066
+            "## Verification Commands\n"
1067
+            "## Notes\n\n"
1068
+            "Do not start writing code.\n\n"
1069
+            f"Task: {task}"
1070
+        )
1071
+        response = await self._complete_in_mode(
1072
+            prompt=prompt,
1073
+            tools=None,
1074
+            max_tokens=1400,
1075
+            temperature=0.2,
1076
+        )
1077
+        artifacts = (
1078
+            PlanningArtifacts.from_model_output(
1079
+                response.content,
1080
+                task_statement=task,
1081
+            )
1082
+            if response.content.strip()
1083
+            else PlanningArtifacts.fallback(task_statement=task)
1084
+        )
1085
+        implementation_path, verification_path = self.artifact_store.write_plan(
1086
+            task,
1087
+            artifacts,
1088
+        )
1089
+        dod.implementation_plan = str(implementation_path)
1090
+        dod.verification_plan = str(verification_path)
1091
+        dod.acceptance_criteria = list(
1092
+            dict.fromkeys(dod.acceptance_criteria + artifacts.acceptance_criteria)
1093
+        )
1094
+        if artifacts.verification_commands:
1095
+            dod.verification_commands = artifacts.verification_commands
1096
+        self.dod_store.save(dod)
1097
+        await self._emit_artifact(
1098
+            emit=emit,
1099
+            kind="implementation_plan",
1100
+            path=implementation_path,
1101
+            preview=(
1102
+                f"Implementation plan: {implementation_path}\n"
1103
+                f"Steps: {len(artifacts.implementation_steps)}"
1104
+            ),
1105
+        )
1106
+        await self._emit_artifact(
1107
+            emit=emit,
1108
+            kind="verification_plan",
1109
+            path=verification_path,
1110
+            preview=(
1111
+                f"Verification plan: {verification_path}\n"
1112
+                f"Commands: {len(artifacts.verification_commands)}"
1113
+            ),
1114
+        )
1115
+        await self._seed_todos_from_plan(
1116
+            artifacts=artifacts,
1117
+            dod=dod,
1118
+            emit=emit,
1119
+        )
1120
+
1121
+    async def _seed_todos_from_plan(
1122
+        self,
1123
+        *,
1124
+        artifacts: PlanningArtifacts,
1125
+        dod: DefinitionOfDone,
1126
+        emit: EventSink,
1127
+    ) -> None:
1128
+        if not artifacts.implementation_steps:
1129
+            return
1130
+
1131
+        todos = [
1132
+            {
1133
+                "content": step,
1134
+                "active_form": f"Working on: {step}",
1135
+                "status": "pending",
1136
+            }
1137
+            for step in artifacts.implementation_steps[:8]
1138
+        ]
1139
+        tool_call = ToolCall(
1140
+            id="plan-todos-1",
1141
+            name="TodoWrite",
1142
+            arguments={"todos": todos},
1143
+        )
1144
+        await emit(
1145
+            AgentEvent(
1146
+                type="tool_call",
1147
+                tool_name=tool_call.name,
1148
+                tool_args=tool_call.arguments,
1149
+                phase="plan",
1150
+            )
1151
+        )
1152
+        assert self.executor is not None
1153
+        outcome = await self.executor.execute_tool_call(
1154
+            tool_call,
1155
+            on_confirmation=None,
1156
+            on_user_question=None,
1157
+            emit_confirmation=None,
1158
+            source="plan",
1159
+            skip_duplicate_check=True,
1160
+            record_action=False,
1161
+            skip_confirmation=True,
1162
+        )
1163
+        await emit(
1164
+            AgentEvent(
1165
+                type="tool_result",
1166
+                content=outcome.event_content,
1167
+                tool_name=tool_call.name,
1168
+                is_error=outcome.is_error,
1169
+                phase="plan",
1170
+            )
1171
+        )
1172
+        if outcome.registry_result is not None:
1173
+            new_todos = outcome.registry_result.metadata.get("new_todos", [])
1174
+            if isinstance(new_todos, list):
1175
+                sync_todos_to_definition_of_done(dod, new_todos)
1176
+                self.dod_store.save(dod)
1177
+
1178
+    @staticmethod
1179
+    def _artifact_exists(path_str: str | None) -> bool:
1180
+        return bool(path_str and Path(path_str).exists())
1181
+
1182
+    @staticmethod
1183
+    def _fallback_clarify_question(task: str, response_content: str) -> str:
1184
+        match = re.search(r"([A-Z][^?]+\?)", response_content)
1185
+        if match:
1186
+            return match.group(1).strip()
1187
+        return (
1188
+            "What outcome matters most here, and what should stay out of scope?"
1189
+            if task.strip()
1190
+            else "What outcome matters most?"
1191
+        )
1192
+
7331193
     async def _run_definition_of_done_gate(
7341194
         self,
7351195
         *,
@@ -743,8 +1203,31 @@ class ConversationRuntime:
7431203
             dod.pending_items.remove(implementation_item)
7441204
             dod.completed_items.append(implementation_item)
7451205
 
1206
+        tracked_pending_items = [
1207
+            item
1208
+            for item in dod.pending_items
1209
+            if item != "Collect verification evidence"
1210
+        ]
1211
+
7461212
         mutating_paths = [path for path in dod.touched_files if path]
7471213
         requires_verification = bool(mutating_paths or dod.mutating_actions)
1214
+        if tracked_pending_items and not requires_verification:
1215
+            pending_text = "\n".join(f"- {item}" for item in tracked_pending_items)
1216
+            self.dod_store.save(dod)
1217
+            await self._emit_dod_status(emit, dod)
1218
+            self.agent.session.append(
1219
+                Message(
1220
+                    role=Role.USER,
1221
+                    content=(
1222
+                        "[PENDING WORK REMAINS]\n"
1223
+                        "The tracked work items are not complete yet:\n"
1224
+                        f"{pending_text}\n\n"
1225
+                        "Continue the task, and update TodoWrite as you make progress."
1226
+                    ),
1227
+                )
1228
+            )
1229
+            return CompletionGateResult(should_continue=True, final_response="")
1230
+
7481231
         if not requires_verification:
7491232
             dod.status = "done"
7501233
             dod.last_verification_result = "skipped"
@@ -761,6 +1244,11 @@ class ConversationRuntime:
7611244
         if verify_item not in dod.pending_items and verify_item not in dod.completed_items:
7621245
             dod.pending_items.append(verify_item)
7631246
 
1247
+        if not dod.verification_commands and dod.verification_plan and Path(dod.verification_plan).exists():
1248
+            dod.verification_commands = extract_verification_commands_from_markdown(
1249
+                Path(dod.verification_plan).read_text()
1250
+            )
1251
+
7641252
         if not dod.verification_commands:
7651253
             dod.verification_commands = derive_verification_commands(
7661254
                 dod,
@@ -768,6 +1256,13 @@ class ConversationRuntime:
7681256
                 task_statement=dod.task_statement,
7691257
             )
7701258
 
1259
+        await self._set_workflow_mode(
1260
+            WorkflowMode.VERIFY,
1261
+            dod=dod,
1262
+            emit=emit,
1263
+            summary=summary,
1264
+            reason="definition-of-done gate requires verification",
1265
+        )
7711266
         verification_passed = await self._verify_definition_of_done(
7721267
             dod=dod,
7731268
             emit=emit,
@@ -778,6 +1273,10 @@ class ConversationRuntime:
7781273
                 dod.pending_items.remove(verify_item)
7791274
             if verify_item not in dod.completed_items:
7801275
                 dod.completed_items.append(verify_item)
1276
+            for pending in list(dod.pending_items):
1277
+                if pending not in dod.completed_items:
1278
+                    dod.completed_items.append(pending)
1279
+            dod.pending_items = []
7811280
             dod.status = "done"
7821281
             dod.last_verification_result = "passed"
7831282
             dod.confidence = "high"
@@ -817,6 +1316,13 @@ class ConversationRuntime:
8171316
         dod.confidence = "medium"
8181317
         self.dod_store.save(dod)
8191318
         await self._emit_dod_status(emit, dod)
1319
+        await self._set_workflow_mode(
1320
+            WorkflowMode.EXECUTE,
1321
+            dod=dod,
1322
+            emit=emit,
1323
+            summary=summary,
1324
+            reason="verification failed; returning to execute for fixes",
1325
+        )
8201326
         failure_prompt = (
8211327
             "[DEFINITION OF DONE CHECK FAILED]\n"
8221328
             f"Task: {dod.task_statement}\n"
src/loader/runtime/dod.pymodified
@@ -53,6 +53,11 @@ class DefinitionOfDone:
5353
     line_changes: int = 0
5454
     storage_path: str | None = None
5555
     last_verification_result: str | None = None
56
+    current_mode: str = "execute"
57
+    mode_history: list[str] = field(default_factory=list)
58
+    clarify_brief: str | None = None
59
+    implementation_plan: str | None = None
60
+    verification_plan: str | None = None
5661
 
5762
     def to_dict(self) -> dict[str, Any]:
5863
         """Serialize the DoD state for persistence."""
@@ -83,6 +88,11 @@ class DefinitionOfDone:
8388
             line_changes=int(data.get("line_changes", 0)),
8489
             storage_path=data.get("storage_path"),
8590
             last_verification_result=data.get("last_verification_result"),
91
+            current_mode=data.get("current_mode", "execute"),
92
+            mode_history=list(data.get("mode_history", [])),
93
+            clarify_brief=data.get("clarify_brief"),
94
+            implementation_plan=data.get("implementation_plan"),
95
+            verification_plan=data.get("verification_plan"),
8696
         )
8797
 
8898
 
src/loader/runtime/events.pymodified
@@ -38,6 +38,9 @@ class AgentEvent:
3838
     dod_status: str | None = None
3939
     pending_items_count: int | None = None
4040
     last_verification_result: str | None = None
41
+    workflow_mode: str | None = None
42
+    artifact_kind: str | None = None
43
+    artifact_path: str | None = None
4144
 
4245
     decomposition: TaskDecomposition | None = None
4346
     subtask: Subtask | None = None
@@ -63,3 +66,4 @@ class TurnSummary:
6366
     usage: dict[str, int] = field(default_factory=dict)
6467
     trace: list[RuntimeTraceEvent] = field(default_factory=list)
6568
     definition_of_done: DefinitionOfDone | None = None
69
+    workflow_mode: str | None = None
src/loader/runtime/workflow.pyadded
@@ -0,0 +1,637 @@
1
+"""Workflow routing and artifact persistence for Loader runtime modes."""
2
+
3
+from __future__ import annotations
4
+
5
+import re
6
+from dataclasses import dataclass, field
7
+from datetime import UTC, datetime
8
+from enum import StrEnum
9
+from pathlib import Path
10
+
11
+from .dod import slugify
12
+
13
# Sentinel separating the implementation plan from the verification plan in a
# single model output (consumed by `_split_plan_output`, re-inserted by
# `load_planning_artifacts` when reloading persisted plans).
VERIFICATION_SEPARATOR = "<<<VERIFICATION>>>"

# Maps normalized markdown headings (lowercased, punctuation collapsed to
# spaces by `_normalize_heading`) to canonical section keys used by
# `_parse_markdown_sections`. Multiple aliases fold into one key, e.g. all
# "out of scope" variants become "non_goals".
_SECTION_ALIASES = {
    "task statement": "task_statement",
    "desired outcome": "desired_outcome",
    "in scope": "in_scope",
    "out of scope": "non_goals",
    "out of scope non goals": "non_goals",
    "out of scope or non goals": "non_goals",
    "non goals": "non_goals",
    "non-goals": "non_goals",
    "decision boundaries": "decision_boundaries",
    "constraints": "constraints",
    "likely touchpoints": "likely_touchpoints",
    "assumptions": "assumptions",
    "acceptance criteria": "acceptance_criteria",
    "file changes": "file_changes",
    "execution order": "execution_order",
    "risks": "risks",
    "verification commands": "verification_commands",
    "commands": "verification_commands",
    "notes": "notes",
}
36
+
37
+
38
+class WorkflowMode(StrEnum):
39
+    """High-level runtime modes for one Loader task turn."""
40
+
41
+    CLARIFY = "clarify"
42
+    PLAN = "plan"
43
+    EXECUTE = "execute"
44
+    VERIFY = "verify"
45
+
46
+    @classmethod
47
+    def from_str(cls, value: str | None) -> WorkflowMode | None:
48
+        if value is None:
49
+            return None
50
+        normalized = value.strip().lower()
51
+        for mode in cls:
52
+            if mode.value == normalized:
53
+                return mode
54
+        raise ValueError(f"Unknown workflow mode: {value}")
55
+
56
+
57
@dataclass(slots=True)
class ModeDecision:
    """Router output for the entry point of a task turn."""

    # Mode selected for this turn (clarify / plan / execute / verify).
    mode: WorkflowMode
    # Short human-readable justification for the selection.
    reason: str
    # Heuristic scores in [0, 1]; left at 0.0 when the decision was forced
    # (explicit mode request or existing plan artifacts) rather than scored.
    ambiguity_score: float = 0.0
    complexity_score: float = 0.0
65
+
66
+
67
@dataclass(slots=True)
class ClarifyBrief:
    """Execution-ready brief created from one clarify round.

    Captures one clarify question/answer exchange as a structured
    requirements brief. Round-trips through markdown via `to_markdown` /
    `from_markdown`; `fill_defaults` guarantees every list section is
    non-empty so downstream consumers never see a blank brief.
    """

    # One-sentence statement of the task being clarified.
    task_statement: str
    # Bullet lists for each brief section; empty lists are back-filled by
    # fill_defaults() with conservative placeholders.
    desired_outcome: list[str] = field(default_factory=list)
    in_scope: list[str] = field(default_factory=list)
    non_goals: list[str] = field(default_factory=list)
    decision_boundaries: list[str] = field(default_factory=list)
    constraints: list[str] = field(default_factory=list)
    likely_touchpoints: list[str] = field(default_factory=list)
    assumptions: list[str] = field(default_factory=list)
    acceptance_criteria: list[str] = field(default_factory=list)
    # The clarify question asked and the user's answer, when available.
    question: str | None = None
    answer: str | None = None

    @classmethod
    def from_markdown(
        cls,
        markdown: str,
        *,
        task_statement: str,
        question: str | None = None,
        answer: str | None = None,
    ) -> ClarifyBrief:
        """Parse a brief from markdown sections; defaults are filled in.

        The `task_statement` argument is used as a fallback when the
        markdown has no "Task Statement" section of its own.
        """
        sections = _parse_markdown_sections(markdown)
        brief = cls(
            task_statement=_first_item(sections.get("task_statement")) or task_statement,
            desired_outcome=sections.get("desired_outcome", []),
            in_scope=sections.get("in_scope", []),
            non_goals=sections.get("non_goals", []),
            decision_boundaries=sections.get("decision_boundaries", []),
            constraints=sections.get("constraints", []),
            likely_touchpoints=sections.get("likely_touchpoints", []),
            assumptions=sections.get("assumptions", []),
            acceptance_criteria=sections.get("acceptance_criteria", []),
            question=question,
            answer=answer,
        )
        brief.fill_defaults()
        return brief

    @classmethod
    def fallback(
        cls,
        *,
        task_statement: str,
        question: str,
        answer: str,
    ) -> ClarifyBrief:
        """Build a minimal brief when no structured markdown was produced.

        Seeds every section from the raw question/answer pair so the brief
        is still usable as an execution requirements source.
        """
        brief = cls(
            task_statement=task_statement,
            desired_outcome=[answer or "Clarify the intended outcome before implementation."],
            in_scope=[task_statement],
            non_goals=["Anything not confirmed in the clarification answer."],
            decision_boundaries=["Escalate if the clarified scope changes materially."],
            constraints=["Honor the clarified answer and existing repository conventions."],
            likely_touchpoints=["Determine the concrete files during execution."],
            assumptions=[f"Clarification answer: {answer or 'No answer provided.'}"],
            question=question,
            answer=answer,
        )
        brief.fill_defaults()
        return brief

    def fill_defaults(self) -> None:
        """Back-fill every empty list section with a conservative default."""
        if not self.desired_outcome:
            self.desired_outcome = [self.task_statement]
        if not self.in_scope:
            self.in_scope = [self.task_statement]
        if not self.non_goals:
            self.non_goals = ["Do not expand beyond the clarified task statement."]
        if not self.decision_boundaries:
            self.decision_boundaries = [
                "Escalate for destructive or preference-dependent changes.",
            ]
        if not self.constraints:
            self.constraints = ["Preserve the existing codebase conventions and tests."]
        if not self.likely_touchpoints:
            self.likely_touchpoints = ["Identify exact files during planning or execution."]
        if not self.assumptions:
            self.assumptions = ["Unspecified details stay unchanged unless evidence says otherwise."]
        if not self.acceptance_criteria:
            # Derive criteria from outcome plus the first two scope items,
            # de-duplicated while preserving order.
            self.acceptance_criteria = list(
                dict.fromkeys(self.desired_outcome + self.in_scope[:2])
            )

    def to_markdown(self) -> str:
        """Render the brief as the markdown document persisted to disk."""
        lines = [
            "# Task Brief",
            "",
            f"Generated: {datetime.now(UTC).strftime('%Y-%m-%d %H:%M:%SZ')}",
            "",
            "## Task Statement",
            self.task_statement,
            "",
        ]
        lines.extend(_render_section("Desired Outcome", self.desired_outcome))
        lines.extend(_render_section("In Scope", self.in_scope))
        lines.extend(_render_section("Non Goals", self.non_goals))
        lines.extend(_render_section("Decision Boundaries", self.decision_boundaries))
        lines.extend(_render_section("Constraints", self.constraints))
        lines.extend(_render_section("Likely Touchpoints", self.likely_touchpoints))
        lines.extend(_render_section("Assumptions", self.assumptions))
        lines.extend(_render_section("Acceptance Criteria", self.acceptance_criteria))
        if self.question:
            lines.extend(_render_section("Clarify Question", [self.question]))
        if self.answer:
            lines.extend(_render_section("Clarify Answer", [self.answer]))
        return "\n".join(lines).rstrip() + "\n"
177
+
178
+
179
@dataclass(slots=True)
class PlanningArtifacts:
    """Persistent planning artifacts created before execution."""

    # Full markdown bodies of the two persisted plan documents.
    implementation_markdown: str
    verification_markdown: str
    # Shell commands extracted from the verification plan's commands section.
    verification_commands: list[str]
    # Done-ness criteria; falls back to the task statement when absent.
    acceptance_criteria: list[str]
    # Ordered implementation steps; falls back to the task statement.
    implementation_steps: list[str]

    @classmethod
    def from_model_output(
        cls,
        model_output: str,
        *,
        task_statement: str,
    ) -> PlanningArtifacts:
        """Parse a model's plan output into structured artifacts.

        The output is split on VERIFICATION_SEPARATOR into implementation
        and verification halves; each half is parsed into markdown sections,
        and missing steps/criteria are defaulted from `task_statement`.
        """
        implementation_markdown, verification_markdown = _split_plan_output(model_output)
        implementation_sections = _parse_markdown_sections(implementation_markdown)
        verification_sections = _parse_markdown_sections(verification_markdown)

        # Prefer an explicit execution order; fall back to file-change items.
        implementation_steps = (
            implementation_sections.get("execution_order", [])
            or implementation_sections.get("file_changes", [])
        )
        if not implementation_steps:
            implementation_steps = [task_statement]

        verification_commands = _extract_commands(
            verification_sections.get("verification_commands", [])
        )
        # Acceptance criteria may live in either half of the plan output.
        acceptance_criteria = (
            verification_sections.get("acceptance_criteria", [])
            or implementation_sections.get("acceptance_criteria", [])
        )
        if not acceptance_criteria:
            acceptance_criteria = [task_statement]

        return cls(
            implementation_markdown=_ensure_heading(
                implementation_markdown,
                "# Implementation Plan",
            ),
            verification_markdown=_ensure_heading(
                verification_markdown,
                "# Verification Plan",
            ),
            verification_commands=verification_commands,
            acceptance_criteria=acceptance_criteria,
            implementation_steps=implementation_steps,
        )

    @classmethod
    def fallback(
        cls,
        *,
        task_statement: str,
    ) -> PlanningArtifacts:
        """Build placeholder artifacts when no plan output is available.

        The verification command is a deliberate `echo` placeholder the
        operator is expected to replace with a project-specific check.
        """
        implementation_markdown = "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- Determine concrete files needed for: {task_statement}",
                "",
                "## Execution Order",
                f"1. Inspect the codebase areas relevant to: {task_statement}",
                "2. Apply the minimum required changes.",
                "3. Re-run the most relevant verification commands.",
                "",
                "## Risks",
                "- Unknown repository conventions may require one discovery pass first.",
                "",
            ]
        )
        verification_markdown = "\n".join(
            [
                "# Verification Plan",
                "",
                "## Acceptance Criteria",
                f"- {task_statement}",
                "",
                "## Verification Commands",
                "- echo \"add verification command\"",
                "",
                "## Notes",
                "- Replace the placeholder verification command with a project-specific check.",
                "",
            ]
        )
        return cls(
            implementation_markdown=implementation_markdown,
            verification_markdown=verification_markdown,
            verification_commands=["echo \"add verification command\""],
            acceptance_criteria=[task_statement],
            implementation_steps=[
                f"Inspect the codebase areas relevant to: {task_statement}",
                "Apply the minimum required changes.",
                "Re-run the most relevant verification commands.",
            ],
        )
280
+
281
+
282
class WorkflowArtifactStore:
    """Persist briefs and plans under `.loader/`."""

    def __init__(self, project_root: Path) -> None:
        self.project_root = project_root
        self.loader_root = project_root / ".loader"
        self.briefs_root = self.loader_root / "briefs"
        self.plans_root = self.loader_root / "plans"

    def write_brief(self, task_statement: str, brief: ClarifyBrief) -> Path:
        """Write the brief's markdown under `.loader/briefs/` and return its path."""
        target = self.briefs_root / f"{_timestamp()}-{slugify(task_statement)}.md"
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(brief.to_markdown())
        return target

    def write_plan(
        self,
        task_statement: str,
        artifacts: PlanningArtifacts,
    ) -> tuple[Path, Path]:
        """Write implementation/verification plans; return both file paths."""
        plan_dir = self.plans_root / f"{_timestamp()}-{slugify(task_statement)}"
        plan_dir.mkdir(parents=True, exist_ok=True)
        impl_file = plan_dir / "implementation.md"
        verify_file = plan_dir / "verification.md"
        impl_file.write_text(artifacts.implementation_markdown.rstrip() + "\n")
        verify_file.write_text(artifacts.verification_markdown.rstrip() + "\n")
        return impl_file, verify_file
309
+
310
+
311
class ModeRouter:
    """Simple heuristic router for clarify/plan/execute entry modes."""

    # Minimum scores at which a turn is routed to clarify / plan.
    clarify_threshold = 0.55
    plan_threshold = 0.45

    def route(
        self,
        task: str,
        *,
        requested_mode: WorkflowMode | None = None,
        has_brief: bool = False,
        has_plan: bool = False,
        allow_clarify: bool = True,
    ) -> ModeDecision:
        """Decide the entry mode for one task turn.

        Precedence: an explicit mode request wins; existing plan artifacts
        force execute; otherwise ambiguity/complexity heuristics pick
        clarify, plan, or execute.
        """
        if requested_mode is not None:
            return ModeDecision(
                mode=requested_mode,
                reason=f"explicit {requested_mode.value} request",
            )

        if has_plan:
            return ModeDecision(
                mode=WorkflowMode.EXECUTE,
                reason="reusing existing plan artifacts",
            )

        ambiguity = self._ambiguity_score(task)
        complexity = self._complexity_score(task)
        scores = {"ambiguity_score": ambiguity, "complexity_score": complexity}

        wants_clarify = allow_clarify and not has_brief and ambiguity >= self.clarify_threshold
        if wants_clarify:
            return ModeDecision(
                mode=WorkflowMode.CLARIFY,
                reason="prompt is broad or missing boundaries",
                **scores,
            )

        if complexity >= self.plan_threshold:
            return ModeDecision(
                mode=WorkflowMode.PLAN,
                reason="task looks complex enough to benefit from a persisted plan",
                **scores,
            )

        return ModeDecision(
            mode=WorkflowMode.EXECUTE,
            reason="task appears concrete enough for direct execution",
            **scores,
        )

    def _ambiguity_score(self, task: str) -> float:
        """Score [0, 1] how underspecified the prompt looks."""
        lowered = task.lower()
        word_count = len(re.findall(r"\w+", lowered))
        score = 0.0

        # Explicit requests for clarification are the strongest signal.
        explicit_markers = (
            "--clarify",
            "don't assume",
            "do not assume",
            "not sure",
            "figure out",
            "interview me",
            "ask me",
        )
        if any(marker in lowered for marker in explicit_markers) or lowered.startswith("clarify "):
            score += 0.65

        # Vague quality/comparison language without a concrete target.
        vague_markers = (
            "something",
            "somehow",
            "better",
            "improve",
            "fix this",
            "make it",
            "more like",
            "feels more like",
        )
        if any(marker in lowered for marker in vague_markers):
            score += 0.2

        if not _has_concrete_anchor(task):
            score += 0.2

        # Very short action-verb prompts tend to be underspecified.
        action_verbs = ("build", "add", "improve", "refactor", "implement")
        if word_count <= 12 and any(verb in lowered for verb in action_verbs):
            score += 0.15

        return min(score, 1.0)

    def _complexity_score(self, task: str) -> float:
        """Score [0, 1] how much the task would benefit from a plan."""
        lowered = task.lower()
        word_count = len(re.findall(r"\w+", lowered))
        score = 0.0

        # Longer prompts correlate with more moving parts.
        if word_count >= 18:
            score += 0.2
        if word_count >= 30:
            score += 0.15

        heavy_markers = (
            "refactor",
            "architecture",
            "migrate",
            "persistent",
            "workflow",
            "deep dive",
            "report",
            "implementation plan",
            "verification plan",
        )
        if any(marker in lowered for marker in heavy_markers):
            score += 0.3

        # Conjunction/comma density hints at multi-part work.
        if lowered.count(" and ") >= 2 or lowered.count(",") >= 2:
            score += 0.15

        if _has_concrete_anchor(task):
            score += 0.1

        return min(score, 1.0)
440
+
441
+
442
def load_brief(path: Path) -> ClarifyBrief:
    """Load a clarify brief from disk."""

    markdown = path.read_text()
    # The file stem serves as the fallback task statement when the document
    # has no "Task Statement" section.
    return ClarifyBrief.from_markdown(markdown, task_statement=path.stem)
446
+
447
+
448
def load_planning_artifacts(
    implementation_path: Path,
    verification_path: Path,
    *,
    task_statement: str,
) -> PlanningArtifacts:
    """Load persisted planning artifacts from disk.

    Rejoins the two plan files with VERIFICATION_SEPARATOR so they can be
    re-parsed through the same path as fresh model output.
    """

    pieces = [
        implementation_path.read_text().rstrip(),
        VERIFICATION_SEPARATOR,
        verification_path.read_text().rstrip(),
    ]
    return PlanningArtifacts.from_model_output(
        "\n\n".join(pieces),
        task_statement=task_statement,
    )
464
+
465
+
466
def sync_todos_to_definition_of_done(
    dod,
    todos: list[dict[str, str]],
) -> None:
    """Reflect todo state into DoD pending/completed items.

    Rebuilds `dod.pending_items` / `dod.completed_items` from the todo list,
    preserving the two runtime-managed sentinel items wherever they already
    were. Items are de-duplicated while preserving order.

    Args:
        dod: Definition-of-done object with mutable `pending_items` and
            `completed_items` list attributes (duck-typed).
        todos: Todo dicts with "status", "content", and optionally
            "active_form" keys.
    """

    sentinels = {"Complete the requested work", "Collect verification evidence"}
    special_pending = [item for item in dod.pending_items if item in sentinels]
    special_completed = [item for item in dod.completed_items if item in sentinels]

    pending: list[str] = []
    completed: list[str] = []
    for todo in todos:
        status = str(todo.get("status", "")).strip().lower()
        content = str(todo.get("content", "") or "").strip()
        if status == "in_progress":
            # Prefer the present-tense "active_form", but fall back to the
            # plain content: a missing/None active_form must not surface as
            # the literal string "None" in the DoD.
            label = str(todo.get("active_form") or content).strip()
        else:
            label = content
        if not label:
            continue
        if status == "completed":
            completed.append(content or label)
        else:
            pending.append(label)

    dod.pending_items = list(dict.fromkeys(pending + special_pending))
    dod.completed_items = list(dict.fromkeys(completed + special_completed))
495
+
496
+
497
def extract_verification_commands_from_markdown(markdown: str) -> list[str]:
    """Extract verification commands from a verification-plan markdown document."""

    command_items = _parse_markdown_sections(markdown).get("verification_commands", [])
    return _extract_commands(command_items)
502
+
503
+
504
def build_execute_bridge(
    brief_path: Path | None,
    implementation_path: Path | None,
    verification_path: Path | None,
) -> str | None:
    """Build a compact execution bridge message from persisted artifacts.

    Each existing artifact contributes one intro line plus its stripped file
    contents; returns None when no artifact file exists.
    """

    sources = (
        (brief_path, "Use the clarify brief below as the requirements source of truth."),
        (implementation_path, "Use the implementation plan below to sequence the work."),
        (verification_path, "Use the verification plan below to determine done-ness."),
    )
    sections = [
        f"{intro}\n\n{artifact.read_text().strip()}"
        for artifact, intro in sources
        if artifact and artifact.exists()
    ]
    if not sections:
        return None
    return "\n\n".join(sections)
530
+
531
+
532
def _split_plan_output(model_output: str) -> tuple[str, str]:
    """Split model output into (implementation, verification) halves.

    The verification half is empty when the separator is absent.
    """
    implementation, separator, verification = model_output.partition(VERIFICATION_SEPARATOR)
    if separator:
        return implementation.strip(), verification.strip()
    return model_output.strip(), ""
537
+
538
+
539
def _ensure_heading(markdown: str, heading: str) -> str:
    """Guarantee the document starts with a markdown heading.

    Documents that already begin with "#" keep their own heading; empty
    input becomes just the heading.
    """
    body = markdown.strip()
    if not body:
        return heading + "\n"
    if body.startswith("#"):
        return body + "\n"
    return f"{heading}\n\n{body}\n"
546
+
547
+
548
def _timestamp() -> str:
    """Sortable UTC timestamp used in artifact file/directory names."""
    return format(datetime.now(UTC), "%Y%m%dT%H%M%SZ")
550
+
551
+
552
def _normalize_heading(text: str) -> str:
    """Map a markdown heading to its canonical section key.

    Punctuation collapses to spaces; known aliases resolve through
    _SECTION_ALIASES, anything else becomes snake_case.
    """
    cleaned = re.sub(r"[^a-z0-9]+", " ", text.lower()).strip()
    fallback_key = cleaned.replace(" ", "_")
    return _SECTION_ALIASES.get(cleaned, fallback_key)
555
+
556
+
557
def _parse_markdown_sections(markdown: str) -> dict[str, list[str]]:
    """Group markdown body lines by their normalized `##`-level headings.

    Lines before the first heading are dropped; each section's lines are
    flattened into items via _extract_items.
    """
    raw_sections: dict[str, list[str]] = {}
    active_key: str | None = None
    for raw_line in markdown.splitlines():
        heading_match = re.match(r"^##+\s+(.+?)\s*$", raw_line.strip())
        if heading_match is not None:
            active_key = _normalize_heading(heading_match.group(1))
            raw_sections.setdefault(active_key, [])
        elif active_key is not None:
            raw_sections[active_key].append(raw_line.rstrip())
    return {key: _extract_items(lines) for key, lines in raw_sections.items()}
573
+
574
+
575
def _extract_items(lines: list[str]) -> list[str]:
    """Flatten section lines into items.

    Bullets and numbered entries become one item each; consecutive plain
    lines join into a single paragraph item. Empty items are dropped.
    """
    collected: list[str] = []
    pending_paragraph: list[str] = []

    def flush_paragraph() -> None:
        # Join buffered plain lines into one paragraph item.
        if pending_paragraph:
            collected.append(" ".join(pending_paragraph).strip())
            pending_paragraph.clear()

    for raw in lines:
        text = raw.strip()
        if not text:
            flush_paragraph()
            continue
        marker = re.match(r"^(?:[-*]|\d+\.)\s+(.+)$", text)
        if marker is not None:
            flush_paragraph()
            collected.append(marker.group(1).strip())
        else:
            pending_paragraph.append(text)
    flush_paragraph()
    return [entry for entry in collected if entry]
597
+
598
+
599
def _render_section(title: str, items: list[str]) -> list[str]:
    """Render one `## title` section as bullet lines (placeholder if empty)."""
    bullets = [f"- {item}" for item in items] if items else ["- None recorded."]
    return [f"## {title}", *bullets, ""]
607
+
608
+
609
def _first_item(items: list[str] | None) -> str | None:
    """Return the leading item, or None for an empty/missing list."""
    return items[0] if items else None
613
+
614
+
615
def _extract_commands(items: list[str]) -> list[str]:
    """Unwrap optional backtick quoting and drop blank command entries."""
    cleaned: list[str] = []
    for entry in items:
        backticked = re.match(r"^`(.+)`$", entry)
        command = (backticked.group(1) if backticked else entry).strip()
        if command:
            cleaned.append(command)
    return cleaned
621
+
622
+
623
def _has_concrete_anchor(task: str) -> bool:
    """Heuristic: does the prompt mention a concrete code artifact?

    Looks for file paths, issue numbers, identifier casings, code fences,
    test-runner names, or error/traceback keywords.
    """
    anchor_patterns = (
        r"[./][\w./-]+",  # file path
        r"#\d+",  # issue/pr number
        r"\b[a-z]+[A-Z][A-Za-z0-9_]+\b",  # camelCase
        r"\b[A-Z][a-z0-9]+[A-Z][A-Za-z0-9_]+\b",  # PascalCase symbol
        r"\b[a-z0-9]+_[a-z0-9_]+\b",  # snake_case
        r"```",  # code block
        r"\bpytest\b|\bnpm test\b|\bcargo test\b|\bmypy\b|\bruff\b",
        r"\bacceptance criteria\b",
        r"\bTypeError\b|\bAssertionError\b|\bTraceback\b",
    )
    return any(re.search(pattern, task) is not None for pattern in anchor_patterns)
tests/test_workflow.pyadded
@@ -0,0 +1,177 @@
1
+"""Tests for Sprint 04 workflow routing and artifact persistence."""
2
+
3
+from __future__ import annotations
4
+
5
+from pathlib import Path
6
+
7
+from loader.runtime.dod import DefinitionOfDoneStore, create_definition_of_done
8
+from loader.runtime.workflow import (
9
+    ClarifyBrief,
10
+    ModeRouter,
11
+    PlanningArtifacts,
12
+    WorkflowArtifactStore,
13
+    WorkflowMode,
14
+    build_execute_bridge,
15
+    extract_verification_commands_from_markdown,
16
+    sync_todos_to_definition_of_done,
17
+)
18
+
19
+
20
def test_mode_router_routes_ambiguous_prompt_to_clarify() -> None:
    """An ambiguous prompt is routed to clarify with a score at/above threshold."""

    mode_router = ModeRouter()

    decision = mode_router.route("Improve Loader so it feels more like claw-code.")

    assert decision.mode == WorkflowMode.CLARIFY
    assert decision.ambiguity_score >= mode_router.clarify_threshold
27
+
28
+
29
def test_mode_router_routes_complex_prompt_to_plan() -> None:
    """A complex multi-artifact prompt is routed to plan mode."""

    mode_router = ModeRouter()
    prompt = (
        "Implement a persistent workflow mode router with clarify artifacts, "
        "planning artifacts, and verification-plan wiring in the runtime."
    )

    decision = mode_router.route(prompt)

    assert decision.mode == WorkflowMode.PLAN
    assert decision.complexity_score >= mode_router.plan_threshold
39
+
40
+
41
def test_mode_router_routes_simple_prompt_to_execute() -> None:
    """A small, concrete prompt goes straight to execute mode."""

    decision = ModeRouter().route("Read pyproject.toml and tell me the package name.")

    assert decision.mode == WorkflowMode.EXECUTE
47
+
48
+
49
def test_clarify_brief_round_trips_and_seeds_acceptance_criteria() -> None:
    """A fallback brief survives a markdown round trip and seeds criteria."""

    original = ClarifyBrief.fallback(
        task_statement="Clarify the authentication change.",
        question="What outcome matters most?",
        answer="Add login without touching the signup flow.",
    )

    restored = ClarifyBrief.from_markdown(
        original.to_markdown(),
        task_statement=original.task_statement,
        question=original.question,
        answer=original.answer,
    )

    assert restored.task_statement == original.task_statement
    assert "Add login" in restored.acceptance_criteria[0]
    assert restored.non_goals
66
+
67
+
68
def test_planning_artifacts_round_trip_and_extract_commands() -> None:
    """Model output split by the verification sentinel parses into both plans."""

    artifacts = PlanningArtifacts.from_model_output(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## Execution Order",
                "1. Inspect auth files.",
                "2. Implement the change.",
                "",
                "## Risks",
                "- Regression in signup.",
                "",
                # Sentinel separating the implementation and verification plans.
                "<<<VERIFICATION>>>",
                "",
                "# Verification Plan",
                "",
                "## Acceptance Criteria",
                "- Login works without changing signup.",
                "",
                "## Verification Commands",
                "- `uv run pytest tests/test_auth.py -q`",
                "- `uv run mypy src/loader`",
            ]
        ),
        task_statement="Clarify and implement the auth change.",
    )

    # Numbered execution-order entries become plain implementation steps.
    assert artifacts.implementation_steps[:2] == [
        "Inspect auth files.",
        "Implement the change.",
    ]
    assert artifacts.acceptance_criteria == ["Login works without changing signup."]
    # Backtick wrapping is stripped from the command bullets.
    assert artifacts.verification_commands == [
        "uv run pytest tests/test_auth.py -q",
        "uv run mypy src/loader",
    ]
    # Re-extracting from the rendered markdown yields the same commands.
    assert extract_verification_commands_from_markdown(artifacts.verification_markdown) == [
        "uv run pytest tests/test_auth.py -q",
        "uv run mypy src/loader",
    ]
109
+
110
+
111
def test_workflow_artifact_store_and_bridge_round_trip(tmp_path: Path) -> None:
    """Persisted brief/plan artifacts combine into a complete execute bridge."""

    artifact_store = WorkflowArtifactStore(tmp_path)
    clarify = ClarifyBrief.fallback(
        task_statement="Clarify the runtime changes.",
        question="What matters most?",
        answer="Close the tool-use gap first.",
    )
    plan = PlanningArtifacts.fallback(task_statement=clarify.task_statement)

    brief_path = artifact_store.write_brief(clarify.task_statement, clarify)
    implementation_path, verification_path = artifact_store.write_plan(
        clarify.task_statement,
        plan,
    )
    bridge = build_execute_bridge(brief_path, implementation_path, verification_path)

    for written in (brief_path, implementation_path, verification_path):
        assert written.exists()
    assert bridge is not None
    for heading in ("Task Brief", "Implementation Plan", "Verification Plan"):
        assert heading in bridge
134
+
135
+
136
def test_definition_of_done_round_trip_preserves_workflow_links(tmp_path: Path) -> None:
    """Saving and reloading a DoD keeps mode state and artifact paths intact."""

    store = DefinitionOfDoneStore(tmp_path)
    original = create_definition_of_done("Implement Loader workflow routing.")
    original.current_mode = "plan"
    original.mode_history = ["clarify", "plan"]
    original.clarify_brief = str(tmp_path / ".loader" / "briefs" / "brief.md")
    original.implementation_plan = str(tmp_path / ".loader" / "plans" / "impl.md")
    original.verification_plan = str(tmp_path / ".loader" / "plans" / "verify.md")

    reloaded = store.load(store.save(original))

    assert reloaded.current_mode == "plan"
    assert reloaded.mode_history == ["clarify", "plan"]
    assert reloaded.clarify_brief == original.clarify_brief
    assert reloaded.implementation_plan == original.implementation_plan
    assert reloaded.verification_plan == original.verification_plan
153
+
154
+
155
def test_sync_todos_to_definition_of_done_preserves_runtime_items() -> None:
    """Todo sync merges tool-managed items without dropping runtime-added ones."""

    done = create_definition_of_done("Implement Loader workflow routing.")
    done.pending_items.append("Collect verification evidence")

    todos = [
        {
            "content": "Write router",
            "active_form": "Writing router",
            "status": "in_progress",
        },
        {
            "content": "Update tests",
            "active_form": "Updating tests",
            "status": "completed",
        },
    ]
    sync_todos_to_definition_of_done(done, todos)

    assert "Writing router" in done.pending_items
    assert "Collect verification evidence" in done.pending_items
    assert "Update tests" in done.completed_items
tests/test_workflow_runtime.py (added)
@@ -0,0 +1,278 @@
1
+"""Runtime integration coverage for Sprint 04 workflow routing."""
2
+
3
+from __future__ import annotations
4
+
5
+from pathlib import Path
6
+
7
+import pytest
8
+
9
+from loader.agent.loop import AgentConfig
10
+from loader.llm.base import CompletionResponse, ToolCall
11
+from tests.helpers.runtime_harness import ScriptedBackend, run_scenario
12
+
13
+
14
def non_streaming_config() -> AgentConfig:
    """Shared config for deterministic workflow-mode runtime tests."""

    return AgentConfig(
        auto_context=False,
        stream=False,
        max_iterations=8,
    )
18
+
19
+
20
def workflow_modes(run) -> list[str]:
    """Return emitted workflow modes in order."""

    modes: list[str] = []
    for event in run.events:
        if event.type == "workflow_mode" and event.workflow_mode:
            modes.append(event.workflow_mode)
    return modes
28
+
29
+
30
def artifact_kinds(run) -> list[str]:
    """Return emitted artifact kinds in order."""

    kinds: list[str] = []
    for event in run.events:
        if event.type == "artifact" and event.artifact_kind:
            kinds.append(event.artifact_kind)
    return kinds
38
+
39
+
40
@pytest.mark.asyncio
async def test_ambiguous_prompt_routes_to_clarify_and_persists_brief(
    temp_dir: Path,
) -> None:
    """An ambiguous prompt enters clarify mode, asks one question, writes a brief."""

    # Scripted turn order: ask a clarifying question, emit the brief markdown,
    # then close the turn with a plain completion.
    backend = ScriptedBackend(
        completions=[
            CompletionResponse(
                content="I need one clarification before I proceed.",
                tool_calls=[
                    ToolCall(
                        id="ask-1",
                        name="AskUserQuestion",
                        arguments={
                            "question": "What should stay out of scope for this Loader improvement?",
                        },
                    )
                ],
            ),
            CompletionResponse(
                content="\n".join(
                    [
                        "## Task Statement",
                        "Improve Loader so it feels more like claw-code.",
                        "",
                        "## Desired Outcome",
                        "- Make Loader more reliable without broad redesign.",
                        "",
                        "## In Scope",
                        "- Tighten the runtime workflow around the user-facing goal.",
                        "",
                        "## Non Goals",
                        "- Rebuild unrelated subsystems.",
                        "",
                        "## Decision Boundaries",
                        "- Escalate before changing unrelated UX patterns.",
                        "",
                        "## Constraints",
                        "- Stay within the current repository.",
                        "",
                        "## Likely Touchpoints",
                        "- Runtime entry points and prompt behavior.",
                        "",
                        "## Assumptions",
                        "- The user wants a narrow runtime-quality improvement.",
                        "",
                        "## Acceptance Criteria",
                        "- The improvement stays focused on runtime behavior.",
                    ]
                )
            ),
            CompletionResponse(content="I have the brief and can move forward."),
        ]
    )

    async def answer(question: str, options: list[str] | None) -> str:
        # Simulated user reply to the AskUserQuestion tool call above.
        assert "out of scope" in question.lower()
        assert options is None
        return "Do not redesign the whole interface."

    run = await run_scenario(
        "Improve Loader so it feels more like claw-code.",
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
        on_user_question=answer,
    )

    dod = run.agent.last_turn_summary.definition_of_done
    assert dod is not None
    # Mode sequence: clarify first, then hand-off to execute.
    assert workflow_modes(run)[:2] == ["clarify", "execute"]
    assert artifact_kinds(run) == ["clarify_brief"]
    # The clarify brief is persisted to disk and linked from the DoD.
    assert dod.clarify_brief is not None
    assert Path(dod.clarify_brief).exists()
    assert "runtime behavior" in dod.acceptance_criteria[0].lower()
    # The first turn's system prompt carries the clarify-mode section.
    assert "## Clarify Mode" in backend.invocations[0].messages[0].content
115
+
116
+
117
@pytest.mark.asyncio
async def test_complex_prompt_routes_to_plan_and_uses_verification_artifact(
    temp_dir: Path,
) -> None:
    """A complex prompt plans first, executes, then runs the planned verification."""

    target = temp_dir / "planned.txt"
    # Scripted turn order: emit the combined plan, perform the write, summarize.
    backend = ScriptedBackend(
        completions=[
            CompletionResponse(
                content="\n".join(
                    [
                        "# Implementation Plan",
                        "",
                        "## File Changes",
                        f"- Create {target.name} in the workspace root.",
                        "",
                        "## Execution Order",
                        f"1. Write {target.name}.",
                        "2. Confirm the file exists.",
                        "",
                        "## Risks",
                        "- Writing the wrong file path.",
                        "",
                        # Sentinel splitting implementation from verification.
                        "<<<VERIFICATION>>>",
                        "",
                        "# Verification Plan",
                        "",
                        "## Acceptance Criteria",
                        f"- {target.name} exists in the workspace root.",
                        "",
                        "## Verification Commands",
                        f"- `test -f {target}`",
                        "",
                        "## Notes",
                        "- Use a deterministic file existence check.",
                    ]
                )
            ),
            CompletionResponse(
                content="I'll create the file now.",
                tool_calls=[
                    ToolCall(
                        id="write-1",
                        name="write",
                        arguments={
                            "file_path": str(target),
                            "content": "planned output\n",
                        },
                    )
                ],
            ),
            CompletionResponse(content="The file is in place."),
        ]
    )

    run = await run_scenario(
        "Implement a persistent workflow mode router with clarify artifacts, "
        "planning artifacts, and verification-plan wiring in the runtime.",
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
    )

    dod = run.agent.last_turn_summary.definition_of_done
    assert dod is not None
    # Mode sequence: plan -> execute -> verify.
    assert workflow_modes(run)[:3] == ["plan", "execute", "verify"]
    assert artifact_kinds(run) == ["implementation_plan", "verification_plan"]
    # Both plan artifacts are persisted and linked from the DoD.
    assert dod.implementation_plan is not None
    assert dod.verification_plan is not None
    assert Path(dod.implementation_plan).exists()
    assert Path(dod.verification_plan).exists()
    assert dod.verification_commands == [f"test -f {target}"]
    assert "## Plan Mode" in backend.invocations[0].messages[0].content
    # The verification phase replays exactly the planned command.
    verify_calls = [
        event
        for event in run.events
        if event.type == "tool_call" and event.phase == "verification"
    ]
    assert [event.tool_args["command"] for event in verify_calls] == [f"test -f {target}"]
195
+
196
+
197
@pytest.mark.asyncio
async def test_verify_failure_returns_to_execute_without_retriggering_plan(
    temp_dir: Path,
) -> None:
    """A failed verification loops back to execute without re-entering plan mode."""

    target = temp_dir / "retry.txt"
    # Scripted turns: plan, first (wrong) write, summary, corrective write, summary.
    backend = ScriptedBackend(
        completions=[
            CompletionResponse(
                content="\n".join(
                    [
                        "# Implementation Plan",
                        "",
                        "## File Changes",
                        f"- Create {target.name}.",
                        "",
                        "## Execution Order",
                        f"1. Write {target.name}.",
                        "2. Fix it if verification fails.",
                        "",
                        "## Risks",
                        "- Initial content may be wrong.",
                        "",
                        # Sentinel splitting implementation from verification.
                        "<<<VERIFICATION>>>",
                        "",
                        "# Verification Plan",
                        "",
                        "## Acceptance Criteria",
                        "- The file contains the word fixed.",
                        "",
                        "## Verification Commands",
                        f"- `grep -q fixed {target}`",
                        "",
                        "## Notes",
                        "- Retry if the first write misses the target string.",
                    ]
                )
            ),
            CompletionResponse(
                content="I'll write the first draft.",
                tool_calls=[
                    ToolCall(
                        id="write-1",
                        name="write",
                        arguments={
                            "file_path": str(target),
                            # Deliberately missing "fixed" so verification fails once.
                            "content": "draft output\n",
                        },
                    )
                ],
            ),
            CompletionResponse(content="First draft is written."),
            CompletionResponse(
                content="I'll correct the file.",
                tool_calls=[
                    ToolCall(
                        id="write-2",
                        name="write",
                        arguments={
                            "file_path": str(target),
                            "content": "fixed output\n",
                        },
                    )
                ],
            ),
            CompletionResponse(content="The file now contains the fixed output."),
        ]
    )

    run = await run_scenario(
        "Implement a persistent workflow mode router with clarify artifacts, "
        "planning artifacts, and verification-plan wiring in the runtime.",
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
    )

    modes = workflow_modes(run)
    # Planning runs exactly once; the retry cycles only between execute/verify.
    assert modes.count("plan") == 1
    assert modes.count("clarify") == 0
    assert modes.count("execute") >= 2
    assert modes.count("verify") >= 2
    assert "fixed output" in target.read_text()
tests/test_workflow_runtime_tools.py (modified)
@@ -12,7 +12,12 @@ from tests.helpers.runtime_harness import ScriptedBackend, run_scenario
1212
 def non_streaming_config() -> AgentConfig:
1313
     """Shared deterministic config for runtime tool tests."""
1414
 
15
-    return AgentConfig(auto_context=False, stream=False, max_iterations=4)
15
+    return AgentConfig(
16
+        auto_context=False,
17
+        stream=False,
18
+        max_iterations=4,
19
+        workflow_mode_override="execute",
20
+    )
1621
 
1722
 
1823
 async def _answer(question: str, options: list[str] | None) -> str: