Add routed workflow artifacts to runtime
- SHA
de5f6787b869038db1ff2cb7442d0c428869538c- Parents
-
8713208 - Tree
e9e3480
de5f678
de5f6787b869038db1ff2cb7442d0c428869538c8713208
e9e3480| Status | File | + | - |
|---|---|---|---|
| M |
src/loader/agent/loop.py
|
30 | 46 |
| M |
src/loader/agent/prompts.py
|
65 | 18 |
| M |
src/loader/runtime/conversation.py
|
506 | 0 |
| M |
src/loader/runtime/dod.py
|
10 | 0 |
| M |
src/loader/runtime/events.py
|
4 | 0 |
| A |
src/loader/runtime/workflow.py
|
637 | 0 |
| A |
tests/test_workflow.py
|
177 | 0 |
| A |
tests/test_workflow_runtime.py
|
278 | 0 |
| M |
tests/test_workflow_runtime_tools.py
|
6 | 1 |
src/loader/agent/loop.pymodified@@ -13,6 +13,7 @@ from ..runtime.conversation import ConversationRuntime | ||
| 13 | 13 | from ..runtime.events import AgentEvent, TurnSummary |
| 14 | 14 | from ..runtime.permissions import PermissionMode, build_permission_policy |
| 15 | 15 | from ..runtime.session import ConversationSession |
| 16 | +from ..runtime.workflow import WorkflowMode | |
| 16 | 17 | from ..tools.base import ToolRegistry, create_default_registry |
| 17 | 18 | from .planner import ( |
| 18 | 19 | PLANNING_PROMPT, |
@@ -89,6 +90,7 @@ class AgentConfig: | ||
| 89 | 90 | max_recovery_attempts: int = 2 # Reduced from 3 |
| 90 | 91 | verification_retry_budget: int = 3 # Retry budget for verify/fix loop |
| 91 | 92 | permission_mode: PermissionMode = PermissionMode.WORKSPACE_WRITE |
| 93 | + workflow_mode_override: str | None = None | |
| 92 | 94 | stream: bool = True # Stream LLM responses for real-time output |
| 93 | 95 | |
| 94 | 96 | # Reasoning stages configuration |
@@ -126,6 +128,7 @@ class Agent: | ||
| 126 | 128 | messages=self.messages, |
| 127 | 129 | ) |
| 128 | 130 | self._system_message: Message | None = None |
| 131 | + self.workflow_mode = WorkflowMode.EXECUTE.value | |
| 129 | 132 | self._use_react: bool | None = None |
| 130 | 133 | self.capability_profile = resolve_backend_capability_profile(self.backend) |
| 131 | 134 | self.last_turn_summary: TurnSummary | None = None |
@@ -204,6 +207,7 @@ class Agent: | ||
| 204 | 207 | tools=tool_schemas, |
| 205 | 208 | use_react=self.use_react, |
| 206 | 209 | project_context=self.project_context, |
| 210 | + workflow_mode=self.workflow_mode, | |
| 207 | 211 | ) |
| 208 | 212 | self._system_message = Message( |
| 209 | 213 | role=Role.SYSTEM, |
@@ -211,6 +215,14 @@ class Agent: | ||
| 211 | 215 | ) |
| 212 | 216 | return self._system_message |
| 213 | 217 | |
| 218 | + def set_workflow_mode(self, workflow_mode: str) -> None: | |
| 219 | + """Update the active workflow mode used by the system prompt.""" | |
| 220 | + | |
| 221 | + if workflow_mode == self.workflow_mode: | |
| 222 | + return | |
| 223 | + self.workflow_mode = workflow_mode | |
| 224 | + self._system_message = None | |
| 225 | + | |
| 214 | 226 | def _build_messages(self) -> list[Message]: |
| 215 | 227 | """Build the full message list for the LLM.""" |
| 216 | 228 | return self.session.build_request_messages() |
@@ -553,52 +565,6 @@ class Agent: | ||
| 553 | 565 | else: |
| 554 | 566 | return f"Task partially completed. {decomposition.to_prompt()}" |
| 555 | 567 | |
| 556 | - # Check if we should use planning | |
| 557 | - should_use_plan = use_plan | |
| 558 | - if should_use_plan is None and self.config.auto_plan: | |
| 559 | - await emit(AgentEvent(type="thinking")) | |
| 560 | - should_use_plan = await self._should_plan(user_message) | |
| 561 | - | |
| 562 | - # If planning, create and execute plan | |
| 563 | - if should_use_plan: | |
| 564 | - plan = await self._create_plan(user_message) | |
| 565 | - if plan.steps: | |
| 566 | - await emit(AgentEvent(type="plan", content=plan.to_prompt())) | |
| 567 | - | |
| 568 | - # Execute each step | |
| 569 | - while not plan.is_complete(): | |
| 570 | - step = plan.next_step() | |
| 571 | - if not step: | |
| 572 | - break | |
| 573 | - | |
| 574 | - await emit(AgentEvent( | |
| 575 | - type="step", | |
| 576 | - step_info=f"{plan.progress_str()} {step.description}", | |
| 577 | - )) | |
| 578 | - | |
| 579 | - # Run the step | |
| 580 | - step_prompt = format_step_prompt(plan, step) | |
| 581 | - await self._run_inner( | |
| 582 | - step_prompt, | |
| 583 | - emit, | |
| 584 | - on_confirmation, | |
| 585 | - on_user_question=on_user_question, | |
| 586 | - original_task=self._current_task, | |
| 587 | - ) | |
| 588 | - | |
| 589 | - plan.complete_current() | |
| 590 | - | |
| 591 | - # Final summary | |
| 592 | - self.messages.append(Message(role=Role.USER, content=user_message)) | |
| 593 | - summary_prompt = f"I've completed the plan. Summarize what was done:\n{plan.to_prompt()}" | |
| 594 | - return await self._run_inner( | |
| 595 | - summary_prompt, | |
| 596 | - emit, | |
| 597 | - on_confirmation, | |
| 598 | - on_user_question=on_user_question, | |
| 599 | - original_task=self._current_task, | |
| 600 | - ) | |
| 601 | - | |
| 602 | 568 | # No planning or decomposition - run directly |
| 603 | 569 | self.messages.append(Message(role=Role.USER, content=user_message)) |
| 604 | 570 | return await self._run_inner( |
@@ -606,6 +572,7 @@ class Agent: | ||
| 606 | 572 | emit, |
| 607 | 573 | on_confirmation, |
| 608 | 574 | on_user_question=on_user_question, |
| 575 | + requested_mode=self._requested_workflow_mode(use_plan), | |
| 609 | 576 | original_task=self._current_task, |
| 610 | 577 | ) |
| 611 | 578 | |
@@ -615,6 +582,7 @@ class Agent: | ||
| 615 | 582 | emit: Callable[[AgentEvent], Awaitable[None]], |
| 616 | 583 | on_confirmation: Callable[[str, str, str], Awaitable[bool]] | None = None, |
| 617 | 584 | on_user_question: Callable[[str, list[str] | None], Awaitable[str]] | None = None, |
| 585 | + requested_mode: str | None = None, | |
| 618 | 586 | original_task: str | None = None, |
| 619 | 587 | ) -> str: |
| 620 | 588 | """Inner execution loop without planning.""" |
@@ -625,10 +593,24 @@ class Agent: | ||
| 625 | 593 | emit, |
| 626 | 594 | on_confirmation=on_confirmation, |
| 627 | 595 | on_user_question=on_user_question, |
| 596 | + requested_mode=requested_mode, | |
| 628 | 597 | original_task=original_task, |
| 629 | 598 | ) |
| 630 | 599 | return self.last_turn_summary.final_response |
| 631 | 600 | |
| 601 | + def _requested_workflow_mode(self, use_plan: bool | None) -> str | None: | |
| 602 | + """Resolve the explicit workflow-mode override for the current turn.""" | |
| 603 | + | |
| 604 | + if use_plan is True: | |
| 605 | + return WorkflowMode.PLAN.value | |
| 606 | + if use_plan is False: | |
| 607 | + return WorkflowMode.EXECUTE.value | |
| 608 | + if self.config.workflow_mode_override: | |
| 609 | + return self.config.workflow_mode_override | |
| 610 | + if self.config.auto_plan: | |
| 611 | + return WorkflowMode.PLAN.value | |
| 612 | + return None | |
| 613 | + | |
| 632 | 614 | async def run_streaming( |
| 633 | 615 | self, |
| 634 | 616 | user_message: str, |
@@ -1001,4 +983,6 @@ class Agent: | ||
| 1001 | 983 | self._recovery_context = None |
| 1002 | 984 | self._current_task = None |
| 1003 | 985 | self.last_turn_summary = None |
| 986 | + self.workflow_mode = WorkflowMode.EXECUTE.value | |
| 987 | + self._system_message = None | |
| 1004 | 988 | self.safeguards.reset() # Reset all runtime safeguards |
src/loader/agent/prompts.pymodified@@ -1,7 +1,7 @@ | ||
| 1 | 1 | """Prompt templates for the agent.""" |
| 2 | 2 | |
| 3 | 3 | import os |
| 4 | -from typing import Any, TYPE_CHECKING | |
| 4 | +from typing import TYPE_CHECKING, Any | |
| 5 | 5 | |
| 6 | 6 | if TYPE_CHECKING: |
| 7 | 7 | from ..context.project import ProjectContext |
@@ -145,17 +145,43 @@ def format_tool_descriptions(tools: list[dict[str, Any]]) -> str: | ||
| 145 | 145 | return "\n\n".join(lines) |
| 146 | 146 | |
| 147 | 147 | |
| 148 | +MODE_GUIDANCE = { | |
| 149 | + "clarify": """ | |
| 150 | +## Clarify Mode | |
| 151 | +- Ask exactly one focused question with `AskUserQuestion` | |
| 152 | +- Clarify intent, outcome, scope, or boundaries before proposing solutions | |
| 153 | +- Do not start coding or writing patch plans yet | |
| 154 | +- Keep the question high-leverage and brief | |
| 155 | +""", | |
| 156 | + "plan": """ | |
| 157 | +## Plan Mode | |
| 158 | +- Produce persistent implementation and verification planning artifacts | |
| 159 | +- Do not start writing code in this mode | |
| 160 | +- Be explicit about file touchpoints, order of work, risks, acceptance criteria, and verification commands | |
| 161 | +- Prefer concrete, repository-grounded plans over generic checklists | |
| 162 | +""", | |
| 163 | + "execute": """ | |
| 164 | +## Execute Mode | |
| 165 | +- Use tools directly to perform the task | |
| 166 | +- Read relevant files before editing them | |
| 167 | +- Keep `TodoWrite` current for multi-step work when progress tracking matters | |
| 168 | +- Concise reporting is fine, and numbered lists are allowed when they communicate plan or evidence clearly | |
| 169 | +""", | |
| 170 | + "verify": """ | |
| 171 | +## Verify Mode | |
| 172 | +- Run the planned verification commands and capture evidence | |
| 173 | +- Do not declare the task complete while any verification step is failing | |
| 174 | +- Report concrete pass/fail evidence rather than vague confidence | |
| 175 | +""", | |
| 176 | +} | |
| 177 | + | |
| 178 | + | |
| 148 | 179 | SYSTEM_PROMPT = """You are Loader, an AI coding agent. |
| 149 | 180 | |
| 150 | 181 | Current directory: {cwd} |
| 151 | 182 | |
| 152 | -## Tools | |
| 153 | -- bash: Run shell commands | |
| 154 | -- write: Create files | |
| 155 | -- read: Read files | |
| 156 | -- edit: Modify files | |
| 157 | -- glob: Find files | |
| 158 | -- grep: Search in files | |
| 183 | +## Tools Available | |
| 184 | +{tool_descriptions} | |
| 159 | 185 | |
| 160 | 186 | ## How to Use Tools |
| 161 | 187 | Output a tool call in this format: |
@@ -166,12 +192,19 @@ Output a tool call in this format: | ||
| 166 | 192 | [write: file_path="hello.py", content="print('hello')"] |
| 167 | 193 | [read: file_path="config.json"] |
| 168 | 194 | [edit: file_path="app.py", old_string="old", new_string="new"] |
| 195 | +[TodoWrite: todos=[{{content="Run tests", active_form="Running tests", status="in_progress"}}]] | |
| 196 | +[AskUserQuestion: question="Which path matters more?", options=["Speed", "Correctness"]] | |
| 197 | + | |
| 198 | +## Active Workflow Mode | |
| 199 | +{workflow_mode} | |
| 200 | + | |
| 201 | +{mode_guidance} | |
| 169 | 202 | |
| 170 | 203 | ## Rules |
| 171 | -1. Use tools immediately - don't explain first | |
| 172 | -2. No code blocks (```) - use the write tool instead | |
| 173 | -3. No numbered steps - just do the task | |
| 174 | -4. Read files before editing them | |
| 204 | +1. Follow the active workflow mode rather than improvising a different one | |
| 205 | +2. Use tools or concise prose directly instead of narrating fake tool use | |
| 206 | +3. Use the write tool for files rather than pasting long code blocks | |
| 207 | +4. Keep responses grounded in repository evidence and verification output | |
| 175 | 208 | """ |
| 176 | 209 | |
| 177 | 210 | |
@@ -200,11 +233,16 @@ Current directory: {cwd} | ||
| 200 | 233 | {{"name": "read", "arguments": {{"file_path": "config.json"}}}} |
| 201 | 234 | </tool_call> |
| 202 | 235 | |
| 236 | +## Active Workflow Mode | |
| 237 | +{workflow_mode} | |
| 238 | + | |
| 239 | +{mode_guidance} | |
| 240 | + | |
| 203 | 241 | ## Rules |
| 204 | -1. Use tools immediately - don't explain first | |
| 205 | -2. No code blocks - use the write tool instead | |
| 206 | -3. No numbered steps - just do the task | |
| 207 | -4. Read files before editing them | |
| 242 | +1. Follow the active workflow mode rather than improvising a different one | |
| 243 | +2. Use tools or concise prose directly instead of narrating fake tool use | |
| 244 | +3. Use the write tool for files rather than pasting long code blocks | |
| 245 | +4. Keep responses grounded in repository evidence and verification output | |
| 208 | 246 | """ |
| 209 | 247 | |
| 210 | 248 | |
@@ -212,6 +250,7 @@ def build_system_prompt( | ||
| 212 | 250 | tools: list[dict[str, Any]], |
| 213 | 251 | use_react: bool = False, |
| 214 | 252 | project_context: "str | ProjectContext | None" = None, |
| 253 | + workflow_mode: str = "execute", | |
| 215 | 254 | ) -> str: |
| 216 | 255 | """Build the system prompt with tool descriptions. |
| 217 | 256 | |
@@ -224,15 +263,23 @@ def build_system_prompt( | ||
| 224 | 263 | Formatted system prompt |
| 225 | 264 | """ |
| 226 | 265 | cwd = os.getcwd() |
| 266 | + tool_descriptions = format_tool_descriptions(tools) | |
| 267 | + mode_guidance = MODE_GUIDANCE.get(workflow_mode, MODE_GUIDANCE["execute"]) | |
| 227 | 268 | |
| 228 | 269 | if use_react: |
| 229 | - tool_descriptions = format_tool_descriptions(tools) | |
| 230 | 270 | prompt = REACT_SYSTEM_PROMPT.format( |
| 231 | 271 | cwd=cwd, |
| 232 | 272 | tool_descriptions=tool_descriptions, |
| 273 | + workflow_mode=workflow_mode, | |
| 274 | + mode_guidance=mode_guidance, | |
| 233 | 275 | ) |
| 234 | 276 | else: |
| 235 | - prompt = SYSTEM_PROMPT.format(cwd=cwd) | |
| 277 | + prompt = SYSTEM_PROMPT.format( | |
| 278 | + cwd=cwd, | |
| 279 | + tool_descriptions=tool_descriptions, | |
| 280 | + workflow_mode=workflow_mode, | |
| 281 | + mode_guidance=mode_guidance, | |
| 282 | + ) | |
| 236 | 283 | |
| 237 | 284 | # Add project context if available |
| 238 | 285 | if project_context: |
src/loader/runtime/conversation.pymodified@@ -2,8 +2,10 @@ | ||
| 2 | 2 | |
| 3 | 3 | from __future__ import annotations |
| 4 | 4 | |
| 5 | +import re | |
| 5 | 6 | from collections.abc import Awaitable, Callable |
| 6 | 7 | from dataclasses import dataclass, field |
| 8 | +from pathlib import Path | |
| 7 | 9 | from typing import Any |
| 8 | 10 | |
| 9 | 11 | from ..agent.parsing import parse_tool_calls |
@@ -30,6 +32,17 @@ from .events import AgentEvent, TurnSummary | ||
| 30 | 32 | from .executor import ToolExecutionState, ToolExecutor |
| 31 | 33 | from .hooks import build_default_tool_hooks |
| 32 | 34 | from .tracing import RuntimeTracer |
| 35 | +from .workflow import ( | |
| 36 | + ClarifyBrief, | |
| 37 | + ModeRouter, | |
| 38 | + PlanningArtifacts, | |
| 39 | + VERIFICATION_SEPARATOR, | |
| 40 | + WorkflowArtifactStore, | |
| 41 | + WorkflowMode, | |
| 42 | + build_execute_bridge, | |
| 43 | + extract_verification_commands_from_markdown, | |
| 44 | + sync_todos_to_definition_of_done, | |
| 45 | +) | |
| 33 | 46 | |
| 34 | 47 | EventSink = Callable[[AgentEvent], Awaitable[None]] |
| 35 | 48 | ConfirmationHandler = Callable[[str, str, str], Awaitable[bool]] | None |
@@ -63,6 +76,8 @@ class ConversationRuntime: | ||
| 63 | 76 | self.tracer = RuntimeTracer() |
| 64 | 77 | self.executor: ToolExecutor | None = None |
| 65 | 78 | self.dod_store = DefinitionOfDoneStore(agent.project_root) |
| 79 | + self.router = ModeRouter() | |
| 80 | + self.artifact_store = WorkflowArtifactStore(agent.project_root) | |
| 66 | 81 | |
| 67 | 82 | async def run_turn( |
| 68 | 83 | self, |
@@ -70,6 +85,7 @@ class ConversationRuntime: | ||
| 70 | 85 | emit: EventSink, |
| 71 | 86 | on_confirmation: ConfirmationHandler = None, |
| 72 | 87 | on_user_question: UserQuestionHandler = None, |
| 88 | + requested_mode: str | None = None, | |
| 73 | 89 | original_task: str | None = None, |
| 74 | 90 | ) -> TurnSummary: |
| 75 | 91 | """Run one task turn and return a structured summary.""" |
@@ -110,6 +126,16 @@ class ConversationRuntime: | ||
| 110 | 126 | summary.definition_of_done = dod |
| 111 | 127 | await self._emit_dod_status(emit, dod) |
| 112 | 128 | |
| 129 | + task = await self._prepare_workflow( | |
| 130 | + task=task, | |
| 131 | + dod=dod, | |
| 132 | + emit=emit, | |
| 133 | + summary=summary, | |
| 134 | + on_confirmation=on_confirmation, | |
| 135 | + on_user_question=on_user_question, | |
| 136 | + requested_mode=requested_mode, | |
| 137 | + ) | |
| 138 | + | |
| 113 | 139 | while iterations < self.agent.config.max_iterations: |
| 114 | 140 | iterations += 1 |
| 115 | 141 | summary.iterations = iterations |
@@ -315,6 +341,13 @@ class ConversationRuntime: | ||
| 315 | 341 | |
| 316 | 342 | if outcome.state == ToolExecutionState.EXECUTED and not outcome.is_error: |
| 317 | 343 | record_successful_tool_call(dod, tool_call) |
| 344 | + if ( | |
| 345 | + tool_call.name == "TodoWrite" | |
| 346 | + and outcome.registry_result is not None | |
| 347 | + ): | |
| 348 | + new_todos = outcome.registry_result.metadata.get("new_todos", []) | |
| 349 | + if isinstance(new_todos, list): | |
| 350 | + sync_todos_to_definition_of_done(dod, new_todos) | |
| 318 | 351 | self.dod_store.save(dod) |
| 319 | 352 | self.agent._recovery_context = None |
| 320 | 353 | is_loop, loop_description = self.agent.safeguards.detect_loop() |
@@ -730,6 +763,433 @@ class ConversationRuntime: | ||
| 730 | 763 | is_error=True, |
| 731 | 764 | ) |
| 732 | 765 | |
| 766 | + async def _prepare_workflow( | |
| 767 | + self, | |
| 768 | + *, | |
| 769 | + task: str, | |
| 770 | + dod: DefinitionOfDone, | |
| 771 | + emit: EventSink, | |
| 772 | + summary: TurnSummary, | |
| 773 | + on_confirmation: ConfirmationHandler, | |
| 774 | + on_user_question: UserQuestionHandler, | |
| 775 | + requested_mode: str | None, | |
| 776 | + ) -> str: | |
| 777 | + requested = WorkflowMode.from_str(requested_mode) | |
| 778 | + decision = self.router.route( | |
| 779 | + task, | |
| 780 | + requested_mode=requested, | |
| 781 | + has_brief=self._artifact_exists(dod.clarify_brief), | |
| 782 | + has_plan=self._artifact_exists(dod.implementation_plan) | |
| 783 | + and self._artifact_exists(dod.verification_plan), | |
| 784 | + ) | |
| 785 | + await self._set_workflow_mode( | |
| 786 | + decision.mode, | |
| 787 | + dod=dod, | |
| 788 | + emit=emit, | |
| 789 | + summary=summary, | |
| 790 | + reason=decision.reason, | |
| 791 | + ) | |
| 792 | + | |
| 793 | + if decision.mode == WorkflowMode.CLARIFY: | |
| 794 | + await self._run_clarify_mode( | |
| 795 | + task=task, | |
| 796 | + dod=dod, | |
| 797 | + emit=emit, | |
| 798 | + summary=summary, | |
| 799 | + on_user_question=on_user_question, | |
| 800 | + ) | |
| 801 | + decision = self.router.route( | |
| 802 | + task, | |
| 803 | + has_brief=self._artifact_exists(dod.clarify_brief), | |
| 804 | + has_plan=self._artifact_exists(dod.implementation_plan) | |
| 805 | + and self._artifact_exists(dod.verification_plan), | |
| 806 | + allow_clarify=False, | |
| 807 | + ) | |
| 808 | + await self._set_workflow_mode( | |
| 809 | + decision.mode, | |
| 810 | + dod=dod, | |
| 811 | + emit=emit, | |
| 812 | + summary=summary, | |
| 813 | + reason=f"clarify handoff: {decision.reason}", | |
| 814 | + ) | |
| 815 | + | |
| 816 | + if decision.mode == WorkflowMode.PLAN: | |
| 817 | + await self._run_plan_mode( | |
| 818 | + task=task, | |
| 819 | + dod=dod, | |
| 820 | + emit=emit, | |
| 821 | + summary=summary, | |
| 822 | + on_confirmation=on_confirmation, | |
| 823 | + on_user_question=on_user_question, | |
| 824 | + ) | |
| 825 | + await self._set_workflow_mode( | |
| 826 | + WorkflowMode.EXECUTE, | |
| 827 | + dod=dod, | |
| 828 | + emit=emit, | |
| 829 | + summary=summary, | |
| 830 | + reason="plan artifacts created; switching to execute", | |
| 831 | + ) | |
| 832 | + | |
| 833 | + bridge = build_execute_bridge( | |
| 834 | + Path(dod.clarify_brief) if dod.clarify_brief else None, | |
| 835 | + Path(dod.implementation_plan) if dod.implementation_plan else None, | |
| 836 | + Path(dod.verification_plan) if dod.verification_plan else None, | |
| 837 | + ) | |
| 838 | + if bridge and not any( | |
| 839 | + message.role == Role.USER and "[WORKFLOW BRIDGE]" in message.content | |
| 840 | + for message in self.agent.messages[-4:] | |
| 841 | + ): | |
| 842 | + self.agent.session.append( | |
| 843 | + Message( | |
| 844 | + role=Role.USER, | |
| 845 | + content=( | |
| 846 | + "[WORKFLOW BRIDGE]\n" | |
| 847 | + f"{bridge}\n\n" | |
| 848 | + "Honor these artifacts while you execute the task. " | |
| 849 | + "Keep TodoWrite current when the work spans multiple steps." | |
| 850 | + ), | |
| 851 | + ) | |
| 852 | + ) | |
| 853 | + return task | |
| 854 | + | |
| 855 | + async def _set_workflow_mode( | |
| 856 | + self, | |
| 857 | + mode: WorkflowMode, | |
| 858 | + *, | |
| 859 | + dod: DefinitionOfDone, | |
| 860 | + emit: EventSink, | |
| 861 | + summary: TurnSummary, | |
| 862 | + reason: str, | |
| 863 | + ) -> None: | |
| 864 | + self.agent.set_workflow_mode(mode.value) | |
| 865 | + dod.current_mode = mode.value | |
| 866 | + if not dod.mode_history or dod.mode_history[-1] != mode.value: | |
| 867 | + dod.mode_history.append(mode.value) | |
| 868 | + summary.workflow_mode = mode.value | |
| 869 | + summary.definition_of_done = dod | |
| 870 | + self.dod_store.save(dod) | |
| 871 | + await emit( | |
| 872 | + AgentEvent( | |
| 873 | + type="workflow_mode", | |
| 874 | + content=f"Workflow: {mode.value} ({reason})", | |
| 875 | + workflow_mode=mode.value, | |
| 876 | + definition_of_done=dod, | |
| 877 | + ) | |
| 878 | + ) | |
| 879 | + | |
| 880 | + async def _emit_artifact( | |
| 881 | + self, | |
| 882 | + *, | |
| 883 | + emit: EventSink, | |
| 884 | + kind: str, | |
| 885 | + path: Path, | |
| 886 | + preview: str, | |
| 887 | + ) -> None: | |
| 888 | + await emit( | |
| 889 | + AgentEvent( | |
| 890 | + type="artifact", | |
| 891 | + content=preview, | |
| 892 | + artifact_kind=kind, | |
| 893 | + artifact_path=str(path), | |
| 894 | + ) | |
| 895 | + ) | |
| 896 | + | |
| 897 | + async def _complete_in_mode( | |
| 898 | + self, | |
| 899 | + *, | |
| 900 | + prompt: str, | |
| 901 | + tools: list[dict[str, Any]] | None, | |
| 902 | + max_tokens: int, | |
| 903 | + temperature: float = 0.2, | |
| 904 | + ): | |
| 905 | + return await self.agent.backend.complete( | |
| 906 | + messages=self.agent.session.build_request_messages() | |
| 907 | + + [Message(role=Role.USER, content=prompt)], | |
| 908 | + tools=tools, | |
| 909 | + temperature=temperature, | |
| 910 | + max_tokens=max_tokens, | |
| 911 | + ) | |
| 912 | + | |
| 913 | + async def _run_clarify_mode( | |
| 914 | + self, | |
| 915 | + *, | |
| 916 | + task: str, | |
| 917 | + dod: DefinitionOfDone, | |
| 918 | + emit: EventSink, | |
| 919 | + summary: TurnSummary, | |
| 920 | + on_user_question: UserQuestionHandler, | |
| 921 | + ) -> None: | |
| 922 | + ask_tool = self.agent.registry.get("AskUserQuestion") | |
| 923 | + assert ask_tool is not None | |
| 924 | + prompt = ( | |
| 925 | + "Clarify the task before planning or implementation.\n" | |
| 926 | + "Ask exactly one focused question with AskUserQuestion.\n" | |
| 927 | + "Target missing outcome, scope, or decision-boundary information.\n" | |
| 928 | + "Do not propose solutions yet.\n\n" | |
| 929 | + f"Task: {task}" | |
| 930 | + ) | |
| 931 | + response = await self._complete_in_mode( | |
| 932 | + prompt=prompt, | |
| 933 | + tools=[ask_tool.to_schema()], | |
| 934 | + max_tokens=300, | |
| 935 | + ) | |
| 936 | + tool_call = next( | |
| 937 | + ( | |
| 938 | + tool | |
| 939 | + for tool in response.tool_calls | |
| 940 | + if tool.name == "AskUserQuestion" | |
| 941 | + ), | |
| 942 | + None, | |
| 943 | + ) | |
| 944 | + if tool_call is None: | |
| 945 | + tool_call = ToolCall( | |
| 946 | + id="clarify-question-1", | |
| 947 | + name="AskUserQuestion", | |
| 948 | + arguments={ | |
| 949 | + "question": self._fallback_clarify_question(task, response.content), | |
| 950 | + }, | |
| 951 | + ) | |
| 952 | + | |
| 953 | + assistant_message = Message( | |
| 954 | + role=Role.ASSISTANT, | |
| 955 | + content=response.content or tool_call.arguments.get("question", ""), | |
| 956 | + tool_calls=[tool_call], | |
| 957 | + ) | |
| 958 | + self.agent.session.append(assistant_message) | |
| 959 | + summary.assistant_messages.append(assistant_message) | |
| 960 | + | |
| 961 | + await emit( | |
| 962 | + AgentEvent( | |
| 963 | + type="tool_call", | |
| 964 | + tool_name=tool_call.name, | |
| 965 | + tool_args=tool_call.arguments, | |
| 966 | + phase="clarify", | |
| 967 | + ) | |
| 968 | + ) | |
| 969 | + assert self.executor is not None | |
| 970 | + outcome = await self.executor.execute_tool_call( | |
| 971 | + tool_call, | |
| 972 | + on_confirmation=None, | |
| 973 | + on_user_question=on_user_question, | |
| 974 | + emit_confirmation=None, | |
| 975 | + source="clarify", | |
| 976 | + skip_duplicate_check=True, | |
| 977 | + record_action=False, | |
| 978 | + skip_confirmation=True, | |
| 979 | + ) | |
| 980 | + await emit( | |
| 981 | + AgentEvent( | |
| 982 | + type="tool_result", | |
| 983 | + content=outcome.event_content, | |
| 984 | + tool_name=tool_call.name, | |
| 985 | + is_error=outcome.is_error, | |
| 986 | + phase="clarify", | |
| 987 | + ) | |
| 988 | + ) | |
| 989 | + self.agent.session.append(outcome.message) | |
| 990 | + summary.tool_result_messages.append(outcome.message) | |
| 991 | + | |
| 992 | + question = str(tool_call.arguments.get("question", "")).strip() | |
| 993 | + answer = "" | |
| 994 | + if outcome.registry_result is not None: | |
| 995 | + answer = str(outcome.registry_result.metadata.get("answer", "")).strip() | |
| 996 | + | |
| 997 | + brief_prompt = ( | |
| 998 | + "Write a concise task brief in markdown using these exact sections:\n" | |
| 999 | + "## Task Statement\n" | |
| 1000 | + "## Desired Outcome\n" | |
| 1001 | + "## In Scope\n" | |
| 1002 | + "## Non Goals\n" | |
| 1003 | + "## Decision Boundaries\n" | |
| 1004 | + "## Constraints\n" | |
| 1005 | + "## Likely Touchpoints\n" | |
| 1006 | + "## Assumptions\n" | |
| 1007 | + "## Acceptance Criteria\n\n" | |
| 1008 | + "Use short bullet lists when helpful. Do not start implementing.\n\n" | |
| 1009 | + f"Task: {task}\n" | |
| 1010 | + f"Question: {question}\n" | |
| 1011 | + f"Answer: {answer or 'No answer provided.'}" | |
| 1012 | + ) | |
| 1013 | + brief_response = await self._complete_in_mode( | |
| 1014 | + prompt=brief_prompt, | |
| 1015 | + tools=None, | |
| 1016 | + max_tokens=900, | |
| 1017 | + temperature=0.1, | |
| 1018 | + ) | |
| 1019 | + brief = ( | |
| 1020 | + ClarifyBrief.from_markdown( | |
| 1021 | + brief_response.content, | |
| 1022 | + task_statement=task, | |
| 1023 | + question=question, | |
| 1024 | + answer=answer, | |
| 1025 | + ) | |
| 1026 | + if brief_response.content.strip() | |
| 1027 | + else ClarifyBrief.fallback( | |
| 1028 | + task_statement=task, | |
| 1029 | + question=question, | |
| 1030 | + answer=answer, | |
| 1031 | + ) | |
| 1032 | + ) | |
| 1033 | + brief_path = self.artifact_store.write_brief(task, brief) | |
| 1034 | + dod.clarify_brief = str(brief_path) | |
| 1035 | + dod.acceptance_criteria = list(dict.fromkeys(brief.acceptance_criteria)) | |
| 1036 | + self.dod_store.save(dod) | |
| 1037 | + await self._emit_artifact( | |
| 1038 | + emit=emit, | |
| 1039 | + kind="clarify_brief", | |
| 1040 | + path=brief_path, | |
| 1041 | + preview=( | |
| 1042 | + f"Clarify brief: {brief_path}\n" | |
| 1043 | + f"Outcome: {brief.desired_outcome[0]}" | |
| 1044 | + ), | |
| 1045 | + ) | |
| 1046 | + | |
| 1047 | + async def _run_plan_mode( | |
| 1048 | + self, | |
| 1049 | + *, | |
| 1050 | + task: str, | |
| 1051 | + dod: DefinitionOfDone, | |
| 1052 | + emit: EventSink, | |
| 1053 | + summary: TurnSummary, | |
| 1054 | + on_confirmation: ConfirmationHandler, | |
| 1055 | + on_user_question: UserQuestionHandler, | |
| 1056 | + ) -> None: | |
| 1057 | + prompt = ( | |
| 1058 | + "Produce two markdown planning artifacts separated by the exact line " | |
| 1059 | + f"`{VERIFICATION_SEPARATOR}`.\n\n" | |
| 1060 | + "Before the separator, write an Implementation Plan with these sections:\n" | |
| 1061 | + "## File Changes\n" | |
| 1062 | + "## Execution Order\n" | |
| 1063 | + "## Risks\n\n" | |
| 1064 | + "After the separator, write a Verification Plan with these sections:\n" | |
| 1065 | + "## Acceptance Criteria\n" | |
| 1066 | + "## Verification Commands\n" | |
| 1067 | + "## Notes\n\n" | |
| 1068 | + "Do not start writing code.\n\n" | |
| 1069 | + f"Task: {task}" | |
| 1070 | + ) | |
| 1071 | + response = await self._complete_in_mode( | |
| 1072 | + prompt=prompt, | |
| 1073 | + tools=None, | |
| 1074 | + max_tokens=1400, | |
| 1075 | + temperature=0.2, | |
| 1076 | + ) | |
| 1077 | + artifacts = ( | |
| 1078 | + PlanningArtifacts.from_model_output( | |
| 1079 | + response.content, | |
| 1080 | + task_statement=task, | |
| 1081 | + ) | |
| 1082 | + if response.content.strip() | |
| 1083 | + else PlanningArtifacts.fallback(task_statement=task) | |
| 1084 | + ) | |
| 1085 | + implementation_path, verification_path = self.artifact_store.write_plan( | |
| 1086 | + task, | |
| 1087 | + artifacts, | |
| 1088 | + ) | |
| 1089 | + dod.implementation_plan = str(implementation_path) | |
| 1090 | + dod.verification_plan = str(verification_path) | |
| 1091 | + dod.acceptance_criteria = list( | |
| 1092 | + dict.fromkeys(dod.acceptance_criteria + artifacts.acceptance_criteria) | |
| 1093 | + ) | |
| 1094 | + if artifacts.verification_commands: | |
| 1095 | + dod.verification_commands = artifacts.verification_commands | |
| 1096 | + self.dod_store.save(dod) | |
| 1097 | + await self._emit_artifact( | |
| 1098 | + emit=emit, | |
| 1099 | + kind="implementation_plan", | |
| 1100 | + path=implementation_path, | |
| 1101 | + preview=( | |
| 1102 | + f"Implementation plan: {implementation_path}\n" | |
| 1103 | + f"Steps: {len(artifacts.implementation_steps)}" | |
| 1104 | + ), | |
| 1105 | + ) | |
| 1106 | + await self._emit_artifact( | |
| 1107 | + emit=emit, | |
| 1108 | + kind="verification_plan", | |
| 1109 | + path=verification_path, | |
| 1110 | + preview=( | |
| 1111 | + f"Verification plan: {verification_path}\n" | |
| 1112 | + f"Commands: {len(artifacts.verification_commands)}" | |
| 1113 | + ), | |
| 1114 | + ) | |
| 1115 | + await self._seed_todos_from_plan( | |
| 1116 | + artifacts=artifacts, | |
| 1117 | + dod=dod, | |
| 1118 | + emit=emit, | |
| 1119 | + ) | |
| 1120 | + | |
| 1121 | + async def _seed_todos_from_plan( | |
| 1122 | + self, | |
| 1123 | + *, | |
| 1124 | + artifacts: PlanningArtifacts, | |
| 1125 | + dod: DefinitionOfDone, | |
| 1126 | + emit: EventSink, | |
| 1127 | + ) -> None: | |
| 1128 | + if not artifacts.implementation_steps: | |
| 1129 | + return | |
| 1130 | + | |
| 1131 | + todos = [ | |
| 1132 | + { | |
| 1133 | + "content": step, | |
| 1134 | + "active_form": f"Working on: {step}", | |
| 1135 | + "status": "pending", | |
| 1136 | + } | |
| 1137 | + for step in artifacts.implementation_steps[:8] | |
| 1138 | + ] | |
| 1139 | + tool_call = ToolCall( | |
| 1140 | + id="plan-todos-1", | |
| 1141 | + name="TodoWrite", | |
| 1142 | + arguments={"todos": todos}, | |
| 1143 | + ) | |
| 1144 | + await emit( | |
| 1145 | + AgentEvent( | |
| 1146 | + type="tool_call", | |
| 1147 | + tool_name=tool_call.name, | |
| 1148 | + tool_args=tool_call.arguments, | |
| 1149 | + phase="plan", | |
| 1150 | + ) | |
| 1151 | + ) | |
| 1152 | + assert self.executor is not None | |
| 1153 | + outcome = await self.executor.execute_tool_call( | |
| 1154 | + tool_call, | |
| 1155 | + on_confirmation=None, | |
| 1156 | + on_user_question=None, | |
| 1157 | + emit_confirmation=None, | |
| 1158 | + source="plan", | |
| 1159 | + skip_duplicate_check=True, | |
| 1160 | + record_action=False, | |
| 1161 | + skip_confirmation=True, | |
| 1162 | + ) | |
| 1163 | + await emit( | |
| 1164 | + AgentEvent( | |
| 1165 | + type="tool_result", | |
| 1166 | + content=outcome.event_content, | |
| 1167 | + tool_name=tool_call.name, | |
| 1168 | + is_error=outcome.is_error, | |
| 1169 | + phase="plan", | |
| 1170 | + ) | |
| 1171 | + ) | |
| 1172 | + if outcome.registry_result is not None: | |
| 1173 | + new_todos = outcome.registry_result.metadata.get("new_todos", []) | |
| 1174 | + if isinstance(new_todos, list): | |
| 1175 | + sync_todos_to_definition_of_done(dod, new_todos) | |
| 1176 | + self.dod_store.save(dod) | |
| 1177 | + | |
| 1178 | + @staticmethod | |
| 1179 | + def _artifact_exists(path_str: str | None) -> bool: | |
| 1180 | + return bool(path_str and Path(path_str).exists()) | |
| 1181 | + | |
| 1182 | + @staticmethod | |
| 1183 | + def _fallback_clarify_question(task: str, response_content: str) -> str: | |
| 1184 | + match = re.search(r"([A-Z][^?]+\?)", response_content) | |
| 1185 | + if match: | |
| 1186 | + return match.group(1).strip() | |
| 1187 | + return ( | |
| 1188 | + "What outcome matters most here, and what should stay out of scope?" | |
| 1189 | + if task.strip() | |
| 1190 | + else "What outcome matters most?" | |
| 1191 | + ) | |
| 1192 | + | |
| 733 | 1193 | async def _run_definition_of_done_gate( |
| 734 | 1194 | self, |
| 735 | 1195 | *, |
@@ -743,8 +1203,31 @@ class ConversationRuntime: | ||
| 743 | 1203 | dod.pending_items.remove(implementation_item) |
| 744 | 1204 | dod.completed_items.append(implementation_item) |
| 745 | 1205 | |
| 1206 | + tracked_pending_items = [ | |
| 1207 | + item | |
| 1208 | + for item in dod.pending_items | |
| 1209 | + if item != "Collect verification evidence" | |
| 1210 | + ] | |
| 1211 | + | |
| 746 | 1212 | mutating_paths = [path for path in dod.touched_files if path] |
| 747 | 1213 | requires_verification = bool(mutating_paths or dod.mutating_actions) |
| 1214 | + if tracked_pending_items and not requires_verification: | |
| 1215 | + pending_text = "\n".join(f"- {item}" for item in tracked_pending_items) | |
| 1216 | + self.dod_store.save(dod) | |
| 1217 | + await self._emit_dod_status(emit, dod) | |
| 1218 | + self.agent.session.append( | |
| 1219 | + Message( | |
| 1220 | + role=Role.USER, | |
| 1221 | + content=( | |
| 1222 | + "[PENDING WORK REMAINS]\n" | |
| 1223 | + "The tracked work items are not complete yet:\n" | |
| 1224 | + f"{pending_text}\n\n" | |
| 1225 | + "Continue the task, and update TodoWrite as you make progress." | |
| 1226 | + ), | |
| 1227 | + ) | |
| 1228 | + ) | |
| 1229 | + return CompletionGateResult(should_continue=True, final_response="") | |
| 1230 | + | |
| 748 | 1231 | if not requires_verification: |
| 749 | 1232 | dod.status = "done" |
| 750 | 1233 | dod.last_verification_result = "skipped" |
@@ -761,6 +1244,11 @@ class ConversationRuntime: | ||
| 761 | 1244 | if verify_item not in dod.pending_items and verify_item not in dod.completed_items: |
| 762 | 1245 | dod.pending_items.append(verify_item) |
| 763 | 1246 | |
| 1247 | + if not dod.verification_commands and dod.verification_plan and Path(dod.verification_plan).exists(): | |
| 1248 | + dod.verification_commands = extract_verification_commands_from_markdown( | |
| 1249 | + Path(dod.verification_plan).read_text() | |
| 1250 | + ) | |
| 1251 | + | |
| 764 | 1252 | if not dod.verification_commands: |
| 765 | 1253 | dod.verification_commands = derive_verification_commands( |
| 766 | 1254 | dod, |
@@ -768,6 +1256,13 @@ class ConversationRuntime: | ||
| 768 | 1256 | task_statement=dod.task_statement, |
| 769 | 1257 | ) |
| 770 | 1258 | |
| 1259 | + await self._set_workflow_mode( | |
| 1260 | + WorkflowMode.VERIFY, | |
| 1261 | + dod=dod, | |
| 1262 | + emit=emit, | |
| 1263 | + summary=summary, | |
| 1264 | + reason="definition-of-done gate requires verification", | |
| 1265 | + ) | |
| 771 | 1266 | verification_passed = await self._verify_definition_of_done( |
| 772 | 1267 | dod=dod, |
| 773 | 1268 | emit=emit, |
@@ -778,6 +1273,10 @@ class ConversationRuntime: | ||
| 778 | 1273 | dod.pending_items.remove(verify_item) |
| 779 | 1274 | if verify_item not in dod.completed_items: |
| 780 | 1275 | dod.completed_items.append(verify_item) |
| 1276 | + for pending in list(dod.pending_items): | |
| 1277 | + if pending not in dod.completed_items: | |
| 1278 | + dod.completed_items.append(pending) | |
| 1279 | + dod.pending_items = [] | |
| 781 | 1280 | dod.status = "done" |
| 782 | 1281 | dod.last_verification_result = "passed" |
| 783 | 1282 | dod.confidence = "high" |
@@ -817,6 +1316,13 @@ class ConversationRuntime: | ||
| 817 | 1316 | dod.confidence = "medium" |
| 818 | 1317 | self.dod_store.save(dod) |
| 819 | 1318 | await self._emit_dod_status(emit, dod) |
| 1319 | + await self._set_workflow_mode( | |
| 1320 | + WorkflowMode.EXECUTE, | |
| 1321 | + dod=dod, | |
| 1322 | + emit=emit, | |
| 1323 | + summary=summary, | |
| 1324 | + reason="verification failed; returning to execute for fixes", | |
| 1325 | + ) | |
| 820 | 1326 | failure_prompt = ( |
| 821 | 1327 | "[DEFINITION OF DONE CHECK FAILED]\n" |
| 822 | 1328 | f"Task: {dod.task_statement}\n" |
src/loader/runtime/dod.pymodified@@ -53,6 +53,11 @@ class DefinitionOfDone: | ||
| 53 | 53 | line_changes: int = 0 |
| 54 | 54 | storage_path: str | None = None |
| 55 | 55 | last_verification_result: str | None = None |
| 56 | + current_mode: str = "execute" | |
| 57 | + mode_history: list[str] = field(default_factory=list) | |
| 58 | + clarify_brief: str | None = None | |
| 59 | + implementation_plan: str | None = None | |
| 60 | + verification_plan: str | None = None | |
| 56 | 61 | |
| 57 | 62 | def to_dict(self) -> dict[str, Any]: |
| 58 | 63 | """Serialize the DoD state for persistence.""" |
@@ -83,6 +88,11 @@ class DefinitionOfDone: | ||
| 83 | 88 | line_changes=int(data.get("line_changes", 0)), |
| 84 | 89 | storage_path=data.get("storage_path"), |
| 85 | 90 | last_verification_result=data.get("last_verification_result"), |
| 91 | + current_mode=data.get("current_mode", "execute"), | |
| 92 | + mode_history=list(data.get("mode_history", [])), | |
| 93 | + clarify_brief=data.get("clarify_brief"), | |
| 94 | + implementation_plan=data.get("implementation_plan"), | |
| 95 | + verification_plan=data.get("verification_plan"), | |
| 86 | 96 | ) |
| 87 | 97 | |
| 88 | 98 | |
src/loader/runtime/events.pymodified@@ -38,6 +38,9 @@ class AgentEvent: | ||
| 38 | 38 | dod_status: str | None = None |
| 39 | 39 | pending_items_count: int | None = None |
| 40 | 40 | last_verification_result: str | None = None |
| 41 | + workflow_mode: str | None = None | |
| 42 | + artifact_kind: str | None = None | |
| 43 | + artifact_path: str | None = None | |
| 41 | 44 | |
| 42 | 45 | decomposition: TaskDecomposition | None = None |
| 43 | 46 | subtask: Subtask | None = None |
@@ -63,3 +66,4 @@ class TurnSummary: | ||
| 63 | 66 | usage: dict[str, int] = field(default_factory=dict) |
| 64 | 67 | trace: list[RuntimeTraceEvent] = field(default_factory=list) |
| 65 | 68 | definition_of_done: DefinitionOfDone | None = None |
| 69 | + workflow_mode: str | None = None | |
src/loader/runtime/workflow.pyadded@@ -0,0 +1,637 @@ | ||
| 1 | +"""Workflow routing and artifact persistence for Loader runtime modes.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +import re | |
| 6 | +from dataclasses import dataclass, field | |
| 7 | +from datetime import UTC, datetime | |
| 8 | +from enum import StrEnum | |
| 9 | +from pathlib import Path | |
| 10 | + | |
| 11 | +from .dod import slugify | |
| 12 | + | |
# Sentinel the planner model emits to split one response into the
# implementation plan (before) and the verification plan (after).
VERIFICATION_SEPARATOR = "<<<VERIFICATION>>>"

# Maps normalized markdown heading text (lowercased, non-alphanumerics
# collapsed to single spaces) to the canonical section keys used by the
# brief/plan parsers. Headings not listed here fall back to the
# normalized text with spaces replaced by underscores.
_SECTION_ALIASES = {
    "task statement": "task_statement",
    "desired outcome": "desired_outcome",
    "in scope": "in_scope",
    "out of scope": "non_goals",
    "out of scope non goals": "non_goals",
    "out of scope or non goals": "non_goals",
    "non goals": "non_goals",
    "non-goals": "non_goals",
    "decision boundaries": "decision_boundaries",
    "constraints": "constraints",
    "likely touchpoints": "likely_touchpoints",
    "assumptions": "assumptions",
    "acceptance criteria": "acceptance_criteria",
    "file changes": "file_changes",
    "execution order": "execution_order",
    "risks": "risks",
    "verification commands": "verification_commands",
    "commands": "verification_commands",
    "notes": "notes",
}
| 36 | + | |
| 37 | + | |
| 38 | +class WorkflowMode(StrEnum): | |
| 39 | + """High-level runtime modes for one Loader task turn.""" | |
| 40 | + | |
| 41 | + CLARIFY = "clarify" | |
| 42 | + PLAN = "plan" | |
| 43 | + EXECUTE = "execute" | |
| 44 | + VERIFY = "verify" | |
| 45 | + | |
| 46 | + @classmethod | |
| 47 | + def from_str(cls, value: str | None) -> WorkflowMode | None: | |
| 48 | + if value is None: | |
| 49 | + return None | |
| 50 | + normalized = value.strip().lower() | |
| 51 | + for mode in cls: | |
| 52 | + if mode.value == normalized: | |
| 53 | + return mode | |
| 54 | + raise ValueError(f"Unknown workflow mode: {value}") | |
| 55 | + | |
| 56 | + | |
@dataclass(slots=True)
class ModeDecision:
    """Router output for the entry point of a task turn."""

    # Mode the turn should start in (clarify / plan / execute / verify).
    mode: WorkflowMode
    # Short human-readable justification for the chosen mode.
    reason: str
    # Heuristic scores in [0, 1]; both stay 0.0 when the route was forced
    # (explicit request or existing plan artifacts) and no scoring ran.
    ambiguity_score: float = 0.0
    complexity_score: float = 0.0
| 65 | + | |
| 66 | + | |
@dataclass(slots=True)
class ClarifyBrief:
    """Execution-ready brief created from one clarify round.

    Captures the clarified scope of a task (outcome, boundaries,
    constraints) plus the single clarify question/answer pair, and
    renders to and from the ``# Task Brief`` markdown format.
    """

    task_statement: str
    desired_outcome: list[str] = field(default_factory=list)
    in_scope: list[str] = field(default_factory=list)
    non_goals: list[str] = field(default_factory=list)
    decision_boundaries: list[str] = field(default_factory=list)
    constraints: list[str] = field(default_factory=list)
    likely_touchpoints: list[str] = field(default_factory=list)
    assumptions: list[str] = field(default_factory=list)
    acceptance_criteria: list[str] = field(default_factory=list)
    question: str | None = None
    answer: str | None = None

    @classmethod
    def from_markdown(
        cls,
        markdown: str,
        *,
        task_statement: str,
        question: str | None = None,
        answer: str | None = None,
    ) -> ClarifyBrief:
        """Parse a brief from its markdown sections.

        Sections absent from *markdown* come back empty and are then
        backfilled by :meth:`fill_defaults`, so the returned brief is
        always fully populated.
        """
        sections = _parse_markdown_sections(markdown)
        brief = cls(
            # Prefer the document's own Task Statement section when present.
            task_statement=_first_item(sections.get("task_statement")) or task_statement,
            desired_outcome=sections.get("desired_outcome", []),
            in_scope=sections.get("in_scope", []),
            non_goals=sections.get("non_goals", []),
            decision_boundaries=sections.get("decision_boundaries", []),
            constraints=sections.get("constraints", []),
            likely_touchpoints=sections.get("likely_touchpoints", []),
            assumptions=sections.get("assumptions", []),
            acceptance_criteria=sections.get("acceptance_criteria", []),
            question=question,
            answer=answer,
        )
        brief.fill_defaults()
        return brief

    @classmethod
    def fallback(
        cls,
        *,
        task_statement: str,
        question: str,
        answer: str,
    ) -> ClarifyBrief:
        """Build a minimal brief straight from one question/answer pair.

        Used when no structured markdown brief could be produced; every
        section gets a conservative placeholder derived from the answer.
        """
        brief = cls(
            task_statement=task_statement,
            desired_outcome=[answer or "Clarify the intended outcome before implementation."],
            in_scope=[task_statement],
            non_goals=["Anything not confirmed in the clarification answer."],
            decision_boundaries=["Escalate if the clarified scope changes materially."],
            constraints=["Honor the clarified answer and existing repository conventions."],
            likely_touchpoints=["Determine the concrete files during execution."],
            assumptions=[f"Clarification answer: {answer or 'No answer provided.'}"],
            question=question,
            answer=answer,
        )
        brief.fill_defaults()
        return brief

    def fill_defaults(self) -> None:
        """Backfill any empty section with a conservative default.

        Guarantees downstream consumers never see an empty brief section.
        """
        if not self.desired_outcome:
            self.desired_outcome = [self.task_statement]
        if not self.in_scope:
            self.in_scope = [self.task_statement]
        if not self.non_goals:
            self.non_goals = ["Do not expand beyond the clarified task statement."]
        if not self.decision_boundaries:
            self.decision_boundaries = [
                "Escalate for destructive or preference-dependent changes.",
            ]
        if not self.constraints:
            self.constraints = ["Preserve the existing codebase conventions and tests."]
        if not self.likely_touchpoints:
            self.likely_touchpoints = ["Identify exact files during planning or execution."]
        if not self.assumptions:
            self.assumptions = ["Unspecified details stay unchanged unless evidence says otherwise."]
        if not self.acceptance_criteria:
            # Seed criteria from the outcome plus the first two in-scope
            # items; dict.fromkeys dedups while preserving order.
            self.acceptance_criteria = list(
                dict.fromkeys(self.desired_outcome + self.in_scope[:2])
            )

    def to_markdown(self) -> str:
        """Render the brief as a ``# Task Brief`` markdown document."""
        lines = [
            "# Task Brief",
            "",
            f"Generated: {datetime.now(UTC).strftime('%Y-%m-%d %H:%M:%SZ')}",
            "",
            "## Task Statement",
            self.task_statement,
            "",
        ]
        lines.extend(_render_section("Desired Outcome", self.desired_outcome))
        lines.extend(_render_section("In Scope", self.in_scope))
        lines.extend(_render_section("Non Goals", self.non_goals))
        lines.extend(_render_section("Decision Boundaries", self.decision_boundaries))
        lines.extend(_render_section("Constraints", self.constraints))
        lines.extend(_render_section("Likely Touchpoints", self.likely_touchpoints))
        lines.extend(_render_section("Assumptions", self.assumptions))
        lines.extend(_render_section("Acceptance Criteria", self.acceptance_criteria))
        # Question/answer sections are only emitted when present.
        if self.question:
            lines.extend(_render_section("Clarify Question", [self.question]))
        if self.answer:
            lines.extend(_render_section("Clarify Answer", [self.answer]))
        return "\n".join(lines).rstrip() + "\n"
| 177 | + | |
| 178 | + | |
@dataclass(slots=True)
class PlanningArtifacts:
    """Persistent planning artifacts created before execution."""

    # Full markdown documents persisted under `.loader/plans/<slug>/`.
    implementation_markdown: str
    verification_markdown: str
    # Structured fields extracted from the markdown sections.
    verification_commands: list[str]
    acceptance_criteria: list[str]
    implementation_steps: list[str]

    @classmethod
    def from_model_output(
        cls,
        model_output: str,
        *,
        task_statement: str,
    ) -> PlanningArtifacts:
        """Parse a planner response into plan artifacts.

        The output is split on ``VERIFICATION_SEPARATOR`` into the two
        plan documents; steps, commands, and acceptance criteria are
        pulled from the respective markdown sections, with *task_statement*
        as a last-resort placeholder for missing sections.
        """
        implementation_markdown, verification_markdown = _split_plan_output(model_output)
        implementation_sections = _parse_markdown_sections(implementation_markdown)
        verification_sections = _parse_markdown_sections(verification_markdown)

        # Prefer the explicit Execution Order; fall back to File Changes.
        implementation_steps = (
            implementation_sections.get("execution_order", [])
            or implementation_sections.get("file_changes", [])
        )
        if not implementation_steps:
            implementation_steps = [task_statement]

        verification_commands = _extract_commands(
            verification_sections.get("verification_commands", [])
        )
        # Acceptance criteria may live in either plan document.
        acceptance_criteria = (
            verification_sections.get("acceptance_criteria", [])
            or implementation_sections.get("acceptance_criteria", [])
        )
        if not acceptance_criteria:
            acceptance_criteria = [task_statement]

        return cls(
            implementation_markdown=_ensure_heading(
                implementation_markdown,
                "# Implementation Plan",
            ),
            verification_markdown=_ensure_heading(
                verification_markdown,
                "# Verification Plan",
            ),
            verification_commands=verification_commands,
            acceptance_criteria=acceptance_criteria,
            implementation_steps=implementation_steps,
        )

    @classmethod
    def fallback(
        cls,
        *,
        task_statement: str,
    ) -> PlanningArtifacts:
        """Build placeholder plans when the model produced no usable output."""
        implementation_markdown = "\n".join(
            [
                "# Implementation Plan",
                "",
                "## File Changes",
                f"- Determine concrete files needed for: {task_statement}",
                "",
                "## Execution Order",
                f"1. Inspect the codebase areas relevant to: {task_statement}",
                "2. Apply the minimum required changes.",
                "3. Re-run the most relevant verification commands.",
                "",
                "## Risks",
                "- Unknown repository conventions may require one discovery pass first.",
                "",
            ]
        )
        verification_markdown = "\n".join(
            [
                "# Verification Plan",
                "",
                "## Acceptance Criteria",
                f"- {task_statement}",
                "",
                "## Verification Commands",
                "- echo \"add verification command\"",
                "",
                "## Notes",
                "- Replace the placeholder verification command with a project-specific check.",
                "",
            ]
        )
        return cls(
            implementation_markdown=implementation_markdown,
            verification_markdown=verification_markdown,
            # Placeholder command keeps the verify phase runnable.
            verification_commands=["echo \"add verification command\""],
            acceptance_criteria=[task_statement],
            implementation_steps=[
                f"Inspect the codebase areas relevant to: {task_statement}",
                "Apply the minimum required changes.",
                "Re-run the most relevant verification commands.",
            ],
        )
| 280 | + | |
| 281 | + | |
class WorkflowArtifactStore:
    """Persist briefs and plans under `.loader/`."""

    def __init__(self, project_root: Path) -> None:
        self.project_root = project_root
        self.loader_root = project_root / ".loader"
        self.briefs_root = self.loader_root / "briefs"
        self.plans_root = self.loader_root / "plans"

    def write_brief(self, task_statement: str, brief: ClarifyBrief) -> Path:
        """Write *brief* as a timestamped markdown file and return its path."""
        target = self.briefs_root / f"{_timestamp()}-{slugify(task_statement)}.md"
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(brief.to_markdown())
        return target

    def write_plan(
        self,
        task_statement: str,
        artifacts: PlanningArtifacts,
    ) -> tuple[Path, Path]:
        """Write both plan documents; return (implementation, verification) paths."""
        folder = self.plans_root / f"{_timestamp()}-{slugify(task_statement)}"
        folder.mkdir(parents=True, exist_ok=True)
        impl_path = folder / "implementation.md"
        verify_path = folder / "verification.md"
        impl_path.write_text(artifacts.implementation_markdown.rstrip() + "\n")
        verify_path.write_text(artifacts.verification_markdown.rstrip() + "\n")
        return impl_path, verify_path
| 309 | + | |
| 310 | + | |
class ModeRouter:
    """Simple heuristic router for clarify/plan/execute entry modes."""

    clarify_threshold = 0.55
    plan_threshold = 0.45

    def route(
        self,
        task: str,
        *,
        requested_mode: WorkflowMode | None = None,
        has_brief: bool = False,
        has_plan: bool = False,
        allow_clarify: bool = True,
    ) -> ModeDecision:
        """Pick the entry mode for *task*.

        Explicit requests and existing plan artifacts short-circuit the
        heuristics; otherwise ambiguity and complexity scores decide.
        """
        if requested_mode is not None:
            return ModeDecision(
                mode=requested_mode,
                reason=f"explicit {requested_mode.value} request",
            )
        if has_plan:
            return ModeDecision(
                mode=WorkflowMode.EXECUTE,
                reason="reusing existing plan artifacts",
            )

        ambiguity = self._ambiguity_score(task)
        complexity = self._complexity_score(task)

        def scored(mode: WorkflowMode, reason: str) -> ModeDecision:
            # Attach both heuristic scores to any non-forced decision.
            return ModeDecision(
                mode=mode,
                reason=reason,
                ambiguity_score=ambiguity,
                complexity_score=complexity,
            )

        if allow_clarify and not has_brief and ambiguity >= self.clarify_threshold:
            return scored(WorkflowMode.CLARIFY, "prompt is broad or missing boundaries")
        if complexity >= self.plan_threshold:
            return scored(
                WorkflowMode.PLAN,
                "task looks complex enough to benefit from a persisted plan",
            )
        return scored(
            WorkflowMode.EXECUTE,
            "task appears concrete enough for direct execution",
        )

    def _ambiguity_score(self, task: str) -> float:
        """Score [0, 1]: how underspecified the prompt looks."""
        lowered = task.lower()
        word_count = len(re.findall(r"\w+", lowered))

        # User explicitly asked to be interviewed / not to assume.
        explicit_clarify = (
            "--clarify" in lowered
            or "don't assume" in lowered
            or "do not assume" in lowered
            or "not sure" in lowered
            or "figure out" in lowered
            or "interview me" in lowered
            or "ask me" in lowered
            or lowered.startswith("clarify ")
        )
        vague_phrases = (
            "something",
            "somehow",
            "better",
            "improve",
            "fix this",
            "make it",
            "more like",
            "feels more like",
        )
        action_verbs = ("build", "add", "improve", "refactor", "implement")

        total = 0.0
        if explicit_clarify:
            total += 0.65
        if any(phrase in lowered for phrase in vague_phrases):
            total += 0.2
        if not _has_concrete_anchor(task):
            total += 0.2
        # Very short action prompts usually omit the needed boundaries.
        if word_count <= 12 and any(verb in lowered for verb in action_verbs):
            total += 0.15
        return min(total, 1.0)

    def _complexity_score(self, task: str) -> float:
        """Score [0, 1]: how much the task would benefit from planning."""
        lowered = task.lower()
        word_count = len(re.findall(r"\w+", lowered))
        heavy_phrases = (
            "refactor",
            "architecture",
            "migrate",
            "persistent",
            "workflow",
            "deep dive",
            "report",
            "implementation plan",
            "verification plan",
        )

        total = 0.0
        if word_count >= 18:
            total += 0.2
        if word_count >= 30:
            total += 0.15
        if any(phrase in lowered for phrase in heavy_phrases):
            total += 0.3
        # Multiple conjunctions/commas suggest a multi-part request.
        if lowered.count(" and ") >= 2 or lowered.count(",") >= 2:
            total += 0.15
        if _has_concrete_anchor(task):
            total += 0.1
        return min(total, 1.0)
| 440 | + | |
| 441 | + | |
def load_brief(path: Path) -> ClarifyBrief:
    """Load a clarify brief from disk, using the filename stem as fallback task."""
    markdown = path.read_text()
    return ClarifyBrief.from_markdown(markdown, task_statement=path.stem)
| 446 | + | |
| 447 | + | |
def load_planning_artifacts(
    implementation_path: Path,
    verification_path: Path,
    *,
    task_statement: str,
) -> PlanningArtifacts:
    """Load persisted planning artifacts from disk.

    Rebuilds the original single-document form by re-joining the two
    files around ``VERIFICATION_SEPARATOR`` and re-parsing it.
    """
    combined = "\n\n".join(
        (
            implementation_path.read_text().rstrip(),
            VERIFICATION_SEPARATOR,
            verification_path.read_text().rstrip(),
        )
    )
    return PlanningArtifacts.from_model_output(combined, task_statement=task_statement)
| 464 | + | |
| 465 | + | |
def sync_todos_to_definition_of_done(
    dod,
    todos: list[dict[str, str]],
) -> None:
    """Reflect todo state into DoD pending/completed items.

    Pending items use a todo's ``active_form`` label while it is in
    progress (falling back to ``content`` when absent), completed items
    always use ``content``, and the two runtime-managed sentinel items
    are preserved in whichever list they already occupy.
    """
    sentinels = {"Complete the requested work", "Collect verification evidence"}
    special_pending = [item for item in dod.pending_items if item in sentinels]
    special_completed = [item for item in dod.completed_items if item in sentinels]

    pending: list[str] = []
    completed: list[str] = []
    for todo in todos:
        status = str(todo.get("status", "")).strip().lower()
        content = str(todo.get("content", "")).strip()
        if status == "in_progress":
            # Fall back to `content` when `active_form` is missing/empty;
            # previously str(None) produced a literal "None" pending item.
            label = str(todo.get("active_form") or content).strip()
        else:
            label = content
        if not label:
            continue
        if status == "completed":
            completed.append(content or label)
        else:
            pending.append(label)

    # Dedup while preserving order; sentinel items stay at the tail.
    dod.pending_items = list(dict.fromkeys(pending + special_pending))
    dod.completed_items = list(dict.fromkeys(completed + special_completed))
| 495 | + | |
| 496 | + | |
def extract_verification_commands_from_markdown(markdown: str) -> list[str]:
    """Extract verification commands from a verification-plan markdown document.

    Returns the entries listed under the "Verification Commands" (or
    "Commands") heading, with surrounding backticks stripped.
    """

    sections = _parse_markdown_sections(markdown)
    return _extract_commands(sections.get("verification_commands", []))
| 502 | + | |
| 503 | + | |
def build_execute_bridge(
    brief_path: Path | None,
    implementation_path: Path | None,
    verification_path: Path | None,
) -> str | None:
    """Build a compact execution bridge message from persisted artifacts.

    Missing or nonexistent paths are skipped; returns None when no
    artifact is available.
    """
    artifact_sources = (
        (brief_path, "Use the clarify brief below as the requirements source of truth.\n\n"),
        (implementation_path, "Use the implementation plan below to sequence the work.\n\n"),
        (verification_path, "Use the verification plan below to determine done-ness.\n\n"),
    )
    sections = [
        preamble + source.read_text().strip()
        for source, preamble in artifact_sources
        if source and source.exists()
    ]
    return "\n\n".join(sections) if sections else None
| 530 | + | |
| 531 | + | |
def _split_plan_output(model_output: str) -> tuple[str, str]:
    """Split model output into (implementation, verification) markdown."""
    before, separator, after = model_output.partition(VERIFICATION_SEPARATOR)
    if not separator:
        # No separator: everything is implementation, verification is empty.
        return model_output.strip(), ""
    return before.strip(), after.strip()
| 537 | + | |
| 538 | + | |
| 539 | +def _ensure_heading(markdown: str, heading: str) -> str: | |
| 540 | + stripped = markdown.strip() | |
| 541 | + if not stripped: | |
| 542 | + return heading + "\n" | |
| 543 | + if stripped.startswith("#"): | |
| 544 | + return stripped + "\n" | |
| 545 | + return f"{heading}\n\n{stripped}\n" | |
| 546 | + | |
| 547 | + | |
| 548 | +def _timestamp() -> str: | |
| 549 | + return datetime.now(UTC).strftime("%Y%m%dT%H%M%SZ") | |
| 550 | + | |
| 551 | + | |
def _normalize_heading(text: str) -> str:
    """Normalize a markdown heading to its canonical section key."""
    # Collapse runs of non-alphanumerics into single spaces, trimmed.
    cleaned = " ".join(re.findall(r"[a-z0-9]+", text.lower()))
    return _SECTION_ALIASES.get(cleaned, cleaned.replace(" ", "_"))
| 555 | + | |
| 556 | + | |
def _parse_markdown_sections(markdown: str) -> dict[str, list[str]]:
    """Group markdown body lines under normalized `##`-level heading keys."""
    raw_sections: dict[str, list[str]] = {}
    active: list[str] | None = None
    for raw_line in markdown.splitlines():
        heading_match = re.match(r"^##+\s+(.+?)\s*$", raw_line.strip())
        if heading_match:
            key = _normalize_heading(heading_match.group(1))
            # Repeated headings continue appending to the same section.
            active = raw_sections.setdefault(key, [])
        elif active is not None:
            # Lines before the first heading are dropped.
            active.append(raw_line.rstrip())
    return {key: _extract_items(lines) for key, lines in raw_sections.items()}
| 573 | + | |
| 574 | + | |
| 575 | +def _extract_items(lines: list[str]) -> list[str]: | |
| 576 | + items: list[str] = [] | |
| 577 | + paragraph_buffer: list[str] = [] | |
| 578 | + for line in lines: | |
| 579 | + stripped = line.strip() | |
| 580 | + if not stripped: | |
| 581 | + if paragraph_buffer: | |
| 582 | + items.append(" ".join(paragraph_buffer).strip()) | |
| 583 | + paragraph_buffer.clear() | |
| 584 | + continue | |
| 585 | + | |
| 586 | + bullet = re.match(r"^(?:[-*]|\d+\.)\s+(.+)$", stripped) | |
| 587 | + if bullet: | |
| 588 | + if paragraph_buffer: | |
| 589 | + items.append(" ".join(paragraph_buffer).strip()) | |
| 590 | + paragraph_buffer.clear() | |
| 591 | + items.append(bullet.group(1).strip()) | |
| 592 | + continue | |
| 593 | + paragraph_buffer.append(stripped) | |
| 594 | + if paragraph_buffer: | |
| 595 | + items.append(" ".join(paragraph_buffer).strip()) | |
| 596 | + return [item for item in items if item] | |
| 597 | + | |
| 598 | + | |
| 599 | +def _render_section(title: str, items: list[str]) -> list[str]: | |
| 600 | + lines = [f"## {title}"] | |
| 601 | + if items: | |
| 602 | + lines.extend(f"- {item}" for item in items) | |
| 603 | + else: | |
| 604 | + lines.append("- None recorded.") | |
| 605 | + lines.append("") | |
| 606 | + return lines | |
| 607 | + | |
| 608 | + | |
| 609 | +def _first_item(items: list[str] | None) -> str | None: | |
| 610 | + if not items: | |
| 611 | + return None | |
| 612 | + return items[0] | |
| 613 | + | |
| 614 | + | |
| 615 | +def _extract_commands(items: list[str]) -> list[str]: | |
| 616 | + commands: list[str] = [] | |
| 617 | + for item in items: | |
| 618 | + match = re.match(r"^`(.+)`$", item) | |
| 619 | + commands.append((match.group(1) if match else item).strip()) | |
| 620 | + return [command for command in commands if command] | |
| 621 | + | |
| 622 | + | |
| 623 | +def _has_concrete_anchor(task: str) -> bool: | |
| 624 | + return any( | |
| 625 | + re.search(pattern, task) | |
| 626 | + for pattern in ( | |
| 627 | + r"[./][\w./-]+", # file path | |
| 628 | + r"#\d+", # issue/pr number | |
| 629 | + r"\b[a-z]+[A-Z][A-Za-z0-9_]+\b", # camelCase | |
| 630 | + r"\b[A-Z][a-z0-9]+[A-Z][A-Za-z0-9_]+\b", # PascalCase symbol | |
| 631 | + r"\b[a-z0-9]+_[a-z0-9_]+\b", # snake_case | |
| 632 | + r"```", # code block | |
| 633 | + r"\bpytest\b|\bnpm test\b|\bcargo test\b|\bmypy\b|\bruff\b", | |
| 634 | + r"\bacceptance criteria\b", | |
| 635 | + r"\bTypeError\b|\bAssertionError\b|\bTraceback\b", | |
| 636 | + ) | |
| 637 | + ) | |
tests/test_workflow.pyadded@@ -0,0 +1,177 @@ | ||
| 1 | +"""Tests for Sprint 04 workflow routing and artifact persistence.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +from pathlib import Path | |
| 6 | + | |
| 7 | +from loader.runtime.dod import DefinitionOfDoneStore, create_definition_of_done | |
| 8 | +from loader.runtime.workflow import ( | |
| 9 | + ClarifyBrief, | |
| 10 | + ModeRouter, | |
| 11 | + PlanningArtifacts, | |
| 12 | + WorkflowArtifactStore, | |
| 13 | + WorkflowMode, | |
| 14 | + build_execute_bridge, | |
| 15 | + extract_verification_commands_from_markdown, | |
| 16 | + sync_todos_to_definition_of_done, | |
| 17 | +) | |
| 18 | + | |
| 19 | + | |
def test_mode_router_routes_ambiguous_prompt_to_clarify() -> None:
    """A vague prompt with no concrete anchor should land in clarify mode."""
    prompt = "Improve Loader so it feels more like claw-code."
    router = ModeRouter()

    decision = router.route(prompt)

    assert decision.ambiguity_score >= router.clarify_threshold
    assert decision.mode == WorkflowMode.CLARIFY
| 27 | + | |
| 28 | + | |
def test_mode_router_routes_complex_prompt_to_plan() -> None:
    """A multi-deliverable prompt should score complex enough for plan mode."""
    prompt = (
        "Implement a persistent workflow mode router with clarify artifacts, "
        "planning artifacts, and verification-plan wiring in the runtime."
    )
    router = ModeRouter()

    decision = router.route(prompt)

    assert decision.complexity_score >= router.plan_threshold
    assert decision.mode == WorkflowMode.PLAN
| 39 | + | |
| 40 | + | |
def test_mode_router_routes_simple_prompt_to_execute() -> None:
    """A small, concrete request should go straight to execute mode."""
    decision = ModeRouter().route("Read pyproject.toml and tell me the package name.")

    assert decision.mode == WorkflowMode.EXECUTE
| 47 | + | |
| 48 | + | |
def test_clarify_brief_round_trips_and_seeds_acceptance_criteria() -> None:
    """Serialising a fallback brief to markdown and parsing it back keeps
    the task statement and seeds acceptance criteria from the answer."""
    original = ClarifyBrief.fallback(
        task_statement="Clarify the authentication change.",
        question="What outcome matters most?",
        answer="Add login without touching the signup flow.",
    )
    rendered = original.to_markdown()

    parsed = ClarifyBrief.from_markdown(
        rendered,
        task_statement=original.task_statement,
        question=original.question,
        answer=original.answer,
    )

    assert parsed.task_statement == original.task_statement
    assert parsed.non_goals
    assert "Add login" in parsed.acceptance_criteria[0]
| 66 | + | |
| 67 | + | |
def test_planning_artifacts_round_trip_and_extract_commands() -> None:
    """Parse a combined plan/verification model output and check that
    steps, acceptance criteria, and backticked commands are all extracted."""
    # "<<<VERIFICATION>>>" is the sentinel splitting the implementation
    # plan from the verification plan in a single model response.
    artifacts = PlanningArtifacts.from_model_output(
        "\n".join(
            [
                "# Implementation Plan",
                "",
                "## Execution Order",
                "1. Inspect auth files.",
                "2. Implement the change.",
                "",
                "## Risks",
                "- Regression in signup.",
                "",
                "<<<VERIFICATION>>>",
                "",
                "# Verification Plan",
                "",
                "## Acceptance Criteria",
                "- Login works without changing signup.",
                "",
                "## Verification Commands",
                "- `uv run pytest tests/test_auth.py -q`",
                "- `uv run mypy src/loader`",
            ]
        ),
        task_statement="Clarify and implement the auth change.",
    )

    assert artifacts.implementation_steps[:2] == [
        "Inspect auth files.",
        "Implement the change.",
    ]
    assert artifacts.acceptance_criteria == ["Login works without changing signup."]
    assert artifacts.verification_commands == [
        "uv run pytest tests/test_auth.py -q",
        "uv run mypy src/loader",
    ]
    # The standalone extractor must agree with the parsed artifact's commands.
    assert extract_verification_commands_from_markdown(artifacts.verification_markdown) == [
        "uv run pytest tests/test_auth.py -q",
        "uv run mypy src/loader",
    ]
| 109 | + | |
| 110 | + | |
def test_workflow_artifact_store_and_bridge_round_trip(tmp_path: Path) -> None:
    """Persist a brief plus both plans and check the execute bridge
    includes all three artifact sections."""
    store = WorkflowArtifactStore(tmp_path)
    brief = ClarifyBrief.fallback(
        task_statement="Clarify the runtime changes.",
        question="What matters most?",
        answer="Close the tool-use gap first.",
    )
    plan = PlanningArtifacts.fallback(task_statement=brief.task_statement)

    brief_path = store.write_brief(brief.task_statement, brief)
    impl_path, verify_path = store.write_plan(brief.task_statement, plan)
    bridge = build_execute_bridge(brief_path, impl_path, verify_path)

    for written in (brief_path, impl_path, verify_path):
        assert written.exists()
    assert bridge is not None
    for heading in ("Task Brief", "Implementation Plan", "Verification Plan"):
        assert heading in bridge
| 134 | + | |
| 135 | + | |
def test_definition_of_done_round_trip_preserves_workflow_links(tmp_path: Path) -> None:
    """Saving and reloading a DoD keeps mode state and artifact links."""
    loader_dir = tmp_path / ".loader"
    original = create_definition_of_done("Implement Loader workflow routing.")
    original.current_mode = "plan"
    original.mode_history = ["clarify", "plan"]
    original.clarify_brief = str(loader_dir / "briefs" / "brief.md")
    original.implementation_plan = str(loader_dir / "plans" / "impl.md")
    original.verification_plan = str(loader_dir / "plans" / "verify.md")

    store = DefinitionOfDoneStore(tmp_path)
    restored = store.load(store.save(original))

    assert restored.current_mode == "plan"
    assert restored.mode_history == ["clarify", "plan"]
    for link in ("clarify_brief", "implementation_plan", "verification_plan"):
        assert getattr(restored, link) == getattr(original, link)
| 153 | + | |
| 154 | + | |
def test_sync_todos_to_definition_of_done_preserves_runtime_items() -> None:
    """Syncing todos records in-progress/completed entries without
    dropping items the runtime already placed on the DoD."""
    dod = create_definition_of_done("Implement Loader workflow routing.")
    dod.pending_items.append("Collect verification evidence")
    todos = [
        {
            "content": "Write router",
            "active_form": "Writing router",
            "status": "in_progress",
        },
        {
            "content": "Update tests",
            "active_form": "Updating tests",
            "status": "completed",
        },
    ]

    sync_todos_to_definition_of_done(dod, todos)

    assert "Writing router" in dod.pending_items
    assert "Collect verification evidence" in dod.pending_items
    assert "Update tests" in dod.completed_items
tests/test_workflow_runtime.pyadded@@ -0,0 +1,278 @@ | ||
| 1 | +"""Runtime integration coverage for Sprint 04 workflow routing.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +from pathlib import Path | |
| 6 | + | |
| 7 | +import pytest | |
| 8 | + | |
| 9 | +from loader.agent.loop import AgentConfig | |
| 10 | +from loader.llm.base import CompletionResponse, ToolCall | |
| 11 | +from tests.helpers.runtime_harness import ScriptedBackend, run_scenario | |
| 12 | + | |
| 13 | + | |
def non_streaming_config() -> AgentConfig:
    """Build the deterministic (non-streaming) config these tests share."""

    return AgentConfig(
        auto_context=False,
        stream=False,
        max_iterations=8,
    )
| 18 | + | |
| 19 | + | |
def workflow_modes(run) -> list[str]:
    """Collect the non-empty workflow modes emitted by *run*, in order."""

    modes: list[str] = []
    for event in run.events:
        if event.type == "workflow_mode" and event.workflow_mode:
            modes.append(event.workflow_mode)
    return modes
| 28 | + | |
| 29 | + | |
def artifact_kinds(run) -> list[str]:
    """Collect the non-empty artifact kinds emitted by *run*, in order."""

    kinds: list[str] = []
    for event in run.events:
        if event.type == "artifact" and event.artifact_kind:
            kinds.append(event.artifact_kind)
    return kinds
| 38 | + | |
| 39 | + | |
@pytest.mark.asyncio
async def test_ambiguous_prompt_routes_to_clarify_and_persists_brief(
    temp_dir: Path,
) -> None:
    """An ambiguous prompt should enter clarify mode, ask one question,
    persist the clarify brief, then fall through to execute."""
    # Scripted turns: (1) ask a clarifying question, (2) emit the
    # structured brief markdown, (3) acknowledge and finish.
    backend = ScriptedBackend(
        completions=[
            CompletionResponse(
                content="I need one clarification before I proceed.",
                tool_calls=[
                    ToolCall(
                        id="ask-1",
                        name="AskUserQuestion",
                        arguments={
                            "question": "What should stay out of scope for this Loader improvement?",
                        },
                    )
                ],
            ),
            CompletionResponse(
                content="\n".join(
                    [
                        "## Task Statement",
                        "Improve Loader so it feels more like claw-code.",
                        "",
                        "## Desired Outcome",
                        "- Make Loader more reliable without broad redesign.",
                        "",
                        "## In Scope",
                        "- Tighten the runtime workflow around the user-facing goal.",
                        "",
                        "## Non Goals",
                        "- Rebuild unrelated subsystems.",
                        "",
                        "## Decision Boundaries",
                        "- Escalate before changing unrelated UX patterns.",
                        "",
                        "## Constraints",
                        "- Stay within the current repository.",
                        "",
                        "## Likely Touchpoints",
                        "- Runtime entry points and prompt behavior.",
                        "",
                        "## Assumptions",
                        "- The user wants a narrow runtime-quality improvement.",
                        "",
                        "## Acceptance Criteria",
                        "- The improvement stays focused on runtime behavior.",
                    ]
                )
            ),
            CompletionResponse(content="I have the brief and can move forward."),
        ]
    )

    async def answer(question: str, options: list[str] | None) -> str:
        # Stand-in for the human: sanity-check the question, then reply.
        assert "out of scope" in question.lower()
        assert options is None
        return "Do not redesign the whole interface."

    run = await run_scenario(
        "Improve Loader so it feels more like claw-code.",
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
        on_user_question=answer,
    )

    dod = run.agent.last_turn_summary.definition_of_done
    assert dod is not None
    # Clarify must come first, then execute; the brief is the only artifact.
    assert workflow_modes(run)[:2] == ["clarify", "execute"]
    assert artifact_kinds(run) == ["clarify_brief"]
    assert dod.clarify_brief is not None
    assert Path(dod.clarify_brief).exists()
    assert "runtime behavior" in dod.acceptance_criteria[0].lower()
    # The clarify-mode section must be injected into the first system prompt.
    assert "## Clarify Mode" in backend.invocations[0].messages[0].content
| 115 | + | |
| 116 | + | |
@pytest.mark.asyncio
async def test_complex_prompt_routes_to_plan_and_uses_verification_artifact(
    temp_dir: Path,
) -> None:
    """A complex prompt should route to plan mode, persist both plan
    artifacts, and run the parsed verification commands afterwards."""
    target = temp_dir / "planned.txt"
    # Scripted turns: (1) combined implementation/verification plan,
    # (2) the write that implements it, (3) the closing summary.
    backend = ScriptedBackend(
        completions=[
            CompletionResponse(
                content="\n".join(
                    [
                        "# Implementation Plan",
                        "",
                        "## File Changes",
                        f"- Create {target.name} in the workspace root.",
                        "",
                        "## Execution Order",
                        f"1. Write {target.name}.",
                        "2. Confirm the file exists.",
                        "",
                        "## Risks",
                        "- Writing the wrong file path.",
                        "",
                        "<<<VERIFICATION>>>",
                        "",
                        "# Verification Plan",
                        "",
                        "## Acceptance Criteria",
                        f"- {target.name} exists in the workspace root.",
                        "",
                        "## Verification Commands",
                        f"- `test -f {target}`",
                        "",
                        "## Notes",
                        "- Use a deterministic file existence check.",
                    ]
                )
            ),
            CompletionResponse(
                content="I'll create the file now.",
                tool_calls=[
                    ToolCall(
                        id="write-1",
                        name="write",
                        arguments={
                            "file_path": str(target),
                            "content": "planned output\n",
                        },
                    )
                ],
            ),
            CompletionResponse(content="The file is in place."),
        ]
    )

    run = await run_scenario(
        "Implement a persistent workflow mode router with clarify artifacts, "
        "planning artifacts, and verification-plan wiring in the runtime.",
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
    )

    dod = run.agent.last_turn_summary.definition_of_done
    assert dod is not None
    assert workflow_modes(run)[:3] == ["plan", "execute", "verify"]
    assert artifact_kinds(run) == ["implementation_plan", "verification_plan"]
    assert dod.implementation_plan is not None
    assert dod.verification_plan is not None
    assert Path(dod.implementation_plan).exists()
    assert Path(dod.verification_plan).exists()
    # Commands come from the backticked bullet in the verification plan.
    assert dod.verification_commands == [f"test -f {target}"]
    assert "## Plan Mode" in backend.invocations[0].messages[0].content
    # The verification phase must actually run the planned command.
    verify_calls = [
        event
        for event in run.events
        if event.type == "tool_call" and event.phase == "verification"
    ]
    assert [event.tool_args["command"] for event in verify_calls] == [f"test -f {target}"]
| 195 | + | |
| 196 | + | |
@pytest.mark.asyncio
async def test_verify_failure_returns_to_execute_without_retriggering_plan(
    temp_dir: Path,
) -> None:
    """When verification fails, the runtime should return to execute for a
    fix (not re-plan or clarify) and then verify again."""
    target = temp_dir / "retry.txt"
    # Scripted turns: plan, a first write whose "draft" content fails the
    # grep check, a summary, the corrective "fixed" write, final summary.
    backend = ScriptedBackend(
        completions=[
            CompletionResponse(
                content="\n".join(
                    [
                        "# Implementation Plan",
                        "",
                        "## File Changes",
                        f"- Create {target.name}.",
                        "",
                        "## Execution Order",
                        f"1. Write {target.name}.",
                        "2. Fix it if verification fails.",
                        "",
                        "## Risks",
                        "- Initial content may be wrong.",
                        "",
                        "<<<VERIFICATION>>>",
                        "",
                        "# Verification Plan",
                        "",
                        "## Acceptance Criteria",
                        "- The file contains the word fixed.",
                        "",
                        "## Verification Commands",
                        f"- `grep -q fixed {target}`",
                        "",
                        "## Notes",
                        "- Retry if the first write misses the target string.",
                    ]
                )
            ),
            CompletionResponse(
                content="I'll write the first draft.",
                tool_calls=[
                    ToolCall(
                        id="write-1",
                        name="write",
                        arguments={
                            "file_path": str(target),
                            "content": "draft output\n",
                        },
                    )
                ],
            ),
            CompletionResponse(content="First draft is written."),
            CompletionResponse(
                content="I'll correct the file.",
                tool_calls=[
                    ToolCall(
                        id="write-2",
                        name="write",
                        arguments={
                            "file_path": str(target),
                            "content": "fixed output\n",
                        },
                    )
                ],
            ),
            CompletionResponse(content="The file now contains the fixed output."),
        ]
    )

    run = await run_scenario(
        "Implement a persistent workflow mode router with clarify artifacts, "
        "planning artifacts, and verification-plan wiring in the runtime.",
        backend,
        config=non_streaming_config(),
        project_root=temp_dir,
    )

    modes = workflow_modes(run)
    # Exactly one planning pass; the retry loops only through execute/verify.
    assert modes.count("plan") == 1
    assert modes.count("clarify") == 0
    assert modes.count("execute") >= 2
    assert modes.count("verify") >= 2
    assert "fixed output" in target.read_text()
tests/test_workflow_runtime_tools.pymodified@@ -12,7 +12,12 @@ from tests.helpers.runtime_harness import ScriptedBackend, run_scenario | ||
def non_streaming_config() -> AgentConfig:
    """Shared deterministic config for runtime tool tests."""

    return AgentConfig(
        auto_context=False,
        stream=False,
        max_iterations=4,
        # NOTE(review): pinning execute mode presumably keeps the workflow
        # router from entering clarify/plan in these tool-focused tests —
        # confirm against the router's override handling.
        workflow_mode_override="execute",
    )
| 16 | 21 | |
| 17 | 22 | |
| 18 | 23 | async def _answer(question: str, options: list[str] | None) -> str: |