Add workflow tools and question callbacks
SHA: 87132084a2a21e3b40c11ab3a77eb753b3e2af00
Parent: 581d54c
Tree: f3ff357

| Status | File | + | - |
|---|---|---|---|
| M | src/loader/agent/loop.py | 32 | 6 |
| M | src/loader/runtime/conversation.py | 3 | 0 |
| M | src/loader/runtime/executor.py | 20 | 2 |
| M | src/loader/tools/base.py | 3 | 0 |
| A | src/loader/tools/workflow_tools.py | 257 | 0 |
| M | tests/helpers/runtime_harness.py | 7 | 1 |
| A | tests/test_workflow_runtime_tools.py | 57 | 0 |
| A | tests/test_workflow_tools.py | 117 | 0 |
src/loader/agent/loop.py (modified) @@ -426,6 +426,7 @@ class Agent: | ||
| 426 | 426 | user_message: str, |
| 427 | 427 | on_event: Callable[[AgentEvent], None] | Callable[[AgentEvent], Awaitable[None]] | None = None, |
| 428 | 428 | on_confirmation: Callable[[str, str, str], Awaitable[bool]] | None = None, |
| 429 | + on_user_question: Callable[[str, list[str] | None], Awaitable[str]] | None = None, | |
| 429 | 430 | use_plan: bool | None = None, |
| 430 | 431 | ) -> str: |
| 431 | 432 | """Run the agent with a user message. |
@@ -434,6 +435,7 @@ class Agent: | ||
| 434 | 435 | user_message: The user's input |
| 435 | 436 | on_event: Optional callback for streaming events (sync or async) |
| 436 | 437 | on_confirmation: Optional callback for tool confirmation. Takes (tool_name, message, details) and returns True to confirm. |
| 438 | + on_user_question: Optional callback for AskUserQuestion. Takes (question, options) and returns the answer. | |
| 437 | 439 | use_plan: Force planning on/off. None = auto-detect. |
| 438 | 440 | |
| 439 | 441 | Returns: |
@@ -451,7 +453,13 @@ class Agent: | ||
| 451 | 453 | # Mark agent as running (enables steering) |
| 452 | 454 | self._is_running = True |
| 453 | 455 | try: |
| 454 | - return await self._run_with_steering(user_message, emit, on_confirmation, use_plan) | |
| 456 | + return await self._run_with_steering( | |
| 457 | + user_message, | |
| 458 | + emit, | |
| 459 | + on_confirmation, | |
| 460 | + on_user_question, | |
| 461 | + use_plan, | |
| 462 | + ) | |
| 455 | 463 | finally: |
| 456 | 464 | self._is_running = False |
| 457 | 465 | |
@@ -460,6 +468,7 @@ class Agent: | ||
| 460 | 468 | user_message: str, |
| 461 | 469 | emit: Callable[[AgentEvent], Awaitable[None]], |
| 462 | 470 | on_confirmation: Callable[[str, str, str], Awaitable[bool]] | None, |
| 471 | + on_user_question: Callable[[str, list[str] | None], Awaitable[str]] | None, | |
| 463 | 472 | use_plan: bool | None, |
| 464 | 473 | ) -> str: |
| 465 | 474 | """Internal run method that supports steering.""" |
@@ -506,7 +515,10 @@ class Agent: | ||
| 506 | 515 | f"Verification: {subtask.verification}", |
| 507 | 516 | )) |
| 508 | 517 | subtask_response = await self._run_inner( |
| 509 | - subtask.description, emit, on_confirmation, | |
| 518 | + subtask.description, | |
| 519 | + emit, | |
| 520 | + on_confirmation, | |
| 521 | + on_user_question=on_user_question, | |
| 510 | 522 | original_task=self._current_task, |
| 511 | 523 | ) |
| 512 | 524 | |
@@ -532,7 +544,10 @@ class Agent: | ||
| 532 | 544 | ) |
| 533 | 545 | self.messages.append(Message(role=Role.USER, content=summary_prompt)) |
| 534 | 546 | return await self._run_inner( |
| 535 | - summary_prompt, emit, on_confirmation, | |
| 547 | + summary_prompt, | |
| 548 | + emit, | |
| 549 | + on_confirmation, | |
| 550 | + on_user_question=on_user_question, | |
| 536 | 551 | original_task=self._current_task, |
| 537 | 552 | ) |
| 538 | 553 | else: |
@@ -564,7 +579,10 @@ class Agent: | ||
| 564 | 579 | # Run the step |
| 565 | 580 | step_prompt = format_step_prompt(plan, step) |
| 566 | 581 | await self._run_inner( |
| 567 | - step_prompt, emit, on_confirmation, | |
| 582 | + step_prompt, | |
| 583 | + emit, | |
| 584 | + on_confirmation, | |
| 585 | + on_user_question=on_user_question, | |
| 568 | 586 | original_task=self._current_task, |
| 569 | 587 | ) |
| 570 | 588 | |
@@ -574,14 +592,20 @@ class Agent: | ||
| 574 | 592 | self.messages.append(Message(role=Role.USER, content=user_message)) |
| 575 | 593 | summary_prompt = f"I've completed the plan. Summarize what was done:\n{plan.to_prompt()}" |
| 576 | 594 | return await self._run_inner( |
| 577 | - summary_prompt, emit, on_confirmation, | |
| 595 | + summary_prompt, | |
| 596 | + emit, | |
| 597 | + on_confirmation, | |
| 598 | + on_user_question=on_user_question, | |
| 578 | 599 | original_task=self._current_task, |
| 579 | 600 | ) |
| 580 | 601 | |
| 581 | 602 | # No planning or decomposition - run directly |
| 582 | 603 | self.messages.append(Message(role=Role.USER, content=user_message)) |
| 583 | 604 | return await self._run_inner( |
| 584 | - user_message, emit, on_confirmation, | |
| 605 | + user_message, | |
| 606 | + emit, | |
| 607 | + on_confirmation, | |
| 608 | + on_user_question=on_user_question, | |
| 585 | 609 | original_task=self._current_task, |
| 586 | 610 | ) |
| 587 | 611 | |
@@ -590,6 +614,7 @@ class Agent: | ||
| 590 | 614 | task: str, |
| 591 | 615 | emit: Callable[[AgentEvent], Awaitable[None]], |
| 592 | 616 | on_confirmation: Callable[[str, str, str], Awaitable[bool]] | None = None, |
| 617 | + on_user_question: Callable[[str, list[str] | None], Awaitable[str]] | None = None, | |
| 593 | 618 | original_task: str | None = None, |
| 594 | 619 | ) -> str: |
| 595 | 620 | """Inner execution loop without planning.""" |
@@ -599,6 +624,7 @@ class Agent: | ||
| 599 | 624 | task, |
| 600 | 625 | emit, |
| 601 | 626 | on_confirmation=on_confirmation, |
| 627 | + on_user_question=on_user_question, | |
| 602 | 628 | original_task=original_task, |
| 603 | 629 | ) |
| 604 | 630 | return self.last_turn_summary.final_response |
src/loader/runtime/conversation.py (modified) @@ -33,6 +33,7 @@ from .tracing import RuntimeTracer | ||
| 33 | 33 | |
| 34 | 34 | EventSink = Callable[[AgentEvent], Awaitable[None]] |
| 35 | 35 | ConfirmationHandler = Callable[[str, str, str], Awaitable[bool]] | None |
| 36 | +UserQuestionHandler = Callable[[str, list[str] | None], Awaitable[str]] | None | |
| 36 | 37 | |
| 37 | 38 | |
| 38 | 39 | @dataclass |
@@ -68,6 +69,7 @@ class ConversationRuntime: | ||
| 68 | 69 | task: str, |
| 69 | 70 | emit: EventSink, |
| 70 | 71 | on_confirmation: ConfirmationHandler = None, |
| 72 | + on_user_question: UserQuestionHandler = None, | |
| 71 | 73 | original_task: str | None = None, |
| 72 | 74 | ) -> TurnSummary: |
| 73 | 75 | """Run one task turn and return a structured summary.""" |
@@ -281,6 +283,7 @@ class ConversationRuntime: | ||
| 281 | 283 | outcome = await self.executor.execute_tool_call( |
| 282 | 284 | tool_call, |
| 283 | 285 | on_confirmation=on_confirmation, |
| 286 | + on_user_question=on_user_question, | |
| 284 | 287 | emit_confirmation=self._emit_confirmation(emit), |
| 285 | 288 | source=tool_source, |
| 286 | 289 | ) |
src/loader/runtime/executor.py (modified) @@ -12,6 +12,7 @@ from ..agent.recovery import ErrorCategory, categorize_error | ||
| 12 | 12 | from ..llm.base import Message, ToolCall |
| 13 | 13 | from ..tools.base import ConfirmationRequired, ToolRegistry |
| 14 | 14 | from ..tools.base import ToolResult as RegistryToolResult |
| 15 | +from ..tools.workflow_tools import UserQuestionHandler | |
| 15 | 16 | from .hooks import HookContext, HookDecision, HookManager |
| 16 | 17 | from .permissions import PermissionDecision, PermissionMode, PermissionPolicy |
| 17 | 18 | from .tracing import RuntimeTracer |
@@ -66,6 +67,7 @@ class ToolExecutor: | ||
| 66 | 67 | tool_call: ToolCall, |
| 67 | 68 | *, |
| 68 | 69 | on_confirmation: BrowserConfirmation = None, |
| 70 | + on_user_question: UserQuestionHandler | None = None, | |
| 69 | 71 | emit_confirmation: ConfirmationEmitter = None, |
| 70 | 72 | source: str, |
| 71 | 73 | skip_duplicate_check: bool = False, |
@@ -249,6 +251,7 @@ class ToolExecutor: | ||
| 249 | 251 | result = await self._execute_registry( |
| 250 | 252 | tool_call, |
| 251 | 253 | on_confirmation, |
| 254 | + on_user_question, | |
| 252 | 255 | emit_confirmation, |
| 253 | 256 | skip_confirmation=skip_confirmation, |
| 254 | 257 | ) |
@@ -354,6 +357,7 @@ class ToolExecutor: | ||
| 354 | 357 | self, |
| 355 | 358 | tool_call: ToolCall, |
| 356 | 359 | on_confirmation: BrowserConfirmation, |
| 360 | + on_user_question: UserQuestionHandler | None, | |
| 357 | 361 | emit_confirmation: ConfirmationEmitter, |
| 358 | 362 | *, |
| 359 | 363 | skip_confirmation: bool = False, |
@@ -362,7 +366,14 @@ class ToolExecutor: | ||
| 362 | 366 | if skip_confirmation: |
| 363 | 367 | self.registry.skip_confirmation = True |
| 364 | 368 | try: |
| 365 | - return await self.registry.execute(tool_call.name, **tool_call.arguments) | |
| 369 | + extra_kwargs: dict[str, Any] = {} | |
| 370 | + if tool_call.name == "AskUserQuestion": | |
| 371 | + extra_kwargs["user_response_handler"] = on_user_question | |
| 372 | + return await self.registry.execute( | |
| 373 | + tool_call.name, | |
| 374 | + **tool_call.arguments, | |
| 375 | + **extra_kwargs, | |
| 376 | + ) | |
| 366 | 377 | except ConfirmationRequired as confirmation: |
| 367 | 378 | self.tracer.record( |
| 368 | 379 | "tool.confirmation_requested", |
@@ -392,7 +403,14 @@ class ToolExecutor: | ||
| 392 | 403 | |
| 393 | 404 | self.registry.skip_confirmation = True |
| 394 | 405 | try: |
| 395 | - return await self.registry.execute(tool_call.name, **tool_call.arguments) | |
| 406 | + extra_kwargs: dict[str, Any] = {} | |
| 407 | + if tool_call.name == "AskUserQuestion": | |
| 408 | + extra_kwargs["user_response_handler"] = on_user_question | |
| 409 | + return await self.registry.execute( | |
| 410 | + tool_call.name, | |
| 411 | + **tool_call.arguments, | |
| 412 | + **extra_kwargs, | |
| 413 | + ) | |
| 396 | 414 | finally: |
| 397 | 415 | self.registry.skip_confirmation = previous_skip |
| 398 | 416 | finally: |
src/loader/tools/base.py (modified) @@ -181,6 +181,7 @@ def create_default_registry( | ||
| 181 | 181 | from .file_tools import EditTool, GlobTool, ReadTool, WriteTool |
| 182 | 182 | from .search_tools import GrepTool |
| 183 | 183 | from .shell_tools import BashTool |
| 184 | + from .workflow_tools import AskUserQuestionTool, TodoWriteTool | |
| 184 | 185 | |
| 185 | 186 | registry = ToolRegistry(workspace_root=workspace_root) |
| 186 | 187 | registry.register(ReadTool()) |
@@ -189,5 +190,7 @@ def create_default_registry( | ||
| 189 | 190 | registry.register(GlobTool()) |
| 190 | 191 | registry.register(BashTool()) |
| 191 | 192 | registry.register(GrepTool()) |
| 193 | + registry.register(TodoWriteTool()) | |
| 194 | + registry.register(AskUserQuestionTool()) | |
| 192 | 195 | |
| 193 | 196 | return registry |
src/loader/tools/workflow_tools.py (added) @@ -0,0 +1,257 @@ | ||
| 1 | +"""Workflow-oriented tools for task tracking and user clarification.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +import asyncio | |
| 6 | +import json | |
| 7 | +from collections.abc import Awaitable, Callable | |
| 8 | +from dataclasses import dataclass | |
| 9 | +from pathlib import Path | |
| 10 | +from typing import Any | |
| 11 | + | |
| 12 | +from ..runtime.permissions import PermissionMode | |
| 13 | +from .base import Tool, ToolResult | |
| 14 | + | |
| 15 | +UserQuestionHandler = Callable[[str, list[str] | None], Awaitable[str]] | |
| 16 | + | |
| 17 | +TODO_STATUSES = {"pending", "in_progress", "completed"} | |
| 18 | + | |
| 19 | + | |
| 20 | +@dataclass(slots=True) | |
| 21 | +class TodoItem: | |
| 22 | + """Structured todo item compatible with Loader workflow state.""" | |
| 23 | + | |
| 24 | + content: str | |
| 25 | + active_form: str | |
| 26 | + status: str | |
| 27 | + | |
| 28 | + @classmethod | |
| 29 | + def from_dict(cls, data: dict[str, Any]) -> TodoItem: | |
| 30 | + active_form = str( | |
| 31 | + data.get("active_form") | |
| 32 | + or data.get("activeForm") | |
| 33 | + or data.get("active") | |
| 34 | + or "" | |
| 35 | + ).strip() | |
| 36 | + return cls( | |
| 37 | + content=str(data.get("content", "")).strip(), | |
| 38 | + active_form=active_form, | |
| 39 | + status=str(data.get("status", "")).strip().lower(), | |
| 40 | + ) | |
| 41 | + | |
| 42 | + def to_dict(self) -> dict[str, str]: | |
| 43 | + return { | |
| 44 | + "content": self.content, | |
| 45 | + "active_form": self.active_form, | |
| 46 | + "status": self.status, | |
| 47 | + } | |
| 48 | + | |
| 49 | + | |
| 50 | +class TodoWriteTool(Tool): | |
| 51 | + """Persist the current task list under `.loader/`.""" | |
| 52 | + | |
| 53 | + required_permission = PermissionMode.READ_ONLY | |
| 54 | + | |
| 55 | + def __init__(self, workspace_root: Path | str | None = None) -> None: | |
| 56 | + self.workspace_root = ( | |
| 57 | + Path(workspace_root).expanduser().resolve() if workspace_root else None | |
| 58 | + ) | |
| 59 | + | |
| 60 | + @property | |
| 61 | + def name(self) -> str: | |
| 62 | + return "TodoWrite" | |
| 63 | + | |
| 64 | + def set_workspace_root(self, workspace_root: Path | None) -> None: | |
| 65 | + self.workspace_root = workspace_root | |
| 66 | + | |
| 67 | + @property | |
| 68 | + def description(self) -> str: | |
| 69 | + return ( | |
| 70 | + "Persist the current task list under .loader/todos/. " | |
| 71 | + "Use it to track pending, in-progress, and completed work items." | |
| 72 | + ) | |
| 73 | + | |
| 74 | + @property | |
| 75 | + def parameters(self) -> dict[str, Any]: | |
| 76 | + return { | |
| 77 | + "type": "object", | |
| 78 | + "properties": { | |
| 79 | + "todos": { | |
| 80 | + "type": "array", | |
| 81 | + "description": "Current task list for the active workflow.", | |
| 82 | + "items": { | |
| 83 | + "type": "object", | |
| 84 | + "properties": { | |
| 85 | + "content": { | |
| 86 | + "type": "string", | |
| 87 | + "description": "Short task description in base form.", | |
| 88 | + }, | |
| 89 | + "active_form": { | |
| 90 | + "type": "string", | |
| 91 | + "description": "Progressive-tense form, e.g. 'Running tests'.", | |
| 92 | + }, | |
| 93 | + "status": { | |
| 94 | + "type": "string", | |
| 95 | + "enum": ["pending", "in_progress", "completed"], | |
| 96 | + "description": "Current todo status.", | |
| 97 | + }, | |
| 98 | + }, | |
| 99 | + "required": ["content", "active_form", "status"], | |
| 100 | + }, | |
| 101 | + } | |
| 102 | + }, | |
| 103 | + "required": ["todos"], | |
| 104 | + } | |
| 105 | + | |
| 106 | + async def execute( | |
| 107 | + self, | |
| 108 | + todos: list[dict[str, Any]], | |
| 109 | + **kwargs: Any, | |
| 110 | + ) -> ToolResult: | |
| 111 | + try: | |
| 112 | + items = [TodoItem.from_dict(todo) for todo in todos] | |
| 113 | + self._validate_items(items) | |
| 114 | + except ValueError as exc: | |
| 115 | + return ToolResult(str(exc), is_error=True) | |
| 116 | + | |
| 117 | + store_path = self._store_path() | |
| 118 | + old_todos = await asyncio.to_thread(self._read_existing_items, store_path) | |
| 119 | + | |
| 120 | + all_done = all(item.status == "completed" for item in items) | |
| 121 | + persisted_items = [] if all_done else [item.to_dict() for item in items] | |
| 122 | + | |
| 123 | + store_path.parent.mkdir(parents=True, exist_ok=True) | |
| 124 | + await asyncio.to_thread( | |
| 125 | + store_path.write_text, | |
| 126 | + json.dumps(persisted_items, indent=2, sort_keys=True), | |
| 127 | + ) | |
| 128 | + | |
| 129 | + verification_nudge_needed = ( | |
| 130 | + all_done | |
| 131 | + and len(items) >= 3 | |
| 132 | + and not any("verif" in item.content.lower() for item in items) | |
| 133 | + ) | |
| 134 | + | |
| 135 | + payload = { | |
| 136 | + "old_todos": old_todos, | |
| 137 | + "new_todos": [item.to_dict() for item in items], | |
| 138 | + "verification_nudge_needed": verification_nudge_needed, | |
| 139 | + "store_path": str(store_path), | |
| 140 | + } | |
| 141 | + return ToolResult( | |
| 142 | + output=json.dumps(payload, indent=2, sort_keys=True), | |
| 143 | + metadata=payload, | |
| 144 | + ) | |
| 145 | + | |
| 146 | + def _store_path(self) -> Path: | |
| 147 | + root = self.workspace_root or Path.cwd() | |
| 148 | + return root / ".loader" / "todos" / "active.json" | |
| 149 | + | |
| 150 | + def _read_existing_items(self, store_path: Path) -> list[dict[str, Any]]: | |
| 151 | + if not store_path.exists(): | |
| 152 | + return [] | |
| 153 | + raw = json.loads(store_path.read_text()) | |
| 154 | + if not isinstance(raw, list): | |
| 155 | + return [] | |
| 156 | + items: list[dict[str, Any]] = [] | |
| 157 | + for item in raw: | |
| 158 | + if isinstance(item, dict): | |
| 159 | + items.append(TodoItem.from_dict(item).to_dict()) | |
| 160 | + return items | |
| 161 | + | |
| 162 | + def _validate_items(self, items: list[TodoItem]) -> None: | |
| 163 | + if not items: | |
| 164 | + raise ValueError("todos must not be empty") | |
| 165 | + for item in items: | |
| 166 | + if not item.content: | |
| 167 | + raise ValueError("todo content must not be empty") | |
| 168 | + if not item.active_form: | |
| 169 | + raise ValueError("todo active_form must not be empty") | |
| 170 | + if item.status not in TODO_STATUSES: | |
| 171 | + raise ValueError( | |
| 172 | + "todo status must be one of pending, in_progress, or completed" | |
| 173 | + ) | |
| 174 | + | |
| 175 | + | |
| 176 | +class AskUserQuestionTool(Tool): | |
| 177 | + """Ask the user one structured question and capture the answer.""" | |
| 178 | + | |
| 179 | + required_permission = PermissionMode.READ_ONLY | |
| 180 | + | |
| 181 | + @property | |
| 182 | + def name(self) -> str: | |
| 183 | + return "AskUserQuestion" | |
| 184 | + | |
| 185 | + @property | |
| 186 | + def description(self) -> str: | |
| 187 | + return "Ask the user one question and wait for their response." | |
| 188 | + | |
| 189 | + @property | |
| 190 | + def parameters(self) -> dict[str, Any]: | |
| 191 | + return { | |
| 192 | + "type": "object", | |
| 193 | + "properties": { | |
| 194 | + "question": { | |
| 195 | + "type": "string", | |
| 196 | + "description": "The exact question to present to the user.", | |
| 197 | + }, | |
| 198 | + "options": { | |
| 199 | + "type": "array", | |
| 200 | + "description": "Optional short answer choices.", | |
| 201 | + "items": {"type": "string"}, | |
| 202 | + }, | |
| 203 | + }, | |
| 204 | + "required": ["question"], | |
| 205 | + } | |
| 206 | + | |
| 207 | + async def execute( | |
| 208 | + self, | |
| 209 | + question: str, | |
| 210 | + options: list[str] | None = None, | |
| 211 | + user_response_handler: UserQuestionHandler | None = None, | |
| 212 | + **kwargs: Any, | |
| 213 | + ) -> ToolResult: | |
| 214 | + normalized_question = question.strip() | |
| 215 | + normalized_options = [ | |
| 216 | + str(option).strip() | |
| 217 | + for option in (options or []) | |
| 218 | + if str(option).strip() | |
| 219 | + ] | |
| 220 | + | |
| 221 | + if not normalized_question: | |
| 222 | + return ToolResult("question must not be empty", is_error=True) | |
| 223 | + if user_response_handler is None: | |
| 224 | + return ToolResult( | |
| 225 | + "AskUserQuestion requires a user_response_handler callback", | |
| 226 | + is_error=True, | |
| 227 | + ) | |
| 228 | + | |
| 229 | + answer = ( | |
| 230 | + await user_response_handler( | |
| 231 | + normalized_question, | |
| 232 | + normalized_options or None, | |
| 233 | + ) | |
| 234 | + ).strip() | |
| 235 | + resolved_answer = self._resolve_answer(answer, normalized_options or None) | |
| 236 | + payload = { | |
| 237 | + "question": normalized_question, | |
| 238 | + "options": normalized_options or None, | |
| 239 | + "answer": resolved_answer, | |
| 240 | + "status": "answered", | |
| 241 | + } | |
| 242 | + return ToolResult( | |
| 243 | + output=json.dumps(payload, indent=2, sort_keys=True), | |
| 244 | + metadata=payload, | |
| 245 | + ) | |
| 246 | + | |
| 247 | + @staticmethod | |
| 248 | + def _resolve_answer(answer: str, options: list[str] | None) -> str: | |
| 249 | + if not options: | |
| 250 | + return answer | |
| 251 | + try: | |
| 252 | + index = int(answer) - 1 | |
| 253 | + except ValueError: | |
| 254 | + return answer | |
| 255 | + if 0 <= index < len(options): | |
| 256 | + return options[index] | |
| 257 | + return answer | |
tests/helpers/runtime_harness.py (modified) @@ -105,6 +105,7 @@ async def run_scenario( | ||
| 105 | 105 | config: AgentConfig | None = None, |
| 106 | 106 | project_root: Path | str | None = None, |
| 107 | 107 | on_confirmation=None, |
| 108 | + on_user_question=None, | |
| 108 | 109 | ) -> ScenarioRun: |
| 109 | 110 | """Run a scripted agent scenario and collect emitted events.""" |
| 110 | 111 | |
@@ -119,7 +120,12 @@ async def run_scenario( | ||
| 119 | 120 | async def capture(event: AgentEvent) -> None: |
| 120 | 121 | events.append(event) |
| 121 | 122 | |
| 122 | - response = await agent.run(prompt, on_event=capture, on_confirmation=on_confirmation) | |
| 123 | + response = await agent.run( | |
| 124 | + prompt, | |
| 125 | + on_event=capture, | |
| 126 | + on_confirmation=on_confirmation, | |
| 127 | + on_user_question=on_user_question, | |
| 128 | + ) | |
| 123 | 129 | return ScenarioRun( |
| 124 | 130 | response=response, |
| 125 | 131 | events=events, |
tests/test_workflow_runtime_tools.py (added) @@ -0,0 +1,57 @@ | ||
| 1 | +"""Runtime coverage for Sprint 04 workflow tools.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +import pytest | |
| 6 | + | |
| 7 | +from loader.agent.loop import AgentConfig | |
| 8 | +from loader.llm.base import CompletionResponse, ToolCall | |
| 9 | +from tests.helpers.runtime_harness import ScriptedBackend, run_scenario | |
| 10 | + | |
| 11 | + | |
| 12 | +def non_streaming_config() -> AgentConfig: | |
| 13 | + """Shared deterministic config for runtime tool tests.""" | |
| 14 | + | |
| 15 | + return AgentConfig(auto_context=False, stream=False, max_iterations=4) | |
| 16 | + | |
| 17 | + | |
| 18 | +async def _answer(question: str, options: list[str] | None) -> str: | |
| 19 | + assert "Which path" in question | |
| 20 | + assert options == ["Plan first", "Execute now"] | |
| 21 | + return "1" | |
| 22 | + | |
| 23 | + | |
| 24 | +@pytest.mark.asyncio | |
| 25 | +async def test_ask_user_question_round_trips_through_runtime() -> None: | |
| 26 | + backend = ScriptedBackend( | |
| 27 | + completions=[ | |
| 28 | + CompletionResponse( | |
| 29 | + content="I need one clarification.", | |
| 30 | + tool_calls=[ | |
| 31 | + ToolCall( | |
| 32 | + id="ask-1", | |
| 33 | + name="AskUserQuestion", | |
| 34 | + arguments={ | |
| 35 | + "question": "Which path should we take?", | |
| 36 | + "options": ["Plan first", "Execute now"], | |
| 37 | + }, | |
| 38 | + ) | |
| 39 | + ], | |
| 40 | + ), | |
| 41 | + CompletionResponse(content="We'll plan first."), | |
| 42 | + ] | |
| 43 | + ) | |
| 44 | + | |
| 45 | + run = await run_scenario( | |
| 46 | + "Implement the task, but ask me which path to take first.", | |
| 47 | + backend, | |
| 48 | + config=non_streaming_config(), | |
| 49 | + on_user_question=_answer, | |
| 50 | + ) | |
| 51 | + | |
| 52 | + tool_events = [event for event in run.events if event.type == "tool_call"] | |
| 53 | + tool_results = [event for event in run.events if event.type == "tool_result"] | |
| 54 | + | |
| 55 | + assert "We'll plan first." in run.response | |
| 56 | + assert [event.tool_name for event in tool_events] == ["AskUserQuestion"] | |
| 57 | + assert any("Plan first" in event.content for event in tool_results) | |
tests/test_workflow_tools.py (added) @@ -0,0 +1,117 @@ | ||
| 1 | +"""Tests for workflow-oriented tools introduced in Sprint 04.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +import json | |
| 6 | +from pathlib import Path | |
| 7 | + | |
| 8 | +import pytest | |
| 9 | + | |
| 10 | +from loader.tools.workflow_tools import AskUserQuestionTool, TodoWriteTool | |
| 11 | + | |
| 12 | + | |
| 13 | +@pytest.mark.asyncio | |
| 14 | +async def test_todo_write_persists_and_returns_previous_state(tmp_path: Path) -> None: | |
| 15 | + tool = TodoWriteTool(tmp_path) | |
| 16 | + | |
| 17 | + first = await tool.execute( | |
| 18 | + todos=[ | |
| 19 | + { | |
| 20 | + "content": "Create runtime router", | |
| 21 | + "active_form": "Creating runtime router", | |
| 22 | + "status": "in_progress", | |
| 23 | + } | |
| 24 | + ] | |
| 25 | + ) | |
| 26 | + second = await tool.execute( | |
| 27 | + todos=[ | |
| 28 | + { | |
| 29 | + "content": "Create runtime router", | |
| 30 | + "active_form": "Creating runtime router", | |
| 31 | + "status": "completed", | |
| 32 | + } | |
| 33 | + ] | |
| 34 | + ) | |
| 35 | + | |
| 36 | + first_payload = json.loads(first.output) | |
| 37 | + second_payload = json.loads(second.output) | |
| 38 | + store_path = tmp_path / ".loader" / "todos" / "active.json" | |
| 39 | + | |
| 40 | + assert first.is_error is False | |
| 41 | + assert first_payload["old_todos"] == [] | |
| 42 | + assert second_payload["old_todos"] == first_payload["new_todos"] | |
| 43 | + assert json.loads(store_path.read_text()) == [] | |
| 44 | + | |
| 45 | + | |
| 46 | +@pytest.mark.asyncio | |
| 47 | +async def test_todo_write_rejects_invalid_payloads_and_sets_verification_nudge( | |
| 48 | + tmp_path: Path, | |
| 49 | +) -> None: | |
| 50 | + tool = TodoWriteTool(tmp_path) | |
| 51 | + | |
| 52 | + empty = await tool.execute(todos=[]) | |
| 53 | + blank = await tool.execute( | |
| 54 | + todos=[ | |
| 55 | + { | |
| 56 | + "content": " ", | |
| 57 | + "active_form": "Reviewing plan", | |
| 58 | + "status": "pending", | |
| 59 | + } | |
| 60 | + ] | |
| 61 | + ) | |
| 62 | + nudged = await tool.execute( | |
| 63 | + todos=[ | |
| 64 | + { | |
| 65 | + "content": "Implement router", | |
| 66 | + "active_form": "Implementing router", | |
| 67 | + "status": "completed", | |
| 68 | + }, | |
| 69 | + { | |
| 70 | + "content": "Write tests", | |
| 71 | + "active_form": "Writing tests", | |
| 72 | + "status": "completed", | |
| 73 | + }, | |
| 74 | + { | |
| 75 | + "content": "Update docs", | |
| 76 | + "active_form": "Updating docs", | |
| 77 | + "status": "completed", | |
| 78 | + }, | |
| 79 | + ] | |
| 80 | + ) | |
| 81 | + | |
| 82 | + assert empty.is_error is True | |
| 83 | + assert "todos must not be empty" in empty.output | |
| 84 | + assert blank.is_error is True | |
| 85 | + assert "todo content must not be empty" in blank.output | |
| 86 | + assert json.loads(nudged.output)["verification_nudge_needed"] is True | |
| 87 | + | |
| 88 | + | |
| 89 | +@pytest.mark.asyncio | |
| 90 | +async def test_ask_user_question_uses_callback_and_resolves_numbered_options() -> None: | |
| 91 | + tool = AskUserQuestionTool() | |
| 92 | + | |
| 93 | + async def answer(question: str, options: list[str] | None) -> str: | |
| 94 | + assert "Which path" in question | |
| 95 | + assert options == ["Plan first", "Execute now"] | |
| 96 | + return "2" | |
| 97 | + | |
| 98 | + result = await tool.execute( | |
| 99 | + question="Which path should we take?", | |
| 100 | + options=["Plan first", "Execute now"], | |
| 101 | + user_response_handler=answer, | |
| 102 | + ) | |
| 103 | + | |
| 104 | + payload = json.loads(result.output) | |
| 105 | + assert result.is_error is False | |
| 106 | + assert payload["answer"] == "Execute now" | |
| 107 | + assert payload["status"] == "answered" | |
| 108 | + | |
| 109 | + | |
| 110 | +@pytest.mark.asyncio | |
| 111 | +async def test_ask_user_question_requires_callback() -> None: | |
| 112 | + tool = AskUserQuestionTool() | |
| 113 | + | |
| 114 | + result = await tool.execute(question="Need an answer?") | |
| 115 | + | |
| 116 | + assert result.is_error is True | |
| 117 | + assert "user_response_handler" in result.output | |