Add workflow tools and question callbacks
SHA: 87132084a2a21e3b40c11ab3a77eb753b3e2af00
Parent: 581d54c
Tree: f3ff357

| Status | File | + | - |
|---|---|---|---|
| M | src/loader/agent/loop.py | 32 | 6 |
| M | src/loader/runtime/conversation.py | 3 | 0 |
| M | src/loader/runtime/executor.py | 20 | 2 |
| M | src/loader/tools/base.py | 3 | 0 |
| A | src/loader/tools/workflow_tools.py | 257 | 0 |
| M | tests/helpers/runtime_harness.py | 7 | 1 |
| A | tests/test_workflow_runtime_tools.py | 57 | 0 |
| A | tests/test_workflow_tools.py | 117 | 0 |
src/loader/agent/loop.py (modified) @@ -426,6 +426,7 @@ class Agent: | ||
| 426 | 426 | user_message: str, |
| 427 | 427 | on_event: Callable[[AgentEvent], None] | Callable[[AgentEvent], Awaitable[None]] | None = None, |
| 428 | 428 | on_confirmation: Callable[[str, str, str], Awaitable[bool]] | None = None, |
| 429 | + on_user_question: Callable[[str, list[str] | None], Awaitable[str]] | None = None, | |
| 429 | 430 | use_plan: bool | None = None, |
| 430 | 431 | ) -> str: |
| 431 | 432 | """Run the agent with a user message. |
@@ -434,6 +435,7 @@ class Agent: | ||
| 434 | 435 | user_message: The user's input |
| 435 | 436 | on_event: Optional callback for streaming events (sync or async) |
| 436 | 437 | on_confirmation: Optional callback for tool confirmation. Takes (tool_name, message, details) and returns True to confirm. |
| 438 | + on_user_question: Optional callback for AskUserQuestion. Takes (question, options) and returns the answer. | |
| 437 | 439 | use_plan: Force planning on/off. None = auto-detect. |
| 438 | 440 | |
| 439 | 441 | Returns: |
@@ -451,7 +453,13 @@ class Agent: | ||
| 451 | 453 | # Mark agent as running (enables steering) |
| 452 | 454 | self._is_running = True |
| 453 | 455 | try: |
| 454 | - return await self._run_with_steering(user_message, emit, on_confirmation, use_plan) | |
| 456 | + return await self._run_with_steering( | |
| 457 | + user_message, | |
| 458 | + emit, | |
| 459 | + on_confirmation, | |
| 460 | + on_user_question, | |
| 461 | + use_plan, | |
| 462 | + ) | |
| 455 | 463 | finally: |
| 456 | 464 | self._is_running = False |
| 457 | 465 | |
@@ -460,6 +468,7 @@ class Agent: | ||
| 460 | 468 | user_message: str, |
| 461 | 469 | emit: Callable[[AgentEvent], Awaitable[None]], |
| 462 | 470 | on_confirmation: Callable[[str, str, str], Awaitable[bool]] | None, |
| 471 | + on_user_question: Callable[[str, list[str] | None], Awaitable[str]] | None, | |
| 463 | 472 | use_plan: bool | None, |
| 464 | 473 | ) -> str: |
| 465 | 474 | """Internal run method that supports steering.""" |
@@ -506,7 +515,10 @@ class Agent: | ||
| 506 | 515 | f"Verification: {subtask.verification}", |
| 507 | 516 | )) |
| 508 | 517 | subtask_response = await self._run_inner( |
| 509 | - subtask.description, emit, on_confirmation, | |
| 518 | + subtask.description, | |
| 519 | + emit, | |
| 520 | + on_confirmation, | |
| 521 | + on_user_question=on_user_question, | |
| 510 | 522 | original_task=self._current_task, |
| 511 | 523 | ) |
| 512 | 524 | |
@@ -532,7 +544,10 @@ class Agent: | ||
| 532 | 544 | ) |
| 533 | 545 | self.messages.append(Message(role=Role.USER, content=summary_prompt)) |
| 534 | 546 | return await self._run_inner( |
| 535 | - summary_prompt, emit, on_confirmation, | |
| 547 | + summary_prompt, | |
| 548 | + emit, | |
| 549 | + on_confirmation, | |
| 550 | + on_user_question=on_user_question, | |
| 536 | 551 | original_task=self._current_task, |
| 537 | 552 | ) |
| 538 | 553 | else: |
@@ -564,7 +579,10 @@ class Agent: | ||
| 564 | 579 | # Run the step |
| 565 | 580 | step_prompt = format_step_prompt(plan, step) |
| 566 | 581 | await self._run_inner( |
| 567 | - step_prompt, emit, on_confirmation, | |
| 582 | + step_prompt, | |
| 583 | + emit, | |
| 584 | + on_confirmation, | |
| 585 | + on_user_question=on_user_question, | |
| 568 | 586 | original_task=self._current_task, |
| 569 | 587 | ) |
| 570 | 588 | |
@@ -574,14 +592,20 @@ class Agent: | ||
| 574 | 592 | self.messages.append(Message(role=Role.USER, content=user_message)) |
| 575 | 593 | summary_prompt = f"I've completed the plan. Summarize what was done:\n{plan.to_prompt()}" |
| 576 | 594 | return await self._run_inner( |
| 577 | - summary_prompt, emit, on_confirmation, | |
| 595 | + summary_prompt, | |
| 596 | + emit, | |
| 597 | + on_confirmation, | |
| 598 | + on_user_question=on_user_question, | |
| 578 | 599 | original_task=self._current_task, |
| 579 | 600 | ) |
| 580 | 601 | |
| 581 | 602 | # No planning or decomposition - run directly |
| 582 | 603 | self.messages.append(Message(role=Role.USER, content=user_message)) |
| 583 | 604 | return await self._run_inner( |
| 584 | - user_message, emit, on_confirmation, | |
| 605 | + user_message, | |
| 606 | + emit, | |
| 607 | + on_confirmation, | |
| 608 | + on_user_question=on_user_question, | |
| 585 | 609 | original_task=self._current_task, |
| 586 | 610 | ) |
| 587 | 611 | |
@@ -590,6 +614,7 @@ class Agent: | ||
| 590 | 614 | task: str, |
| 591 | 615 | emit: Callable[[AgentEvent], Awaitable[None]], |
| 592 | 616 | on_confirmation: Callable[[str, str, str], Awaitable[bool]] | None = None, |
| 617 | + on_user_question: Callable[[str, list[str] | None], Awaitable[str]] | None = None, | |
| 593 | 618 | original_task: str | None = None, |
| 594 | 619 | ) -> str: |
| 595 | 620 | """Inner execution loop without planning.""" |
@@ -599,6 +624,7 @@ class Agent: | ||
| 599 | 624 | task, |
| 600 | 625 | emit, |
| 601 | 626 | on_confirmation=on_confirmation, |
| 627 | + on_user_question=on_user_question, | |
| 602 | 628 | original_task=original_task, |
| 603 | 629 | ) |
| 604 | 630 | return self.last_turn_summary.final_response |
src/loader/runtime/conversation.py (modified) @@ -33,6 +33,7 @@ from .tracing import RuntimeTracer | ||
| 33 | 33 | |
| 34 | 34 | EventSink = Callable[[AgentEvent], Awaitable[None]] |
| 35 | 35 | ConfirmationHandler = Callable[[str, str, str], Awaitable[bool]] | None |
| 36 | +UserQuestionHandler = Callable[[str, list[str] | None], Awaitable[str]] | None | |
| 36 | 37 | |
| 37 | 38 | |
| 38 | 39 | @dataclass |
@@ -68,6 +69,7 @@ class ConversationRuntime: | ||
| 68 | 69 | task: str, |
| 69 | 70 | emit: EventSink, |
| 70 | 71 | on_confirmation: ConfirmationHandler = None, |
| 72 | + on_user_question: UserQuestionHandler = None, | |
| 71 | 73 | original_task: str | None = None, |
| 72 | 74 | ) -> TurnSummary: |
| 73 | 75 | """Run one task turn and return a structured summary.""" |
@@ -281,6 +283,7 @@ class ConversationRuntime: | ||
| 281 | 283 | outcome = await self.executor.execute_tool_call( |
| 282 | 284 | tool_call, |
| 283 | 285 | on_confirmation=on_confirmation, |
| 286 | + on_user_question=on_user_question, | |
| 284 | 287 | emit_confirmation=self._emit_confirmation(emit), |
| 285 | 288 | source=tool_source, |
| 286 | 289 | ) |
src/loader/runtime/executor.py (modified) @@ -12,6 +12,7 @@ from ..agent.recovery import ErrorCategory, categorize_error | ||
| 12 | 12 | from ..llm.base import Message, ToolCall |
| 13 | 13 | from ..tools.base import ConfirmationRequired, ToolRegistry |
| 14 | 14 | from ..tools.base import ToolResult as RegistryToolResult |
| 15 | +from ..tools.workflow_tools import UserQuestionHandler | |
| 15 | 16 | from .hooks import HookContext, HookDecision, HookManager |
| 16 | 17 | from .permissions import PermissionDecision, PermissionMode, PermissionPolicy |
| 17 | 18 | from .tracing import RuntimeTracer |
@@ -66,6 +67,7 @@ class ToolExecutor: | ||
| 66 | 67 | tool_call: ToolCall, |
| 67 | 68 | *, |
| 68 | 69 | on_confirmation: BrowserConfirmation = None, |
| 70 | + on_user_question: UserQuestionHandler | None = None, | |
| 69 | 71 | emit_confirmation: ConfirmationEmitter = None, |
| 70 | 72 | source: str, |
| 71 | 73 | skip_duplicate_check: bool = False, |
@@ -249,6 +251,7 @@ class ToolExecutor: | ||
| 249 | 251 | result = await self._execute_registry( |
| 250 | 252 | tool_call, |
| 251 | 253 | on_confirmation, |
| 254 | + on_user_question, | |
| 252 | 255 | emit_confirmation, |
| 253 | 256 | skip_confirmation=skip_confirmation, |
| 254 | 257 | ) |
@@ -354,6 +357,7 @@ class ToolExecutor: | ||
| 354 | 357 | self, |
| 355 | 358 | tool_call: ToolCall, |
| 356 | 359 | on_confirmation: BrowserConfirmation, |
| 360 | + on_user_question: UserQuestionHandler | None, | |
| 357 | 361 | emit_confirmation: ConfirmationEmitter, |
| 358 | 362 | *, |
| 359 | 363 | skip_confirmation: bool = False, |
@@ -362,7 +366,14 @@ class ToolExecutor: | ||
| 362 | 366 | if skip_confirmation: |
| 363 | 367 | self.registry.skip_confirmation = True |
| 364 | 368 | try: |
| 365 | - return await self.registry.execute(tool_call.name, **tool_call.arguments) | |
| 369 | + extra_kwargs: dict[str, Any] = {} | |
| 370 | + if tool_call.name == "AskUserQuestion": | |
| 371 | + extra_kwargs["user_response_handler"] = on_user_question | |
| 372 | + return await self.registry.execute( | |
| 373 | + tool_call.name, | |
| 374 | + **tool_call.arguments, | |
| 375 | + **extra_kwargs, | |
| 376 | + ) | |
| 366 | 377 | except ConfirmationRequired as confirmation: |
| 367 | 378 | self.tracer.record( |
| 368 | 379 | "tool.confirmation_requested", |
@@ -392,7 +403,14 @@ class ToolExecutor: | ||
| 392 | 403 | |
| 393 | 404 | self.registry.skip_confirmation = True |
| 394 | 405 | try: |
| 395 | - return await self.registry.execute(tool_call.name, **tool_call.arguments) | |
| 406 | + extra_kwargs: dict[str, Any] = {} | |
| 407 | + if tool_call.name == "AskUserQuestion": | |
| 408 | + extra_kwargs["user_response_handler"] = on_user_question | |
| 409 | + return await self.registry.execute( | |
| 410 | + tool_call.name, | |
| 411 | + **tool_call.arguments, | |
| 412 | + **extra_kwargs, | |
| 413 | + ) | |
| 396 | 414 | finally: |
| 397 | 415 | self.registry.skip_confirmation = previous_skip |
| 398 | 416 | finally: |
src/loader/tools/base.py (modified) @@ -181,6 +181,7 @@ def create_default_registry( | ||
| 181 | 181 | from .file_tools import EditTool, GlobTool, ReadTool, WriteTool |
| 182 | 182 | from .search_tools import GrepTool |
| 183 | 183 | from .shell_tools import BashTool |
| 184 | + from .workflow_tools import AskUserQuestionTool, TodoWriteTool | |
| 184 | 185 | |
| 185 | 186 | registry = ToolRegistry(workspace_root=workspace_root) |
| 186 | 187 | registry.register(ReadTool()) |
@@ -189,5 +190,7 @@ def create_default_registry( | ||
| 189 | 190 | registry.register(GlobTool()) |
| 190 | 191 | registry.register(BashTool()) |
| 191 | 192 | registry.register(GrepTool()) |
| 193 | + registry.register(TodoWriteTool()) | |
| 194 | + registry.register(AskUserQuestionTool()) | |
| 192 | 195 | |
| 193 | 196 | return registry |
src/loader/tools/workflow_tools.py (added) @@ -0,0 +1,257 @@ | ||
| 1 | +"""Workflow-oriented tools for task tracking and user clarification.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +import asyncio | |
| 6 | +import json | |
| 7 | +from collections.abc import Awaitable, Callable | |
| 8 | +from dataclasses import dataclass | |
| 9 | +from pathlib import Path | |
| 10 | +from typing import Any | |
| 11 | + | |
| 12 | +from ..runtime.permissions import PermissionMode | |
| 13 | +from .base import Tool, ToolResult | |
| 14 | + | |
| 15 | +UserQuestionHandler = Callable[[str, list[str] | None], Awaitable[str]] | |
| 16 | + | |
| 17 | +TODO_STATUSES = {"pending", "in_progress", "completed"} | |
| 18 | + | |
| 19 | + | |
| 20 | +@dataclass(slots=True) | |
| 21 | +class TodoItem: | |
| 22 | + """Structured todo item compatible with Loader workflow state.""" | |
| 23 | + | |
| 24 | + content: str | |
| 25 | + active_form: str | |
| 26 | + status: str | |
| 27 | + | |
| 28 | + @classmethod | |
| 29 | + def from_dict(cls, data: dict[str, Any]) -> TodoItem: | |
| 30 | + active_form = str( | |
| 31 | + data.get("active_form") | |
| 32 | + or data.get("activeForm") | |
| 33 | + or data.get("active") | |
| 34 | + or "" | |
| 35 | + ).strip() | |
| 36 | + return cls( | |
| 37 | + content=str(data.get("content", "")).strip(), | |
| 38 | + active_form=active_form, | |
| 39 | + status=str(data.get("status", "")).strip().lower(), | |
| 40 | + ) | |
| 41 | + | |
| 42 | + def to_dict(self) -> dict[str, str]: | |
| 43 | + return { | |
| 44 | + "content": self.content, | |
| 45 | + "active_form": self.active_form, | |
| 46 | + "status": self.status, | |
| 47 | + } | |
| 48 | + | |
| 49 | + | |
| 50 | +class TodoWriteTool(Tool): | |
| 51 | + """Persist the current task list under `.loader/`.""" | |
| 52 | + | |
| 53 | + required_permission = PermissionMode.READ_ONLY | |
| 54 | + | |
| 55 | + def __init__(self, workspace_root: Path | str | None = None) -> None: | |
| 56 | + self.workspace_root = ( | |
| 57 | + Path(workspace_root).expanduser().resolve() if workspace_root else None | |
| 58 | + ) | |
| 59 | + | |
| 60 | + @property | |
| 61 | + def name(self) -> str: | |
| 62 | + return "TodoWrite" | |
| 63 | + | |
| 64 | + def set_workspace_root(self, workspace_root: Path | None) -> None: | |
| 65 | + self.workspace_root = workspace_root | |
| 66 | + | |
| 67 | + @property | |
| 68 | + def description(self) -> str: | |
| 69 | + return ( | |
| 70 | + "Persist the current task list under .loader/todos/. " | |
| 71 | + "Use it to track pending, in-progress, and completed work items." | |
| 72 | + ) | |
| 73 | + | |
| 74 | + @property | |
| 75 | + def parameters(self) -> dict[str, Any]: | |
| 76 | + return { | |
| 77 | + "type": "object", | |
| 78 | + "properties": { | |
| 79 | + "todos": { | |
| 80 | + "type": "array", | |
| 81 | + "description": "Current task list for the active workflow.", | |
| 82 | + "items": { | |
| 83 | + "type": "object", | |
| 84 | + "properties": { | |
| 85 | + "content": { | |
| 86 | + "type": "string", | |
| 87 | + "description": "Short task description in base form.", | |
| 88 | + }, | |
| 89 | + "active_form": { | |
| 90 | + "type": "string", | |
| 91 | + "description": "Progressive-tense form, e.g. 'Running tests'.", | |
| 92 | + }, | |
| 93 | + "status": { | |
| 94 | + "type": "string", | |
| 95 | + "enum": ["pending", "in_progress", "completed"], | |
| 96 | + "description": "Current todo status.", | |
| 97 | + }, | |
| 98 | + }, | |
| 99 | + "required": ["content", "active_form", "status"], | |
| 100 | + }, | |
| 101 | + } | |
| 102 | + }, | |
| 103 | + "required": ["todos"], | |
| 104 | + } | |
| 105 | + | |
| 106 | + async def execute( | |
| 107 | + self, | |
| 108 | + todos: list[dict[str, Any]], | |
| 109 | + **kwargs: Any, | |
| 110 | + ) -> ToolResult: | |
| 111 | + try: | |
| 112 | + items = [TodoItem.from_dict(todo) for todo in todos] | |
| 113 | + self._validate_items(items) | |
| 114 | + except ValueError as exc: | |
| 115 | + return ToolResult(str(exc), is_error=True) | |
| 116 | + | |
| 117 | + store_path = self._store_path() | |
| 118 | + old_todos = await asyncio.to_thread(self._read_existing_items, store_path) | |
| 119 | + | |
| 120 | + all_done = all(item.status == "completed" for item in items) | |
| 121 | + persisted_items = [] if all_done else [item.to_dict() for item in items] | |
| 122 | + | |
| 123 | + store_path.parent.mkdir(parents=True, exist_ok=True) | |
| 124 | + await asyncio.to_thread( | |
| 125 | + store_path.write_text, | |
| 126 | + json.dumps(persisted_items, indent=2, sort_keys=True), | |
| 127 | + ) | |
| 128 | + | |
| 129 | + verification_nudge_needed = ( | |
| 130 | + all_done | |
| 131 | + and len(items) >= 3 | |
| 132 | + and not any("verif" in item.content.lower() for item in items) | |
| 133 | + ) | |
| 134 | + | |
| 135 | + payload = { | |
| 136 | + "old_todos": old_todos, | |
| 137 | + "new_todos": [item.to_dict() for item in items], | |
| 138 | + "verification_nudge_needed": verification_nudge_needed, | |
| 139 | + "store_path": str(store_path), | |
| 140 | + } | |
| 141 | + return ToolResult( | |
| 142 | + output=json.dumps(payload, indent=2, sort_keys=True), | |
| 143 | + metadata=payload, | |
| 144 | + ) | |
| 145 | + | |
| 146 | + def _store_path(self) -> Path: | |
| 147 | + root = self.workspace_root or Path.cwd() | |
| 148 | + return root / ".loader" / "todos" / "active.json" | |
| 149 | + | |
| 150 | + def _read_existing_items(self, store_path: Path) -> list[dict[str, Any]]: | |
| 151 | + if not store_path.exists(): | |
| 152 | + return [] | |
| 153 | + raw = json.loads(store_path.read_text()) | |
| 154 | + if not isinstance(raw, list): | |
| 155 | + return [] | |
| 156 | + items: list[dict[str, Any]] = [] | |
| 157 | + for item in raw: | |
| 158 | + if isinstance(item, dict): | |
| 159 | + items.append(TodoItem.from_dict(item).to_dict()) | |
| 160 | + return items | |
| 161 | + | |
| 162 | + def _validate_items(self, items: list[TodoItem]) -> None: | |
| 163 | + if not items: | |
| 164 | + raise ValueError("todos must not be empty") | |
| 165 | + for item in items: | |
| 166 | + if not item.content: | |
| 167 | + raise ValueError("todo content must not be empty") | |
| 168 | + if not item.active_form: | |
| 169 | + raise ValueError("todo active_form must not be empty") | |
| 170 | + if item.status not in TODO_STATUSES: | |
| 171 | + raise ValueError( | |
| 172 | + "todo status must be one of pending, in_progress, or completed" | |
| 173 | + ) | |
| 174 | + | |
| 175 | + | |
| 176 | +class AskUserQuestionTool(Tool): | |
| 177 | + """Ask the user one structured question and capture the answer.""" | |
| 178 | + | |
| 179 | + required_permission = PermissionMode.READ_ONLY | |
| 180 | + | |
| 181 | + @property | |
| 182 | + def name(self) -> str: | |
| 183 | + return "AskUserQuestion" | |
| 184 | + | |
| 185 | + @property | |
| 186 | + def description(self) -> str: | |
| 187 | + return "Ask the user one question and wait for their response." | |
| 188 | + | |
| 189 | + @property | |
| 190 | + def parameters(self) -> dict[str, Any]: | |
| 191 | + return { | |
| 192 | + "type": "object", | |
| 193 | + "properties": { | |
| 194 | + "question": { | |
| 195 | + "type": "string", | |
| 196 | + "description": "The exact question to present to the user.", | |
| 197 | + }, | |
| 198 | + "options": { | |
| 199 | + "type": "array", | |
| 200 | + "description": "Optional short answer choices.", | |
| 201 | + "items": {"type": "string"}, | |
| 202 | + }, | |
| 203 | + }, | |
| 204 | + "required": ["question"], | |
| 205 | + } | |
| 206 | + | |
| 207 | + async def execute( | |
| 208 | + self, | |
| 209 | + question: str, | |
| 210 | + options: list[str] | None = None, | |
| 211 | + user_response_handler: UserQuestionHandler | None = None, | |
| 212 | + **kwargs: Any, | |
| 213 | + ) -> ToolResult: | |
| 214 | + normalized_question = question.strip() | |
| 215 | + normalized_options = [ | |
| 216 | + str(option).strip() | |
| 217 | + for option in (options or []) | |
| 218 | + if str(option).strip() | |
| 219 | + ] | |
| 220 | + | |
| 221 | + if not normalized_question: | |
| 222 | + return ToolResult("question must not be empty", is_error=True) | |
| 223 | + if user_response_handler is None: | |
| 224 | + return ToolResult( | |
| 225 | + "AskUserQuestion requires a user_response_handler callback", | |
| 226 | + is_error=True, | |
| 227 | + ) | |
| 228 | + | |
| 229 | + answer = ( | |
| 230 | + await user_response_handler( | |
| 231 | + normalized_question, | |
| 232 | + normalized_options or None, | |
| 233 | + ) | |
| 234 | + ).strip() | |
| 235 | + resolved_answer = self._resolve_answer(answer, normalized_options or None) | |
| 236 | + payload = { | |
| 237 | + "question": normalized_question, | |
| 238 | + "options": normalized_options or None, | |
| 239 | + "answer": resolved_answer, | |
| 240 | + "status": "answered", | |
| 241 | + } | |
| 242 | + return ToolResult( | |
| 243 | + output=json.dumps(payload, indent=2, sort_keys=True), | |
| 244 | + metadata=payload, | |
| 245 | + ) | |
| 246 | + | |
| 247 | + @staticmethod | |
| 248 | + def _resolve_answer(answer: str, options: list[str] | None) -> str: | |
| 249 | + if not options: | |
| 250 | + return answer | |
| 251 | + try: | |
| 252 | + index = int(answer) - 1 | |
| 253 | + except ValueError: | |
| 254 | + return answer | |
| 255 | + if 0 <= index < len(options): | |
| 256 | + return options[index] | |
| 257 | + return answer | |
tests/helpers/runtime_harness.py (modified) @@ -105,6 +105,7 @@ async def run_scenario( | ||
| 105 | 105 | config: AgentConfig | None = None, |
| 106 | 106 | project_root: Path | str | None = None, |
| 107 | 107 | on_confirmation=None, |
| 108 | + on_user_question=None, | |
| 108 | 109 | ) -> ScenarioRun: |
| 109 | 110 | """Run a scripted agent scenario and collect emitted events.""" |
| 110 | 111 | |
@@ -119,7 +120,12 @@ async def run_scenario( | ||
| 119 | 120 | async def capture(event: AgentEvent) -> None: |
| 120 | 121 | events.append(event) |
| 121 | 122 | |
| 122 | - response = await agent.run(prompt, on_event=capture, on_confirmation=on_confirmation) | |
| 123 | + response = await agent.run( | |
| 124 | + prompt, | |
| 125 | + on_event=capture, | |
| 126 | + on_confirmation=on_confirmation, | |
| 127 | + on_user_question=on_user_question, | |
| 128 | + ) | |
| 123 | 129 | return ScenarioRun( |
| 124 | 130 | response=response, |
| 125 | 131 | events=events, |
tests/test_workflow_runtime_tools.py (added) @@ -0,0 +1,57 @@ | ||
| 1 | +"""Runtime coverage for Sprint 04 workflow tools.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +import pytest | |
| 6 | + | |
| 7 | +from loader.agent.loop import AgentConfig | |
| 8 | +from loader.llm.base import CompletionResponse, ToolCall | |
| 9 | +from tests.helpers.runtime_harness import ScriptedBackend, run_scenario | |
| 10 | + | |
| 11 | + | |
| 12 | +def non_streaming_config() -> AgentConfig: | |
| 13 | + """Shared deterministic config for runtime tool tests.""" | |
| 14 | + | |
| 15 | + return AgentConfig(auto_context=False, stream=False, max_iterations=4) | |
| 16 | + | |
| 17 | + | |
| 18 | +async def _answer(question: str, options: list[str] | None) -> str: | |
| 19 | + assert "Which path" in question | |
| 20 | + assert options == ["Plan first", "Execute now"] | |
| 21 | + return "1" | |
| 22 | + | |
| 23 | + | |
| 24 | +@pytest.mark.asyncio | |
| 25 | +async def test_ask_user_question_round_trips_through_runtime() -> None: | |
| 26 | + backend = ScriptedBackend( | |
| 27 | + completions=[ | |
| 28 | + CompletionResponse( | |
| 29 | + content="I need one clarification.", | |
| 30 | + tool_calls=[ | |
| 31 | + ToolCall( | |
| 32 | + id="ask-1", | |
| 33 | + name="AskUserQuestion", | |
| 34 | + arguments={ | |
| 35 | + "question": "Which path should we take?", | |
| 36 | + "options": ["Plan first", "Execute now"], | |
| 37 | + }, | |
| 38 | + ) | |
| 39 | + ], | |
| 40 | + ), | |
| 41 | + CompletionResponse(content="We'll plan first."), | |
| 42 | + ] | |
| 43 | + ) | |
| 44 | + | |
| 45 | + run = await run_scenario( | |
| 46 | + "Implement the task, but ask me which path to take first.", | |
| 47 | + backend, | |
| 48 | + config=non_streaming_config(), | |
| 49 | + on_user_question=_answer, | |
| 50 | + ) | |
| 51 | + | |
| 52 | + tool_events = [event for event in run.events if event.type == "tool_call"] | |
| 53 | + tool_results = [event for event in run.events if event.type == "tool_result"] | |
| 54 | + | |
| 55 | + assert "We'll plan first." in run.response | |
| 56 | + assert [event.tool_name for event in tool_events] == ["AskUserQuestion"] | |
| 57 | + assert any("Plan first" in event.content for event in tool_results) | |
tests/test_workflow_tools.py (added) @@ -0,0 +1,117 @@ | ||
| 1 | +"""Tests for workflow-oriented tools introduced in Sprint 04.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +import json | |
| 6 | +from pathlib import Path | |
| 7 | + | |
| 8 | +import pytest | |
| 9 | + | |
| 10 | +from loader.tools.workflow_tools import AskUserQuestionTool, TodoWriteTool | |
| 11 | + | |
| 12 | + | |
| 13 | +@pytest.mark.asyncio | |
| 14 | +async def test_todo_write_persists_and_returns_previous_state(tmp_path: Path) -> None: | |
| 15 | + tool = TodoWriteTool(tmp_path) | |
| 16 | + | |
| 17 | + first = await tool.execute( | |
| 18 | + todos=[ | |
| 19 | + { | |
| 20 | + "content": "Create runtime router", | |
| 21 | + "active_form": "Creating runtime router", | |
| 22 | + "status": "in_progress", | |
| 23 | + } | |
| 24 | + ] | |
| 25 | + ) | |
| 26 | + second = await tool.execute( | |
| 27 | + todos=[ | |
| 28 | + { | |
| 29 | + "content": "Create runtime router", | |
| 30 | + "active_form": "Creating runtime router", | |
| 31 | + "status": "completed", | |
| 32 | + } | |
| 33 | + ] | |
| 34 | + ) | |
| 35 | + | |
| 36 | + first_payload = json.loads(first.output) | |
| 37 | + second_payload = json.loads(second.output) | |
| 38 | + store_path = tmp_path / ".loader" / "todos" / "active.json" | |
| 39 | + | |
| 40 | + assert first.is_error is False | |
| 41 | + assert first_payload["old_todos"] == [] | |
| 42 | + assert second_payload["old_todos"] == first_payload["new_todos"] | |
| 43 | + assert json.loads(store_path.read_text()) == [] | |
| 44 | + | |
| 45 | + | |
| 46 | +@pytest.mark.asyncio | |
| 47 | +async def test_todo_write_rejects_invalid_payloads_and_sets_verification_nudge( | |
| 48 | + tmp_path: Path, | |
| 49 | +) -> None: | |
| 50 | + tool = TodoWriteTool(tmp_path) | |
| 51 | + | |
| 52 | + empty = await tool.execute(todos=[]) | |
| 53 | + blank = await tool.execute( | |
| 54 | + todos=[ | |
| 55 | + { | |
| 56 | + "content": " ", | |
| 57 | + "active_form": "Reviewing plan", | |
| 58 | + "status": "pending", | |
| 59 | + } | |
| 60 | + ] | |
| 61 | + ) | |
| 62 | + nudged = await tool.execute( | |
| 63 | + todos=[ | |
| 64 | + { | |
| 65 | + "content": "Implement router", | |
| 66 | + "active_form": "Implementing router", | |
| 67 | + "status": "completed", | |
| 68 | + }, | |
| 69 | + { | |
| 70 | + "content": "Write tests", | |
| 71 | + "active_form": "Writing tests", | |
| 72 | + "status": "completed", | |
| 73 | + }, | |
| 74 | + { | |
| 75 | + "content": "Update docs", | |
| 76 | + "active_form": "Updating docs", | |
| 77 | + "status": "completed", | |
| 78 | + }, | |
| 79 | + ] | |
| 80 | + ) | |
| 81 | + | |
| 82 | + assert empty.is_error is True | |
| 83 | + assert "todos must not be empty" in empty.output | |
| 84 | + assert blank.is_error is True | |
| 85 | + assert "todo content must not be empty" in blank.output | |
| 86 | + assert json.loads(nudged.output)["verification_nudge_needed"] is True | |
| 87 | + | |
| 88 | + | |
| 89 | +@pytest.mark.asyncio | |
| 90 | +async def test_ask_user_question_uses_callback_and_resolves_numbered_options() -> None: | |
| 91 | + tool = AskUserQuestionTool() | |
| 92 | + | |
| 93 | + async def answer(question: str, options: list[str] | None) -> str: | |
| 94 | + assert "Which path" in question | |
| 95 | + assert options == ["Plan first", "Execute now"] | |
| 96 | + return "2" | |
| 97 | + | |
| 98 | + result = await tool.execute( | |
| 99 | + question="Which path should we take?", | |
| 100 | + options=["Plan first", "Execute now"], | |
| 101 | + user_response_handler=answer, | |
| 102 | + ) | |
| 103 | + | |
| 104 | + payload = json.loads(result.output) | |
| 105 | + assert result.is_error is False | |
| 106 | + assert payload["answer"] == "Execute now" | |
| 107 | + assert payload["status"] == "answered" | |
| 108 | + | |
| 109 | + | |
| 110 | +@pytest.mark.asyncio | |
| 111 | +async def test_ask_user_question_requires_callback() -> None: | |
| 112 | + tool = AskUserQuestionTool() | |
| 113 | + | |
| 114 | + result = await tool.execute(question="Need an answer?") | |
| 115 | + | |
| 116 | + assert result.is_error is True | |
| 117 | + assert "user_response_handler" in result.output | |