tenseleyflow/loader / 8713208

Browse files

Add workflow tools and question callbacks

Authored by espadonne
SHA
87132084a2a21e3b40c11ab3a77eb753b3e2af00
Parents
581d54c
Tree
f3ff357

8 changed files

StatusFile+-
M src/loader/agent/loop.py 32 6
M src/loader/runtime/conversation.py 3 0
M src/loader/runtime/executor.py 20 2
M src/loader/tools/base.py 3 0
A src/loader/tools/workflow_tools.py 257 0
M tests/helpers/runtime_harness.py 7 1
A tests/test_workflow_runtime_tools.py 57 0
A tests/test_workflow_tools.py 117 0
src/loader/agent/loop.pymodified
@@ -426,6 +426,7 @@ class Agent:
426426
         user_message: str,
427427
         on_event: Callable[[AgentEvent], None] | Callable[[AgentEvent], Awaitable[None]] | None = None,
428428
         on_confirmation: Callable[[str, str, str], Awaitable[bool]] | None = None,
429
+        on_user_question: Callable[[str, list[str] | None], Awaitable[str]] | None = None,
429430
         use_plan: bool | None = None,
430431
     ) -> str:
431432
         """Run the agent with a user message.
@@ -434,6 +435,7 @@ class Agent:
434435
             user_message: The user's input
435436
             on_event: Optional callback for streaming events (sync or async)
436437
             on_confirmation: Optional callback for tool confirmation. Takes (tool_name, message, details) and returns True to confirm.
438
+            on_user_question: Optional callback for AskUserQuestion. Takes (question, options) and returns the answer.
437439
             use_plan: Force planning on/off. None = auto-detect.
438440
 
439441
         Returns:
@@ -451,7 +453,13 @@ class Agent:
451453
         # Mark agent as running (enables steering)
452454
         self._is_running = True
453455
         try:
454
-            return await self._run_with_steering(user_message, emit, on_confirmation, use_plan)
456
+            return await self._run_with_steering(
457
+                user_message,
458
+                emit,
459
+                on_confirmation,
460
+                on_user_question,
461
+                use_plan,
462
+            )
455463
         finally:
456464
             self._is_running = False
457465
 
@@ -460,6 +468,7 @@ class Agent:
460468
         user_message: str,
461469
         emit: Callable[[AgentEvent], Awaitable[None]],
462470
         on_confirmation: Callable[[str, str, str], Awaitable[bool]] | None,
471
+        on_user_question: Callable[[str, list[str] | None], Awaitable[str]] | None,
463472
         use_plan: bool | None,
464473
     ) -> str:
465474
         """Internal run method that supports steering."""
@@ -506,7 +515,10 @@ class Agent:
506515
                                 f"Verification: {subtask.verification}",
507516
                     ))
508517
                     subtask_response = await self._run_inner(
509
-                        subtask.description, emit, on_confirmation,
518
+                        subtask.description,
519
+                        emit,
520
+                        on_confirmation,
521
+                        on_user_question=on_user_question,
510522
                         original_task=self._current_task,
511523
                     )
512524
 
@@ -532,7 +544,10 @@ class Agent:
532544
                     )
533545
                     self.messages.append(Message(role=Role.USER, content=summary_prompt))
534546
                     return await self._run_inner(
535
-                        summary_prompt, emit, on_confirmation,
547
+                        summary_prompt,
548
+                        emit,
549
+                        on_confirmation,
550
+                        on_user_question=on_user_question,
536551
                         original_task=self._current_task,
537552
                     )
538553
                 else:
@@ -564,7 +579,10 @@ class Agent:
564579
                     # Run the step
565580
                     step_prompt = format_step_prompt(plan, step)
566581
                     await self._run_inner(
567
-                        step_prompt, emit, on_confirmation,
582
+                        step_prompt,
583
+                        emit,
584
+                        on_confirmation,
585
+                        on_user_question=on_user_question,
568586
                         original_task=self._current_task,
569587
                     )
570588
 
@@ -574,14 +592,20 @@ class Agent:
574592
                 self.messages.append(Message(role=Role.USER, content=user_message))
575593
                 summary_prompt = f"I've completed the plan. Summarize what was done:\n{plan.to_prompt()}"
576594
                 return await self._run_inner(
577
-                    summary_prompt, emit, on_confirmation,
595
+                    summary_prompt,
596
+                    emit,
597
+                    on_confirmation,
598
+                    on_user_question=on_user_question,
578599
                     original_task=self._current_task,
579600
                 )
580601
 
581602
         # No planning or decomposition - run directly
582603
         self.messages.append(Message(role=Role.USER, content=user_message))
583604
         return await self._run_inner(
584
-            user_message, emit, on_confirmation,
605
+            user_message,
606
+            emit,
607
+            on_confirmation,
608
+            on_user_question=on_user_question,
585609
             original_task=self._current_task,
586610
         )
587611
 
@@ -590,6 +614,7 @@ class Agent:
590614
         task: str,
591615
         emit: Callable[[AgentEvent], Awaitable[None]],
592616
         on_confirmation: Callable[[str, str, str], Awaitable[bool]] | None = None,
617
+        on_user_question: Callable[[str, list[str] | None], Awaitable[str]] | None = None,
593618
         original_task: str | None = None,
594619
     ) -> str:
595620
         """Inner execution loop without planning."""
@@ -599,6 +624,7 @@ class Agent:
599624
             task,
600625
             emit,
601626
             on_confirmation=on_confirmation,
627
+            on_user_question=on_user_question,
602628
             original_task=original_task,
603629
         )
604630
         return self.last_turn_summary.final_response
src/loader/runtime/conversation.pymodified
@@ -33,6 +33,7 @@ from .tracing import RuntimeTracer
3333
 
3434
 EventSink = Callable[[AgentEvent], Awaitable[None]]
3535
 ConfirmationHandler = Callable[[str, str, str], Awaitable[bool]] | None
36
+UserQuestionHandler = Callable[[str, list[str] | None], Awaitable[str]] | None
3637
 
3738
 
3839
 @dataclass
@@ -68,6 +69,7 @@ class ConversationRuntime:
6869
         task: str,
6970
         emit: EventSink,
7071
         on_confirmation: ConfirmationHandler = None,
72
+        on_user_question: UserQuestionHandler = None,
7173
         original_task: str | None = None,
7274
     ) -> TurnSummary:
7375
         """Run one task turn and return a structured summary."""
@@ -281,6 +283,7 @@ class ConversationRuntime:
281283
                     outcome = await self.executor.execute_tool_call(
282284
                         tool_call,
283285
                         on_confirmation=on_confirmation,
286
+                        on_user_question=on_user_question,
284287
                         emit_confirmation=self._emit_confirmation(emit),
285288
                         source=tool_source,
286289
                     )
src/loader/runtime/executor.pymodified
@@ -12,6 +12,7 @@ from ..agent.recovery import ErrorCategory, categorize_error
1212
 from ..llm.base import Message, ToolCall
1313
 from ..tools.base import ConfirmationRequired, ToolRegistry
1414
 from ..tools.base import ToolResult as RegistryToolResult
15
+from ..tools.workflow_tools import UserQuestionHandler
1516
 from .hooks import HookContext, HookDecision, HookManager
1617
 from .permissions import PermissionDecision, PermissionMode, PermissionPolicy
1718
 from .tracing import RuntimeTracer
@@ -66,6 +67,7 @@ class ToolExecutor:
6667
         tool_call: ToolCall,
6768
         *,
6869
         on_confirmation: BrowserConfirmation = None,
70
+        on_user_question: UserQuestionHandler | None = None,
6971
         emit_confirmation: ConfirmationEmitter = None,
7072
         source: str,
7173
         skip_duplicate_check: bool = False,
@@ -249,6 +251,7 @@ class ToolExecutor:
249251
         result = await self._execute_registry(
250252
             tool_call,
251253
             on_confirmation,
254
+            on_user_question,
252255
             emit_confirmation,
253256
             skip_confirmation=skip_confirmation,
254257
         )
@@ -354,6 +357,7 @@ class ToolExecutor:
354357
         self,
355358
         tool_call: ToolCall,
356359
         on_confirmation: BrowserConfirmation,
360
+        on_user_question: UserQuestionHandler | None,
357361
         emit_confirmation: ConfirmationEmitter,
358362
         *,
359363
         skip_confirmation: bool = False,
@@ -362,7 +366,14 @@ class ToolExecutor:
362366
         if skip_confirmation:
363367
             self.registry.skip_confirmation = True
364368
         try:
365
-            return await self.registry.execute(tool_call.name, **tool_call.arguments)
369
+            extra_kwargs: dict[str, Any] = {}
370
+            if tool_call.name == "AskUserQuestion":
371
+                extra_kwargs["user_response_handler"] = on_user_question
372
+            return await self.registry.execute(
373
+                tool_call.name,
374
+                **tool_call.arguments,
375
+                **extra_kwargs,
376
+            )
366377
         except ConfirmationRequired as confirmation:
367378
             self.tracer.record(
368379
                 "tool.confirmation_requested",
@@ -392,7 +403,14 @@ class ToolExecutor:
392403
 
393404
             self.registry.skip_confirmation = True
394405
             try:
395
-                return await self.registry.execute(tool_call.name, **tool_call.arguments)
406
+                extra_kwargs: dict[str, Any] = {}
407
+                if tool_call.name == "AskUserQuestion":
408
+                    extra_kwargs["user_response_handler"] = on_user_question
409
+                return await self.registry.execute(
410
+                    tool_call.name,
411
+                    **tool_call.arguments,
412
+                    **extra_kwargs,
413
+                )
396414
             finally:
397415
                 self.registry.skip_confirmation = previous_skip
398416
         finally:
src/loader/tools/base.pymodified
@@ -181,6 +181,7 @@ def create_default_registry(
181181
     from .file_tools import EditTool, GlobTool, ReadTool, WriteTool
182182
     from .search_tools import GrepTool
183183
     from .shell_tools import BashTool
184
+    from .workflow_tools import AskUserQuestionTool, TodoWriteTool
184185
 
185186
     registry = ToolRegistry(workspace_root=workspace_root)
186187
     registry.register(ReadTool())
@@ -189,5 +190,7 @@ def create_default_registry(
189190
     registry.register(GlobTool())
190191
     registry.register(BashTool())
191192
     registry.register(GrepTool())
193
+    registry.register(TodoWriteTool())
194
+    registry.register(AskUserQuestionTool())
192195
 
193196
     return registry
src/loader/tools/workflow_tools.pyadded
@@ -0,0 +1,257 @@
1
+"""Workflow-oriented tools for task tracking and user clarification."""
2
+
3
+from __future__ import annotations
4
+
5
+import asyncio
6
+import json
7
+from collections.abc import Awaitable, Callable
8
+from dataclasses import dataclass
9
+from pathlib import Path
10
+from typing import Any
11
+
12
+from ..runtime.permissions import PermissionMode
13
+from .base import Tool, ToolResult
14
+
15
+UserQuestionHandler = Callable[[str, list[str] | None], Awaitable[str]]
16
+
17
+TODO_STATUSES = {"pending", "in_progress", "completed"}
18
+
19
+
20
+@dataclass(slots=True)
21
+class TodoItem:
22
+    """Structured todo item compatible with Loader workflow state."""
23
+
24
+    content: str
25
+    active_form: str
26
+    status: str
27
+
28
+    @classmethod
29
+    def from_dict(cls, data: dict[str, Any]) -> TodoItem:
30
+        active_form = str(
31
+            data.get("active_form")
32
+            or data.get("activeForm")
33
+            or data.get("active")
34
+            or ""
35
+        ).strip()
36
+        return cls(
37
+            content=str(data.get("content", "")).strip(),
38
+            active_form=active_form,
39
+            status=str(data.get("status", "")).strip().lower(),
40
+        )
41
+
42
+    def to_dict(self) -> dict[str, str]:
43
+        return {
44
+            "content": self.content,
45
+            "active_form": self.active_form,
46
+            "status": self.status,
47
+        }
48
+
49
+
50
+class TodoWriteTool(Tool):
51
+    """Persist the current task list under `.loader/`."""
52
+
53
+    required_permission = PermissionMode.READ_ONLY
54
+
55
+    def __init__(self, workspace_root: Path | str | None = None) -> None:
56
+        self.workspace_root = (
57
+            Path(workspace_root).expanduser().resolve() if workspace_root else None
58
+        )
59
+
60
+    @property
61
+    def name(self) -> str:
62
+        return "TodoWrite"
63
+
64
+    def set_workspace_root(self, workspace_root: Path | None) -> None:
65
+        self.workspace_root = workspace_root
66
+
67
+    @property
68
+    def description(self) -> str:
69
+        return (
70
+            "Persist the current task list under .loader/todos/. "
71
+            "Use it to track pending, in-progress, and completed work items."
72
+        )
73
+
74
+    @property
75
+    def parameters(self) -> dict[str, Any]:
76
+        return {
77
+            "type": "object",
78
+            "properties": {
79
+                "todos": {
80
+                    "type": "array",
81
+                    "description": "Current task list for the active workflow.",
82
+                    "items": {
83
+                        "type": "object",
84
+                        "properties": {
85
+                            "content": {
86
+                                "type": "string",
87
+                                "description": "Short task description in base form.",
88
+                            },
89
+                            "active_form": {
90
+                                "type": "string",
91
+                                "description": "Progressive-tense form, e.g. 'Running tests'.",
92
+                            },
93
+                            "status": {
94
+                                "type": "string",
95
+                                "enum": ["pending", "in_progress", "completed"],
96
+                                "description": "Current todo status.",
97
+                            },
98
+                        },
99
+                        "required": ["content", "active_form", "status"],
100
+                    },
101
+                }
102
+            },
103
+            "required": ["todos"],
104
+        }
105
+
106
+    async def execute(
107
+        self,
108
+        todos: list[dict[str, Any]],
109
+        **kwargs: Any,
110
+    ) -> ToolResult:
111
+        try:
112
+            items = [TodoItem.from_dict(todo) for todo in todos]
113
+            self._validate_items(items)
114
+        except ValueError as exc:
115
+            return ToolResult(str(exc), is_error=True)
116
+
117
+        store_path = self._store_path()
118
+        old_todos = await asyncio.to_thread(self._read_existing_items, store_path)
119
+
120
+        all_done = all(item.status == "completed" for item in items)
121
+        persisted_items = [] if all_done else [item.to_dict() for item in items]
122
+
123
+        store_path.parent.mkdir(parents=True, exist_ok=True)
124
+        await asyncio.to_thread(
125
+            store_path.write_text,
126
+            json.dumps(persisted_items, indent=2, sort_keys=True),
127
+        )
128
+
129
+        verification_nudge_needed = (
130
+            all_done
131
+            and len(items) >= 3
132
+            and not any("verif" in item.content.lower() for item in items)
133
+        )
134
+
135
+        payload = {
136
+            "old_todos": old_todos,
137
+            "new_todos": [item.to_dict() for item in items],
138
+            "verification_nudge_needed": verification_nudge_needed,
139
+            "store_path": str(store_path),
140
+        }
141
+        return ToolResult(
142
+            output=json.dumps(payload, indent=2, sort_keys=True),
143
+            metadata=payload,
144
+        )
145
+
146
+    def _store_path(self) -> Path:
147
+        root = self.workspace_root or Path.cwd()
148
+        return root / ".loader" / "todos" / "active.json"
149
+
150
+    def _read_existing_items(self, store_path: Path) -> list[dict[str, Any]]:
151
+        if not store_path.exists():
152
+            return []
153
+        raw = json.loads(store_path.read_text())
154
+        if not isinstance(raw, list):
155
+            return []
156
+        items: list[dict[str, Any]] = []
157
+        for item in raw:
158
+            if isinstance(item, dict):
159
+                items.append(TodoItem.from_dict(item).to_dict())
160
+        return items
161
+
162
+    def _validate_items(self, items: list[TodoItem]) -> None:
163
+        if not items:
164
+            raise ValueError("todos must not be empty")
165
+        for item in items:
166
+            if not item.content:
167
+                raise ValueError("todo content must not be empty")
168
+            if not item.active_form:
169
+                raise ValueError("todo active_form must not be empty")
170
+            if item.status not in TODO_STATUSES:
171
+                raise ValueError(
172
+                    "todo status must be one of pending, in_progress, or completed"
173
+                )
174
+
175
+
176
+class AskUserQuestionTool(Tool):
177
+    """Ask the user one structured question and capture the answer."""
178
+
179
+    required_permission = PermissionMode.READ_ONLY
180
+
181
+    @property
182
+    def name(self) -> str:
183
+        return "AskUserQuestion"
184
+
185
+    @property
186
+    def description(self) -> str:
187
+        return "Ask the user one question and wait for their response."
188
+
189
+    @property
190
+    def parameters(self) -> dict[str, Any]:
191
+        return {
192
+            "type": "object",
193
+            "properties": {
194
+                "question": {
195
+                    "type": "string",
196
+                    "description": "The exact question to present to the user.",
197
+                },
198
+                "options": {
199
+                    "type": "array",
200
+                    "description": "Optional short answer choices.",
201
+                    "items": {"type": "string"},
202
+                },
203
+            },
204
+            "required": ["question"],
205
+        }
206
+
207
+    async def execute(
208
+        self,
209
+        question: str,
210
+        options: list[str] | None = None,
211
+        user_response_handler: UserQuestionHandler | None = None,
212
+        **kwargs: Any,
213
+    ) -> ToolResult:
214
+        normalized_question = question.strip()
215
+        normalized_options = [
216
+            str(option).strip()
217
+            for option in (options or [])
218
+            if str(option).strip()
219
+        ]
220
+
221
+        if not normalized_question:
222
+            return ToolResult("question must not be empty", is_error=True)
223
+        if user_response_handler is None:
224
+            return ToolResult(
225
+                "AskUserQuestion requires a user_response_handler callback",
226
+                is_error=True,
227
+            )
228
+
229
+        answer = (
230
+            await user_response_handler(
231
+                normalized_question,
232
+                normalized_options or None,
233
+            )
234
+        ).strip()
235
+        resolved_answer = self._resolve_answer(answer, normalized_options or None)
236
+        payload = {
237
+            "question": normalized_question,
238
+            "options": normalized_options or None,
239
+            "answer": resolved_answer,
240
+            "status": "answered",
241
+        }
242
+        return ToolResult(
243
+            output=json.dumps(payload, indent=2, sort_keys=True),
244
+            metadata=payload,
245
+        )
246
+
247
+    @staticmethod
248
+    def _resolve_answer(answer: str, options: list[str] | None) -> str:
249
+        if not options:
250
+            return answer
251
+        try:
252
+            index = int(answer) - 1
253
+        except ValueError:
254
+            return answer
255
+        if 0 <= index < len(options):
256
+            return options[index]
257
+        return answer
tests/helpers/runtime_harness.pymodified
@@ -105,6 +105,7 @@ async def run_scenario(
105105
     config: AgentConfig | None = None,
106106
     project_root: Path | str | None = None,
107107
     on_confirmation=None,
108
+    on_user_question=None,
108109
 ) -> ScenarioRun:
109110
     """Run a scripted agent scenario and collect emitted events."""
110111
 
@@ -119,7 +120,12 @@ async def run_scenario(
119120
     async def capture(event: AgentEvent) -> None:
120121
         events.append(event)
121122
 
122
-    response = await agent.run(prompt, on_event=capture, on_confirmation=on_confirmation)
123
+    response = await agent.run(
124
+        prompt,
125
+        on_event=capture,
126
+        on_confirmation=on_confirmation,
127
+        on_user_question=on_user_question,
128
+    )
123129
     return ScenarioRun(
124130
         response=response,
125131
         events=events,
tests/test_workflow_runtime_tools.pyadded
@@ -0,0 +1,57 @@
1
+"""Runtime coverage for Sprint 04 workflow tools."""
2
+
3
+from __future__ import annotations
4
+
5
+import pytest
6
+
7
+from loader.agent.loop import AgentConfig
8
+from loader.llm.base import CompletionResponse, ToolCall
9
+from tests.helpers.runtime_harness import ScriptedBackend, run_scenario
10
+
11
+
12
+def non_streaming_config() -> AgentConfig:
13
+    """Shared deterministic config for runtime tool tests."""
14
+
15
+    return AgentConfig(auto_context=False, stream=False, max_iterations=4)
16
+
17
+
18
+async def _answer(question: str, options: list[str] | None) -> str:
19
+    assert "Which path" in question
20
+    assert options == ["Plan first", "Execute now"]
21
+    return "1"
22
+
23
+
24
+@pytest.mark.asyncio
25
+async def test_ask_user_question_round_trips_through_runtime() -> None:
26
+    backend = ScriptedBackend(
27
+        completions=[
28
+            CompletionResponse(
29
+                content="I need one clarification.",
30
+                tool_calls=[
31
+                    ToolCall(
32
+                        id="ask-1",
33
+                        name="AskUserQuestion",
34
+                        arguments={
35
+                            "question": "Which path should we take?",
36
+                            "options": ["Plan first", "Execute now"],
37
+                        },
38
+                    )
39
+                ],
40
+            ),
41
+            CompletionResponse(content="We'll plan first."),
42
+        ]
43
+    )
44
+
45
+    run = await run_scenario(
46
+        "Implement the task, but ask me which path to take first.",
47
+        backend,
48
+        config=non_streaming_config(),
49
+        on_user_question=_answer,
50
+    )
51
+
52
+    tool_events = [event for event in run.events if event.type == "tool_call"]
53
+    tool_results = [event for event in run.events if event.type == "tool_result"]
54
+
55
+    assert "We'll plan first." in run.response
56
+    assert [event.tool_name for event in tool_events] == ["AskUserQuestion"]
57
+    assert any("Plan first" in event.content for event in tool_results)
tests/test_workflow_tools.pyadded
@@ -0,0 +1,117 @@
1
+"""Tests for workflow-oriented tools introduced in Sprint 04."""
2
+
3
+from __future__ import annotations
4
+
5
+import json
6
+from pathlib import Path
7
+
8
+import pytest
9
+
10
+from loader.tools.workflow_tools import AskUserQuestionTool, TodoWriteTool
11
+
12
+
13
+@pytest.mark.asyncio
14
+async def test_todo_write_persists_and_returns_previous_state(tmp_path: Path) -> None:
15
+    tool = TodoWriteTool(tmp_path)
16
+
17
+    first = await tool.execute(
18
+        todos=[
19
+            {
20
+                "content": "Create runtime router",
21
+                "active_form": "Creating runtime router",
22
+                "status": "in_progress",
23
+            }
24
+        ]
25
+    )
26
+    second = await tool.execute(
27
+        todos=[
28
+            {
29
+                "content": "Create runtime router",
30
+                "active_form": "Creating runtime router",
31
+                "status": "completed",
32
+            }
33
+        ]
34
+    )
35
+
36
+    first_payload = json.loads(first.output)
37
+    second_payload = json.loads(second.output)
38
+    store_path = tmp_path / ".loader" / "todos" / "active.json"
39
+
40
+    assert first.is_error is False
41
+    assert first_payload["old_todos"] == []
42
+    assert second_payload["old_todos"] == first_payload["new_todos"]
43
+    assert json.loads(store_path.read_text()) == []
44
+
45
+
46
+@pytest.mark.asyncio
47
+async def test_todo_write_rejects_invalid_payloads_and_sets_verification_nudge(
48
+    tmp_path: Path,
49
+) -> None:
50
+    tool = TodoWriteTool(tmp_path)
51
+
52
+    empty = await tool.execute(todos=[])
53
+    blank = await tool.execute(
54
+        todos=[
55
+            {
56
+                "content": "  ",
57
+                "active_form": "Reviewing plan",
58
+                "status": "pending",
59
+            }
60
+        ]
61
+    )
62
+    nudged = await tool.execute(
63
+        todos=[
64
+            {
65
+                "content": "Implement router",
66
+                "active_form": "Implementing router",
67
+                "status": "completed",
68
+            },
69
+            {
70
+                "content": "Write tests",
71
+                "active_form": "Writing tests",
72
+                "status": "completed",
73
+            },
74
+            {
75
+                "content": "Update docs",
76
+                "active_form": "Updating docs",
77
+                "status": "completed",
78
+            },
79
+        ]
80
+    )
81
+
82
+    assert empty.is_error is True
83
+    assert "todos must not be empty" in empty.output
84
+    assert blank.is_error is True
85
+    assert "todo content must not be empty" in blank.output
86
+    assert json.loads(nudged.output)["verification_nudge_needed"] is True
87
+
88
+
89
+@pytest.mark.asyncio
90
+async def test_ask_user_question_uses_callback_and_resolves_numbered_options() -> None:
91
+    tool = AskUserQuestionTool()
92
+
93
+    async def answer(question: str, options: list[str] | None) -> str:
94
+        assert "Which path" in question
95
+        assert options == ["Plan first", "Execute now"]
96
+        return "2"
97
+
98
+    result = await tool.execute(
99
+        question="Which path should we take?",
100
+        options=["Plan first", "Execute now"],
101
+        user_response_handler=answer,
102
+    )
103
+
104
+    payload = json.loads(result.output)
105
+    assert result.is_error is False
106
+    assert payload["answer"] == "Execute now"
107
+    assert payload["status"] == "answered"
108
+
109
+
110
+@pytest.mark.asyncio
111
+async def test_ask_user_question_requires_callback() -> None:
112
+    tool = AskUserQuestionTool()
113
+
114
+    result = await tool.execute(question="Need an answer?")
115
+
116
+    assert result.is_error is True
117
+    assert "user_response_handler" in result.output