tenseleyflow/loader / ad61aba

Browse files

Adopt runtime-owned action reasoning services

Authored by espadonne
SHA
ad61aba70e9bb47b380336335314b48b4fb9444d
Parents
8902e62
Tree
771c2bb

10 changed files

Status | File | + | -
M src/loader/agent/loop.py 2 100
A src/loader/runtime/action_reasoning.py 227 0
M src/loader/runtime/context.py 50 5
A src/loader/runtime/reasoning_service.py 100 0
M src/loader/runtime/tool_batches.py 6 6
M tests/test_assistant_turns.py 0 4
M tests/test_finalization.py 0 4
M tests/test_repair.py 0 4
M tests/test_runtime_context.py 3 3
M tests/test_tool_batches.py 11 13
src/loader/agent/loop.py — modified
@@ -20,6 +20,7 @@ from ..runtime.permissions import (
2020
     load_permission_rules,
2121
 )
2222
 from ..runtime.prompt_history import PromptSnapshot
23
+from ..runtime.reasoning_service import RuntimeReasoningService
2324
 from ..runtime.session import ConversationSession
2425
 from ..runtime.workflow import WorkflowMode
2526
 from ..tools.base import ToolRegistry, create_default_registry
@@ -33,25 +34,15 @@ from .planner import (
3334
 )
3435
 from .prompts import build_system_prompt_result
3536
 from .reasoning import (
36
-    CONFIDENCE_PROMPT,
3737
     DECOMPOSITION_PROMPT,
3838
     SELF_CRITIQUE_PROMPT,
39
-    VERIFICATION_PROMPT,
40
-    ActionVerification,
41
-    ConfidenceAssessment,
42
-    ConfidenceLevel,
4339
     SelfCritique,
4440
     TaskDecomposition,
45
-    estimate_confidence_quick,
4641
     is_conversational,
47
-    parse_confidence,
4842
     parse_decomposition,
4943
     parse_self_critique,
50
-    parse_verification,
51
-    quick_verify,
5244
     should_decompose,
5345
 )
54
-from .recovery import RecoveryContext
5546
 from .safeguards import RuntimeSafeguards
5647
 
5748
 
@@ -151,9 +142,6 @@ class Agent:
151142
         self.capability_profile = resolve_backend_capability_profile(self.backend)
152143
         self.last_turn_summary: TurnSummary | None = None
153144
 
154
-        # Recovery tracking
155
-        self._recovery_context: RecoveryContext | None = None
156
-
157145
         # Steering: allow user to send messages during execution
158146
         self._steering_queue: asyncio.Queue[str] = asyncio.Queue()
159147
         self._is_running: bool = False
@@ -376,12 +364,6 @@ class Agent:
376364
             if context is not None:
377365
                 context.capability_profile = self.capability_profile
378366
 
379
-        def _get_recovery_context() -> RecoveryContext | None:
380
-            return self._recovery_context
381
-
382
-        def _set_recovery_context(value: RecoveryContext | None) -> None:
383
-            self._recovery_context = value
384
-
385367
         context = RuntimeContext(
386368
             project_root=self.project_root,
387369
             backend=self.backend,
@@ -400,11 +382,8 @@ class Agent:
400382
                 queue_steering_message=_queue_steering_message,
401383
                 set_workflow_mode=_set_workflow_mode,
402384
                 refresh_capability_profile=_refresh_capability_profile,
403
-                assess_confidence=self._assess_confidence,
404
-                verify_action=self._verify_action,
405
-                get_recovery_context=_get_recovery_context,
406
-                set_recovery_context=_set_recovery_context,
407385
             ),
386
+            reasoning=RuntimeReasoningService(self.backend, self.config),
408387
             prompt_format=self.prompt_format,
409388
             prompt_sections=list(self.prompt_sections),
410389
         )
@@ -484,82 +463,6 @@ class Agent:
484463
         )
485464
         return parse_self_critique(critique_response.content, response)
486465
 
487
-    async def _assess_confidence(
488
-        self,
489
-        tool_name: str,
490
-        tool_args: dict,
491
-        context: str = "",
492
-    ) -> ConfidenceAssessment:
493
-        """Assess confidence in a tool action."""
494
-        cfg = self.config.reasoning
495
-
496
-        # Try quick heuristic first
497
-        if cfg.use_quick_confidence:
498
-            quick_level = estimate_confidence_quick(tool_name, tool_args, context)
499
-            # Only call LLM if quick estimate is low
500
-            if quick_level.value >= ConfidenceLevel.MEDIUM.value:
501
-                return ConfidenceAssessment(
502
-                    action=f"{tool_name} with {tool_args}",
503
-                    tool_name=tool_name,
504
-                    tool_args=tool_args,
505
-                    level=quick_level,
506
-                    reasoning="Quick heuristic assessment",
507
-                )
508
-
509
-        # Full LLM assessment
510
-        action = f"Call {tool_name} with arguments: {tool_args}"
511
-        prompt = CONFIDENCE_PROMPT.format(
512
-            action=action,
513
-            tool_name=tool_name,
514
-            tool_args=tool_args,
515
-            context=context[-2000:] if context else "No prior context",
516
-        )
517
-        response = await self.backend.complete(
518
-            messages=[Message(role=Role.USER, content=prompt)],
519
-            tools=None,
520
-            temperature=0.3,
521
-            max_tokens=300,
522
-        )
523
-        return parse_confidence(response.content, tool_name, tool_args)
524
-
525
-    async def _verify_action(
526
-        self,
527
-        tool_name: str,
528
-        tool_args: dict,
529
-        result: str,
530
-        expected: str = "",
531
-    ) -> ActionVerification:
532
-        """Verify that an action produced the expected result."""
533
-        cfg = self.config.reasoning
534
-
535
-        # Try quick verification first
536
-        if cfg.use_quick_verification:
537
-            quick_result = quick_verify(tool_name, tool_args, result)
538
-            if quick_result:
539
-                return ActionVerification(
540
-                    tool_name=tool_name,
541
-                    tool_args=tool_args,
542
-                    expected_outcome=expected or "Success",
543
-                    actual_result=result[:500],
544
-                    verified=True,
545
-                    verification_method="quick_heuristic",
546
-                )
547
-
548
-        # Full LLM verification
549
-        prompt = VERIFICATION_PROMPT.format(
550
-            tool_name=tool_name,
551
-            tool_args=tool_args,
552
-            expected=expected or "The action should complete successfully",
553
-            result=result[:2000],  # Truncate long results
554
-        )
555
-        response = await self.backend.complete(
556
-            messages=[Message(role=Role.USER, content=prompt)],
557
-            tools=None,
558
-            temperature=0.3,
559
-            max_tokens=300,
560
-        )
561
-        return parse_verification(response.content, tool_name, tool_args, expected, result)
562
-
563466
     async def _handle_conversational(
564467
         self,
565468
         user_message: str,
@@ -1179,7 +1082,6 @@ class Agent:
11791082
         self.prompt_format = None
11801083
         self.prompt_sections = []
11811084
         self.session = self._create_session(messages=self.messages)
1182
-        self._recovery_context = None
11831085
         self._current_task = None
11841086
         self.last_turn_summary = None
11851087
         self.workflow_mode = WorkflowMode.EXECUTE.value
src/loader/runtime/action_reasoning.py — added
@@ -0,0 +1,227 @@
1
+"""Runtime-owned prompts and heuristics for action reasoning."""
2
+
3
+from __future__ import annotations
4
+
5
+import re
6
+
7
+from .reasoning_types import (
8
+    ActionVerification,
9
+    ConfidenceAssessment,
10
+    ConfidenceLevel,
11
+)
12
+
13
+CONFIDENCE_PROMPT = """Rate your confidence in this action before executing.
14
+
15
+Action: {action}
16
+Tool: {tool_name}
17
+Arguments: {tool_args}
18
+
19
+Previous context:
20
+{context}
21
+
22
+Consider:
23
+1. Do you have enough information to proceed?
24
+2. What could go wrong?
25
+3. Is this the right approach?
26
+4. Are there better alternatives?
27
+
28
+Respond in this exact JSON format:
29
+{{
30
+  "confidence": 1-5,  // 1=very low, 2=low, 3=medium, 4=high, 5=very high
31
+  "reasoning": "Why this confidence level",
32
+  "risks": ["Risk 1", "Risk 2"],
33
+  "mitigations": ["How to mitigate risk 1"],
34
+  "requires_verification": true/false,
35
+  "alternative_approaches": ["Alternative 1 if confidence is low"]
36
+}}
37
+
38
+Only output the JSON, no other text."""
39
+
40
+
41
+VERIFICATION_PROMPT = """Verify that the action produced the expected result.
42
+
43
+Action taken:
44
+- Tool: {tool_name}
45
+- Arguments: {tool_args}
46
+
47
+Expected outcome: {expected}
48
+
49
+Actual result:
50
+{result}
51
+
52
+Analyze:
53
+1. Did the action succeed?
54
+2. Does the result match expectations?
55
+3. Are there any unexpected side effects?
56
+4. Is any correction needed?
57
+
58
+Respond in this exact JSON format:
59
+{{
60
+  "verified": true/false,
61
+  "verification_method": "How you verified (e.g., output_contains, no_error, file_created)",
62
+  "discrepancies": ["Discrepancy 1 if any"],
63
+  "needs_correction": true/false,
64
+  "correction_suggestion": "What to do if correction is needed"
65
+}}
66
+
67
+Only output the JSON, no other text."""
68
+
69
+
70
+def parse_confidence(
71
+    response: str,
72
+    tool_name: str,
73
+    tool_args: dict,
74
+) -> ConfidenceAssessment:
75
+    """Parse LLM response into ConfidenceAssessment."""
76
+
77
+    import json
78
+
79
+    json_match = re.search(r"\{.*\}", response, re.DOTALL)
80
+    if not json_match:
81
+        return ConfidenceAssessment(
82
+            action="",
83
+            tool_name=tool_name,
84
+            tool_args=tool_args,
85
+        )
86
+
87
+    try:
88
+        data = json.loads(json_match.group())
89
+        confidence_val = data.get("confidence", 3)
90
+        level = ConfidenceLevel(max(1, min(5, confidence_val)))
91
+
92
+        return ConfidenceAssessment(
93
+            action=data.get("action", ""),
94
+            tool_name=tool_name,
95
+            tool_args=tool_args,
96
+            level=level,
97
+            reasoning=data.get("reasoning", ""),
98
+            risks=data.get("risks", []),
99
+            mitigations=data.get("mitigations", []),
100
+            requires_verification=data.get("requires_verification", level.value <= 3),
101
+        )
102
+    except (json.JSONDecodeError, ValueError):
103
+        return ConfidenceAssessment(
104
+            action="",
105
+            tool_name=tool_name,
106
+            tool_args=tool_args,
107
+        )
108
+
109
+
110
+def parse_verification(
111
+    response: str,
112
+    tool_name: str,
113
+    tool_args: dict,
114
+    expected: str,
115
+    result: str,
116
+) -> ActionVerification:
117
+    """Parse LLM response into ActionVerification."""
118
+
119
+    import json
120
+
121
+    json_match = re.search(r"\{.*\}", response, re.DOTALL)
122
+    if not json_match:
123
+        return ActionVerification(
124
+            tool_name=tool_name,
125
+            tool_args=tool_args,
126
+            expected_outcome=expected,
127
+            actual_result=result,
128
+            verified="error" not in result.lower(),
129
+        )
130
+
131
+    try:
132
+        data = json.loads(json_match.group())
133
+        return ActionVerification(
134
+            tool_name=tool_name,
135
+            tool_args=tool_args,
136
+            expected_outcome=expected,
137
+            actual_result=result,
138
+            verified=data.get("verified", False),
139
+            verification_method=data.get("verification_method", ""),
140
+            discrepancies=data.get("discrepancies", []),
141
+            needs_correction=data.get("needs_correction", False),
142
+            correction_suggestion=data.get("correction_suggestion", ""),
143
+        )
144
+    except json.JSONDecodeError:
145
+        return ActionVerification(
146
+            tool_name=tool_name,
147
+            tool_args=tool_args,
148
+            expected_outcome=expected,
149
+            actual_result=result,
150
+            verified="error" not in result.lower(),
151
+        )
152
+
153
+
154
+def estimate_confidence_quick(
155
+    tool_name: str,
156
+    tool_args: dict,
157
+    context: str = "",
158
+) -> ConfidenceLevel:
159
+    """Estimate action confidence with heuristics before an LLM call."""
160
+
161
+    del context
162
+
163
+    if tool_name in {"read", "glob", "grep", "git"}:
164
+        return ConfidenceLevel.HIGH
165
+
166
+    if tool_name == "write":
167
+        file_path = tool_args.get("file_path", "")
168
+        if not file_path:
169
+            return ConfidenceLevel.LOW
170
+        return ConfidenceLevel.MEDIUM
171
+
172
+    if tool_name == "edit":
173
+        old_string = tool_args.get("old_string", "")
174
+        if not old_string:
175
+            return ConfidenceLevel.LOW
176
+        return ConfidenceLevel.MEDIUM
177
+
178
+    if tool_name == "patch":
179
+        hunks = tool_args.get("hunks", [])
180
+        if not hunks:
181
+            return ConfidenceLevel.LOW
182
+        return ConfidenceLevel.MEDIUM
183
+
184
+    if tool_name == "bash":
185
+        command = tool_args.get("command", "")
186
+        dangerous = ["rm -rf", "sudo", "chmod 777", "dd if=", "> /dev/"]
187
+        if any(pattern in command for pattern in dangerous):
188
+            return ConfidenceLevel.VERY_LOW
189
+        safe_read = ["ls", "cat", "grep", "find", "pwd", "echo", "head", "tail"]
190
+        if any(command.strip().startswith(prefix) for prefix in safe_read):
191
+            return ConfidenceLevel.HIGH
192
+        return ConfidenceLevel.MEDIUM
193
+
194
+    return ConfidenceLevel.MEDIUM
195
+
196
+
197
+def quick_verify(tool_name: str, tool_args: dict, result: str) -> bool:
198
+    """Estimate whether a tool action succeeded without an LLM call."""
199
+
200
+    del tool_args
201
+
202
+    result_lower = result.lower()
203
+    error_indicators = [
204
+        "error:",
205
+        "failed",
206
+        "not found",
207
+        "permission denied",
208
+        "no such file",
209
+        "command not found",
210
+        "exception",
211
+        "traceback",
212
+        "fatal:",
213
+        "cannot",
214
+    ]
215
+    if any(indicator in result_lower for indicator in error_indicators):
216
+        return False
217
+
218
+    if tool_name == "write":
219
+        return "created" in result_lower or "wrote" in result_lower or len(result) < 200
220
+    if tool_name in {"edit", "patch"}:
221
+        return "edited" in result_lower or "patched" in result_lower or "+" in result or "-" in result
222
+    if tool_name in {"read", "git"}:
223
+        return len(result.strip()) > 0
224
+    if tool_name == "bash":
225
+        return True
226
+
227
+    return True
src/loader/runtime/context.py — modified
@@ -2,7 +2,7 @@
22
 
33
 from __future__ import annotations
44
 
5
-from collections.abc import Awaitable, Callable
5
+from collections.abc import Callable
66
 from dataclasses import dataclass, field
77
 from pathlib import Path
88
 from typing import Any, Protocol
@@ -74,6 +74,28 @@ class RuntimeSafeguardsProtocol(Protocol):
7474
     def record_response(self, content: str) -> None:
7575
         """Record a completed assistant response for safeguard bookkeeping."""
7676
 
77
+
78
+class RuntimeReasoningServiceProtocol(Protocol):
79
+    """Typed action-reasoning surface the runtime can rely on."""
80
+
81
+    async def assess_confidence(
82
+        self,
83
+        tool_name: str,
84
+        tool_args: dict[str, Any],
85
+        context: str = "",
86
+    ) -> ConfidenceAssessment:
87
+        """Assess confidence in a planned tool action."""
88
+
89
+    async def verify_action(
90
+        self,
91
+        tool_name: str,
92
+        tool_args: dict[str, Any],
93
+        result: str,
94
+        expected: str = "",
95
+    ) -> ActionVerification:
96
+        """Verify that a tool action produced the desired result."""
97
+
98
+
7799
 @dataclass(slots=True)
78100
 class RuntimeLegacyServices:
79101
     """Explicit migration seams for legacy agent-owned behavior."""
@@ -83,10 +105,6 @@ class RuntimeLegacyServices:
83105
     queue_steering_message: Callable[[str], None]
84106
     set_workflow_mode: Callable[[str], None]
85107
     refresh_capability_profile: Callable[[], None]
86
-    assess_confidence: Callable[[str, dict[str, Any], str], Awaitable[ConfidenceAssessment]]
87
-    verify_action: Callable[[str, dict[str, Any], str, str], Awaitable[ActionVerification]]
88
-    get_recovery_context: Callable[[], RecoveryContext | None]
89
-    set_recovery_context: Callable[[RecoveryContext | None], None]
90108
 
91109
 
92110
 @dataclass(slots=True)
@@ -105,6 +123,8 @@ class RuntimeContext:
105123
     workflow_mode: str
106124
     safeguards: RuntimeSafeguardsProtocol
107125
     legacy: RuntimeLegacyServices
126
+    reasoning: RuntimeReasoningServiceProtocol | None = None
127
+    recovery_context: RecoveryContext | None = None
108128
     prompt_format: str | None = None
109129
     prompt_sections: list[str] = field(default_factory=list)
110130
 
@@ -131,3 +151,28 @@ class RuntimeContext:
131151
         """Return rule counts for the active permission policy."""
132152
 
133153
         return self.permission_policy.rule_counts()
154
+
155
+    async def assess_confidence(
156
+        self,
157
+        tool_name: str,
158
+        tool_args: dict[str, Any],
159
+        context: str = "",
160
+    ) -> ConfidenceAssessment:
161
+        """Assess confidence using the primary runtime reasoning service."""
162
+
163
+        if self.reasoning is None:
164
+            raise RuntimeError("RuntimeContext.reasoning is required for confidence checks")
165
+        return await self.reasoning.assess_confidence(tool_name, tool_args, context)
166
+
167
+    async def verify_action(
168
+        self,
169
+        tool_name: str,
170
+        tool_args: dict[str, Any],
171
+        result: str,
172
+        expected: str = "",
173
+    ) -> ActionVerification:
174
+        """Verify a tool action using the primary runtime reasoning service."""
175
+
176
+        if self.reasoning is None:
177
+            raise RuntimeError("RuntimeContext.reasoning is required for verification checks")
178
+        return await self.reasoning.verify_action(tool_name, tool_args, result, expected)
src/loader/runtime/reasoning_service.py — added
@@ -0,0 +1,100 @@
1
+"""Runtime-owned confidence and verification services."""
2
+
3
+from __future__ import annotations
4
+
5
+from typing import Any
6
+
7
+from ..llm.base import LLMBackend, Message, Role
8
+from .action_reasoning import (
9
+    CONFIDENCE_PROMPT,
10
+    VERIFICATION_PROMPT,
11
+    estimate_confidence_quick,
12
+    parse_confidence,
13
+    parse_verification,
14
+    quick_verify,
15
+)
16
+from .reasoning_types import (
17
+    ActionVerification,
18
+    ConfidenceAssessment,
19
+    ConfidenceLevel,
20
+)
21
+
22
+
23
+class RuntimeReasoningService:
24
+    """Provide confidence scoring and verification without Agent callbacks."""
25
+
26
+    def __init__(self, backend: LLMBackend, config: Any) -> None:
27
+        self.backend = backend
28
+        self.config = config
29
+
30
+    async def assess_confidence(
31
+        self,
32
+        tool_name: str,
33
+        tool_args: dict[str, Any],
34
+        context: str = "",
35
+    ) -> ConfidenceAssessment:
36
+        """Assess confidence in a planned tool action."""
37
+
38
+        cfg = self.config.reasoning
39
+
40
+        if getattr(cfg, "use_quick_confidence", True):
41
+            quick_level = estimate_confidence_quick(tool_name, tool_args, context)
42
+            if quick_level.value >= ConfidenceLevel.MEDIUM.value:
43
+                return ConfidenceAssessment(
44
+                    action=f"{tool_name} with {tool_args}",
45
+                    tool_name=tool_name,
46
+                    tool_args=tool_args,
47
+                    level=quick_level,
48
+                    reasoning="Quick heuristic assessment",
49
+                )
50
+
51
+        action = f"Call {tool_name} with arguments: {tool_args}"
52
+        prompt = CONFIDENCE_PROMPT.format(
53
+            action=action,
54
+            tool_name=tool_name,
55
+            tool_args=tool_args,
56
+            context=context[-2000:] if context else "No prior context",
57
+        )
58
+        response = await self.backend.complete(
59
+            messages=[Message(role=Role.USER, content=prompt)],
60
+            tools=None,
61
+            temperature=0.3,
62
+            max_tokens=300,
63
+        )
64
+        return parse_confidence(response.content, tool_name, tool_args)
65
+
66
+    async def verify_action(
67
+        self,
68
+        tool_name: str,
69
+        tool_args: dict[str, Any],
70
+        result: str,
71
+        expected: str = "",
72
+    ) -> ActionVerification:
73
+        """Verify that a completed tool action achieved its goal."""
74
+
75
+        cfg = self.config.reasoning
76
+
77
+        if getattr(cfg, "use_quick_verification", True):
78
+            if quick_verify(tool_name, tool_args, result):
79
+                return ActionVerification(
80
+                    tool_name=tool_name,
81
+                    tool_args=tool_args,
82
+                    expected_outcome=expected or "Success",
83
+                    actual_result=result[:500],
84
+                    verified=True,
85
+                    verification_method="quick_heuristic",
86
+                )
87
+
88
+        prompt = VERIFICATION_PROMPT.format(
89
+            tool_name=tool_name,
90
+            tool_args=tool_args,
91
+            expected=expected or "The action should complete successfully",
92
+            result=result[:2000],
93
+        )
94
+        response = await self.backend.complete(
95
+            messages=[Message(role=Role.USER, content=prompt)],
96
+            tools=None,
97
+            temperature=0.3,
98
+            max_tokens=300,
99
+        )
100
+        return parse_verification(response.content, tool_name, tool_args, expected, result)
src/loader/runtime/tool_batches.py — modified
@@ -181,7 +181,7 @@ class ToolBatchRunner:
181181
             for message in self.context.messages[-5:]
182182
             if message.content
183183
         )
184
-        confidence = await self.context.legacy.assess_confidence(
184
+        confidence = await self.context.assess_confidence(
185185
             tool_call.name,
186186
             tool_call.arguments,
187187
             context,
@@ -224,7 +224,7 @@ class ToolBatchRunner:
224224
             if isinstance(new_todos, list):
225225
                 sync_todos_to_definition_of_done(dod, new_todos)
226226
         self.dod_store.save(dod)
227
-        self.context.legacy.set_recovery_context(None)
227
+        self.context.recovery_context = None
228228
         return None
229229
 
230230
     async def _run_post_tool_verification(
@@ -244,7 +244,7 @@ class ToolBatchRunner:
244244
         ):
245245
             return False
246246
 
247
-        verification = await self.context.legacy.verify_action(
247
+        verification = await self.context.verify_action(
248248
             tool_call.name,
249249
             tool_call.arguments,
250250
             outcome.result_output,
@@ -277,14 +277,14 @@ class ToolBatchRunner:
277277
     ) -> Message | None:
278278
         """Generate a recovery follow-up after an executed tool failure."""
279279
 
280
-        recovery_context = self.context.legacy.get_recovery_context()
280
+        recovery_context = self.context.recovery_context
281281
         if recovery_context is None:
282282
             recovery_context = RecoveryContext(
283283
                 original_tool=tool_call.name,
284284
                 original_args=tool_call.arguments,
285285
                 max_retries=self.context.config.max_recovery_attempts,
286286
             )
287
-            self.context.legacy.set_recovery_context(recovery_context)
287
+            self.context.recovery_context = recovery_context
288288
 
289289
         if recovery_context.is_similar_attempt(
290290
             tool_call.name,
@@ -341,7 +341,7 @@ class ToolBatchRunner:
341341
                 tool_name=tool_call.name,
342342
             )
343343
         )
344
-        self.context.legacy.set_recovery_context(None)
344
+        self.context.recovery_context = None
345345
         return Message.tool_result_message(
346346
             tool_call_id=tool_call.id,
347347
             display_content=(f"Observation [{tool_call.name}]: Error: {failure_message}"),
tests/test_assistant_turns.py — modified
@@ -137,10 +137,6 @@ def build_runtime_context(
137137
             queue_steering_message=queued_messages.append,
138138
             set_workflow_mode=lambda mode: None,
139139
             refresh_capability_profile=lambda: None,
140
-            assess_confidence=lambda tool_name, tool_args, context: None,  # type: ignore[arg-type]
141
-            verify_action=lambda tool_name, tool_args, result, expected: None,  # type: ignore[arg-type]
142
-            get_recovery_context=lambda: None,
143
-            set_recovery_context=lambda value: None,
144140
         ),
145141
     )
146142
     return context, queued_messages
tests/test_finalization.py — modified
@@ -122,10 +122,6 @@ def build_context(temp_dir: Path, session: FakeSession) -> RuntimeContext:
122122
             queue_steering_message=lambda message: None,
123123
             set_workflow_mode=lambda mode: None,
124124
             refresh_capability_profile=lambda: None,
125
-            assess_confidence=lambda tool_name, tool_args, context: None,  # type: ignore[arg-type]
126
-            verify_action=lambda tool_name, tool_args, result, expected: None,  # type: ignore[arg-type]
127
-            get_recovery_context=lambda: None,
128
-            set_recovery_context=lambda value: None,
129125
         ),
130126
     )
131127
 
tests/test_repair.py — modified
@@ -91,10 +91,6 @@ def build_context(
9191
             queue_steering_message=lambda message: None,
9292
             set_workflow_mode=lambda mode: None,
9393
             refresh_capability_profile=lambda: None,
94
-            assess_confidence=lambda tool_name, tool_args, context: None,  # type: ignore[arg-type]
95
-            verify_action=lambda tool_name, tool_args, result, expected: None,  # type: ignore[arg-type]
96
-            get_recovery_context=lambda: None,
97
-            set_recovery_context=lambda value: None,
9894
         ),
9995
     )
10096
 
tests/test_runtime_context.py — modified
@@ -36,6 +36,7 @@ def test_agent_builds_typed_runtime_context(temp_dir: Path) -> None:
3636
     assert context.use_react == agent.use_react
3737
     assert context.active_permission_mode == agent.active_permission_mode
3838
     assert context.active_permission_rule_counts == agent.active_permission_rule_counts
39
+    assert context.reasoning is not None
3940
     assert context.legacy.message_history() is agent.messages
4041
 
4142
 
@@ -56,9 +57,8 @@ def test_runtime_context_legacy_services_stay_in_sync(temp_dir: Path) -> None:
5657
     assert context.workflow_mode == "clarify"
5758
 
5859
     recovery = RecoveryContext(original_tool="read", original_args={"file_path": "README.md"})
59
-    context.legacy.set_recovery_context(recovery)
60
-    assert agent._recovery_context is recovery
61
-    assert context.legacy.get_recovery_context() is recovery
60
+    context.recovery_context = recovery
61
+    assert context.recovery_context is recovery
6262
 
6363
     context.legacy.refresh_capability_profile()
6464
     assert context.capability_profile == agent.capability_profile
tests/test_tool_batches.py — modified
@@ -2,7 +2,6 @@
22
 
33
 from __future__ import annotations
44
 
5
-from dataclasses import dataclass
65
 from pathlib import Path
76
 from types import SimpleNamespace
87
 
@@ -18,14 +17,13 @@ from loader.runtime.permissions import (
1817
     build_permission_policy,
1918
     load_permission_rules,
2019
 )
21
-from loader.runtime.recovery import RecoveryContext
2220
 from loader.runtime.reasoning_types import (
2321
     ActionVerification,
2422
     ConfidenceAssessment,
2523
     ConfidenceLevel,
2624
 )
25
+from loader.runtime.recovery import RecoveryContext
2726
 from loader.runtime.tool_batches import ToolBatchRunner
28
-from loader.runtime.tracing import RuntimeTracer
2927
 from loader.tools.base import ToolResult as RegistryToolResult
3028
 from loader.tools.base import create_default_registry
3129
 from tests.helpers.runtime_harness import ScriptedBackend
@@ -97,7 +95,7 @@ def build_context(
9795
     verification: bool = False,
9896
     auto_recover: bool = True,
9997
     min_confidence_for_action: int = 3,
100
-) -> tuple[RuntimeContext, dict[str, RecoveryContext | None]]:
98
+) -> RuntimeContext:
10199
     registry = create_default_registry(temp_dir)
102100
     registry.configure_workspace_root(temp_dir)
103101
     rule_status = load_permission_rules(temp_dir)
@@ -107,7 +105,6 @@ def build_context(
107105
         tool_requirements=registry.get_tool_requirements(),
108106
         rules=rule_status.rules,
109107
     )
110
-    recovery_holder = {"value": recovery_context}
111108
     context = RuntimeContext(
112109
         project_root=temp_dir,
113110
         backend=ScriptedBackend(),
@@ -140,13 +137,14 @@ def build_context(
140137
             queue_steering_message=lambda message: None,
141138
             set_workflow_mode=lambda mode: None,
142139
             refresh_capability_profile=lambda: None,
140
+        ),
141
+        reasoning=SimpleNamespace(
143142
             assess_confidence=assess_confidence,
144143
             verify_action=verify_action,
145
-            get_recovery_context=lambda: recovery_holder["value"],
146
-            set_recovery_context=lambda value: recovery_holder.__setitem__("value", value),
147144
         ),
145
+        recovery_context=recovery_context,
148146
     )
149
-    return context, recovery_holder
147
+    return context
150148
 
151149
 
152150
 def tool_outcome(
@@ -189,7 +187,7 @@ async def test_tool_batch_runner_uses_context_for_confidence_gate(temp_dir: Path
189187
     async def verify_action(tool_name: str, tool_args: dict, result: str, expected: str = "") -> ActionVerification:
190188
         raise AssertionError("Verification should not run for skipped actions")
191189
 
192
-    context, _ = build_context(
190
+    context = build_context(
193191
         temp_dir=temp_dir,
194192
         messages=[
195193
             Message(role=Role.USER, content="Please inspect the project."),
@@ -239,7 +237,7 @@ async def test_tool_batch_runner_tracks_recovery_with_legacy_context(temp_dir: P
239237
     async def verify_action(tool_name: str, tool_args: dict, result: str, expected: str = "") -> ActionVerification:
240238
         raise AssertionError("Verification should not run for failed actions")
241239
 
242
-    context, recovery_holder = build_context(
240
+    context = build_context(
243241
         temp_dir=temp_dir,
244242
         messages=[],
245243
         safeguards=FakeSafeguards(),
@@ -270,7 +268,7 @@ async def test_tool_batch_runner_tracks_recovery_with_legacy_context(temp_dir: P
270268
         consecutive_errors=0,
271269
     )
272270
 
273
-    assert recovery_holder["value"] is not None
271
+    assert context.recovery_context is not None
274272
     assert summary.tool_result_messages
275273
     assert context.session.messages[-1] == summary.tool_result_messages[-1]
276274
     assert any(event.type == "recovery" for event in events)
@@ -300,7 +298,7 @@ async def test_tool_batch_runner_verifies_with_context_services(temp_dir: Path)
300298
         original_tool="edit",
301299
         original_args={"file_path": "README.md"},
302300
     )
303
-    context, recovery_holder = build_context(
301
+    context = build_context(
304302
         temp_dir=temp_dir,
305303
         messages=[],
306304
         safeguards=FakeSafeguards(),
@@ -332,7 +330,7 @@ async def test_tool_batch_runner_verifies_with_context_services(temp_dir: Path)
332330
     )
333331
 
334332
     assert verification_calls == ["file contents"]
335
-    assert recovery_holder["value"] is None
333
+    assert context.recovery_context is None
336334
     assert context.session.messages[-1].role == Role.TOOL
337335
     assert context.session.messages[-1].content == "file contents"
338336
     assert any(event.type == "verification" for event in events)