Refactor Loader into a typed turn runtime
- SHA
3c5be8521db3e1a409a1d619ae6c3e45bb0f2c26- Parents
-
51c2c2e - Tree
4283571
3c5be85
3c5be8521db3e1a409a1d619ae6c3e45bb0f2c2651c2c2e
4283571| Status | File | + | - |
|---|---|---|---|
| M |
src/loader/agent/loop.py
|
32 | 939 |
| M |
src/loader/llm/base.py
|
29 | 1 |
| M |
src/loader/runtime/__init__.py
|
6 | 1 |
| M |
src/loader/runtime/capabilities.py
|
40 | 1 |
| A |
src/loader/runtime/conversation.py
|
705 | 0 |
| A |
src/loader/runtime/executor.py
|
217 | 0 |
src/loader/agent/loop.pymodified@@ -8,6 +8,10 @@ from typing import AsyncIterator, Awaitable, Callable | ||
| 8 | 8 | from ..llm.base import LLMBackend, Message, Role, ToolCall |
| 9 | 9 | from ..tools.base import ToolRegistry, create_default_registry, ConfirmationRequired |
| 10 | 10 | from ..context.project import ProjectContext, detect_project |
| 11 | +from ..runtime.capabilities import resolve_backend_capability_profile | |
| 12 | +from ..runtime.conversation import ConversationRuntime | |
| 13 | +from ..runtime.events import AgentEvent, TurnSummary | |
| 14 | +from ..runtime.session import ConversationSession | |
| 11 | 15 | from .prompts import build_system_prompt |
| 12 | 16 | from .parsing import parse_tool_calls, format_tool_result |
| 13 | 17 | from .planner import Plan, parse_plan, should_plan, format_step_prompt, PLANNING_PROMPT, SHOULD_PLAN_PROMPT |
@@ -45,7 +49,7 @@ from .reasoning import ( | ||
| 45 | 49 | estimate_complexity, |
| 46 | 50 | get_token_budget, |
| 47 | 51 | ) |
| 48 | -from .safeguards import RuntimeSafeguards, ValidationResult | |
| 52 | +from .safeguards import RuntimeSafeguards | |
| 49 | 53 | |
| 50 | 54 | |
| 51 | 55 | @dataclass |
@@ -99,34 +103,6 @@ class AgentConfig: | ||
| 99 | 103 | self.reasoning = ReasoningConfig() |
| 100 | 104 | |
| 101 | 105 | |
| 102 | -@dataclass | |
| 103 | -class AgentEvent: | |
| 104 | - """Event emitted during agent execution.""" | |
| 105 | - # Event types: thinking, tool_call, tool_result, response, error, plan, step, | |
| 106 | - # recovery, stream, confirmation, steering, decomposition, subtask, critique, | |
| 107 | - # confidence, verification | |
| 108 | - type: str | |
| 109 | - content: str = "" | |
| 110 | - tool_name: str | None = None | |
| 111 | - tool_args: dict | None = None | |
| 112 | - step_info: str | None = None # For step progress like "[2/5] Doing X" | |
| 113 | - recovery_attempt: int | None = None # For recovery events | |
| 114 | - is_stream_end: bool = False # For stream events - indicates final chunk | |
| 115 | - confirm_message: str | None = None # For confirmation events | |
| 116 | - confirm_details: str | None = None # For confirmation events | |
| 117 | - is_error: bool = False # For tool_result events | |
| 118 | - | |
| 119 | - # Reasoning events | |
| 120 | - decomposition: TaskDecomposition | None = None # For decomposition events | |
| 121 | - subtask: Subtask | None = None # For subtask events | |
| 122 | - critique: SelfCritique | None = None # For critique events | |
| 123 | - confidence: ConfidenceAssessment | None = None # For confidence events | |
| 124 | - verification: ActionVerification | None = None # For verification events | |
| 125 | - completion_check: TaskCompletionCheck | None = None # For completion events | |
| 126 | - rollback_plan: RollbackPlan | None = None # For rollback events | |
| 127 | - rollback_action: RollbackAction | None = None # For individual rollback action | |
| 128 | - | |
| 129 | - | |
| 130 | 106 | class Agent: |
| 131 | 107 | """The main agent that orchestrates the LLM and tools.""" |
| 132 | 108 | |
@@ -141,8 +117,15 @@ class Agent: | ||
| 141 | 117 | self.registry = registry or create_default_registry() |
| 142 | 118 | self.config = config or AgentConfig() |
| 143 | 119 | self.messages: list[Message] = [] |
| 120 | + self.session = ConversationSession( | |
| 121 | + system_message_factory=self._get_system_message, | |
| 122 | + few_shot_factory=self._get_few_shot_examples, | |
| 123 | + messages=self.messages, | |
| 124 | + ) | |
| 144 | 125 | self._system_message: Message | None = None |
| 145 | 126 | self._use_react: bool | None = None |
| 127 | + self.capability_profile = resolve_backend_capability_profile(self.backend) | |
| 128 | + self.last_turn_summary: TurnSummary | None = None | |
| 146 | 129 | |
| 147 | 130 | # Recovery tracking |
| 148 | 131 | self._recovery_context: RecoveryContext | None = None |
@@ -199,19 +182,7 @@ class Agent: | ||
| 199 | 182 | self._use_react = True |
| 200 | 183 | return True |
| 201 | 184 | |
| 202 | - # Check if backend supports native tools | |
| 203 | - if hasattr(self.backend, "supports_native_tools"): | |
| 204 | - supports_native = self.backend.supports_native_tools() | |
| 205 | - self._use_react = not supports_native | |
| 206 | - # Debug log | |
| 207 | - try: | |
| 208 | - with open("/tmp/loader_debug.log", "a") as f: | |
| 209 | - f.write(f"[loop] use_react: supports_native={supports_native}, use_react={self._use_react}\n") | |
| 210 | - except Exception: | |
| 211 | - pass | |
| 212 | - else: | |
| 213 | - # Default to ReAct for unknown backends | |
| 214 | - self._use_react = True | |
| 185 | + self._use_react = not self.capability_profile.supports_native_tools | |
| 215 | 186 | |
| 216 | 187 | return self._use_react |
| 217 | 188 | |
@@ -234,14 +205,13 @@ class Agent: | ||
| 234 | 205 | |
| 235 | 206 | def _build_messages(self) -> list[Message]: |
| 236 | 207 | """Build the full message list for the LLM.""" |
| 237 | - messages = [self._get_system_message()] | |
| 208 | + return self.session.build_request_messages() | |
| 238 | 209 | |
| 239 | - # Add few-shot examples if this is a fresh conversation | |
| 240 | - if len(self.messages) <= 2: # User message + maybe prefill | |
| 241 | - messages.extend(self._get_few_shot_examples()) | |
| 210 | + def refresh_capability_profile(self) -> None: | |
| 211 | + """Refresh the runtime capability profile from the current backend.""" | |
| 242 | 212 | |
| 243 | - messages.extend(self.messages) | |
| 244 | - return messages | |
| 213 | + self.capability_profile = resolve_backend_capability_profile(self.backend) | |
| 214 | + self._use_react = None | |
| 245 | 215 | |
| 246 | 216 | def _get_few_shot_examples(self) -> list[Message]: |
| 247 | 217 | """Get few-shot examples demonstrating proper tool use.""" |
@@ -612,897 +582,15 @@ class Agent: | ||
| 612 | 582 | original_task: str | None = None, |
| 613 | 583 | ) -> str: |
| 614 | 584 | """Inner execution loop without planning.""" |
| 615 | - iterations = 0 | |
| 616 | - final_response = "" | |
| 617 | - actions_taken: list[str] = [] # Track what we've done | |
| 618 | - continuation_count = 0 # How many times we've nudged to continue | |
| 619 | - empty_retry_count = 0 # How many times we've retried on empty response | |
| 620 | - MAX_EMPTY_RETRIES = 5 # More retries before giving up - small models need patience | |
| 621 | - extracted_iterations = 0 # How many times we've extracted bracket-format tool calls | |
| 622 | - MAX_EXTRACTED_ITERATIONS = 3 # Limit extracted tool call loops | |
| 623 | - consecutive_errors = 0 # Track consecutive tool errors | |
| 624 | - | |
| 625 | - # Adaptive token budgeting based on task complexity | |
| 626 | - complexity = estimate_complexity(task) | |
| 627 | - max_tokens, _ = get_token_budget(complexity) | |
| 628 | - # Use configured max_tokens as ceiling, complexity as floor | |
| 629 | - effective_max_tokens = min(self.config.max_tokens, max(max_tokens, 512)) | |
| 630 | - | |
| 631 | - # Rollback planning | |
| 632 | - rollback_plan = RollbackPlan() if self.config.reasoning.rollback else None | |
| 633 | - | |
| 634 | - while iterations < self.config.max_iterations: | |
| 635 | - iterations += 1 | |
| 636 | 585 | |
| 637 | - # On first iteration, add assistant prefilling to guide tool use | |
| 638 | - if iterations == 1 and len(self.messages) == 1: # Just the user's message | |
| 639 | - # Check if task looks like it needs immediate action | |
| 640 | - task_lower = task.lower() | |
| 641 | - action_keywords = ['create', 'write', 'make', 'run', 'execute', 'build', 'install', 'delete', 'remove', 'add', 'edit', 'modify', 'update', 'fix'] | |
| 642 | - if any(kw in task_lower for kw in action_keywords): | |
| 643 | - # Prime with partial assistant response - start of tool call | |
| 644 | - self.messages.append(Message( | |
| 645 | - role=Role.ASSISTANT, | |
| 646 | - content="[", | |
| 647 | - )) | |
| 648 | - try: | |
| 649 | - with open("/tmp/loader_debug.log", "a") as f: | |
| 650 | - f.write(f"[loop] Added assistant prefill '[' for action task\n") | |
| 651 | - except Exception: | |
| 652 | - pass | |
| 653 | - | |
| 654 | - # Check for steering messages from user | |
| 655 | - steering_messages = self._drain_steering_queue() | |
| 656 | - for steer_msg in steering_messages: | |
| 657 | - await emit(AgentEvent(type="steering", content=steer_msg)) | |
| 658 | - self.messages.append(Message( | |
| 659 | - role=Role.USER, | |
| 660 | - content=f"[USER INTERRUPTION]: {steer_msg}", | |
| 661 | - )) | |
| 662 | - | |
| 663 | - # Get completion from LLM | |
| 664 | - await emit(AgentEvent(type="thinking")) | |
| 665 | - | |
| 666 | - # Reset code block filter state for this LLM call | |
| 667 | - self.safeguards.code_filter.reset() | |
| 668 | - | |
| 669 | - # Pass tools only for native tool calling | |
| 670 | - tools = None if self.use_react else self.registry.get_schemas() | |
| 671 | - | |
| 672 | - # Use streaming or regular completion | |
| 673 | - pending_tool_calls_seen: set[str] = set() # Track IDs of pending tool calls shown | |
| 674 | - if self.config.stream: | |
| 675 | - full_content = "" | |
| 676 | - full_content_unfiltered = "" # Keep original for history | |
| 677 | - tool_calls: list[ToolCall] = [] | |
| 678 | - | |
| 679 | - async for chunk in self.backend.stream( | |
| 680 | - messages=self._build_messages(), | |
| 681 | - tools=tools, | |
| 682 | - temperature=self.config.temperature, | |
| 683 | - max_tokens=effective_max_tokens, | |
| 684 | - ): | |
| 685 | - # Filter content through safeguards (removes code blocks) | |
| 686 | - filtered_content = "" | |
| 687 | - if chunk.content: | |
| 688 | - filtered_content = self.safeguards.filter_stream_chunk(chunk.content) | |
| 689 | - full_content_unfiltered += chunk.content | |
| 690 | - | |
| 691 | - # Emit stream events for filtered content OR for final chunk (to signal end) | |
| 692 | - if filtered_content or chunk.is_done: | |
| 693 | - await emit(AgentEvent( | |
| 694 | - type="stream", | |
| 695 | - content=filtered_content, | |
| 696 | - is_stream_end=chunk.is_done, | |
| 697 | - )) | |
| 698 | - | |
| 699 | - # Check if we should inject steering (bad patterns detected) | |
| 700 | - if self.safeguards.should_steer(): | |
| 701 | - steering_msg = self.safeguards.get_steering_message() | |
| 702 | - if steering_msg: | |
| 703 | - # Queue steering for next iteration | |
| 704 | - self._steering_queue.put_nowait(steering_msg) | |
| 705 | - | |
| 706 | - # Show pending tool calls as they're detected (ReAct mode interleaving) | |
| 707 | - if chunk.pending_tool_call and chunk.pending_tool_call.id not in pending_tool_calls_seen: | |
| 708 | - pending_tool_calls_seen.add(chunk.pending_tool_call.id) | |
| 709 | - await emit(AgentEvent( | |
| 710 | - type="tool_call", | |
| 711 | - tool_name=chunk.pending_tool_call.name, | |
| 712 | - tool_args=chunk.pending_tool_call.arguments, | |
| 713 | - )) | |
| 714 | - if chunk.is_done: | |
| 715 | - full_content = chunk.full_content or full_content_unfiltered | |
| 716 | - tool_calls = chunk.tool_calls | |
| 717 | - # Debug log | |
| 718 | - try: | |
| 719 | - with open("/tmp/loader_debug.log", "a") as f: | |
| 720 | - f.write(f"[loop] chunk.is_done: got {len(tool_calls)} tool_calls\n") | |
| 721 | - except Exception: | |
| 722 | - pass | |
| 723 | - | |
| 724 | - content = full_content | |
| 725 | - response_content = full_content | |
| 726 | - else: | |
| 727 | - response = await self.backend.complete( | |
| 728 | - messages=self._build_messages(), | |
| 729 | - tools=tools, | |
| 730 | - temperature=self.config.temperature, | |
| 731 | - max_tokens=effective_max_tokens, | |
| 732 | - ) | |
| 733 | - # Filter content through safeguards (removes code blocks) | |
| 734 | - response_content = response.content # Keep original for history | |
| 735 | - content = self.safeguards.filter_complete_content(response.content) | |
| 736 | - tool_calls = response.tool_calls if not self.use_react else [] | |
| 737 | - | |
| 738 | - # Check if we should inject steering (bad patterns detected) | |
| 739 | - if self.safeguards.should_steer(): | |
| 740 | - steering_msg = self.safeguards.get_steering_message() | |
| 741 | - if steering_msg: | |
| 742 | - self._steering_queue.put_nowait(steering_msg) | |
| 743 | - | |
| 744 | - # Handle empty responses (common with small models after clarifications) | |
| 745 | - if not content.strip(): | |
| 746 | - empty_retry_count += 1 | |
| 747 | - if empty_retry_count <= MAX_EMPTY_RETRIES: | |
| 748 | - # Use progressively more direct prompts | |
| 749 | - task_context = original_task or task | |
| 750 | - retry_prompts = [ | |
| 751 | - # Retry 1: Gentle nudge with action focus | |
| 752 | - f"Great! Now let me proceed with the task. I'll start by using my tools.", | |
| 753 | - # Retry 2: More explicit about what to do | |
| 754 | - f"I understand. Let me create that now using my tools (write, bash, etc.).", | |
| 755 | - # Retry 3: Very direct instruction | |
| 756 | - f"Proceeding with: {task_context[:80]}. I'll use the write tool to create the files.", | |
| 757 | - # Retry 4: Action-first prompt | |
| 758 | - f"Starting now. First step: create the necessary files and directories.", | |
| 759 | - # Retry 5: Last attempt with full context | |
| 760 | - f"Let me complete this task step by step. The goal is: {task_context[:100]}", | |
| 761 | - ] | |
| 762 | - prompt = retry_prompts[min(empty_retry_count - 1, len(retry_prompts) - 1)] | |
| 763 | - self.messages.append(Message( | |
| 764 | - role=Role.ASSISTANT, | |
| 765 | - content=prompt, # Add as assistant message to give model a "running start" | |
| 766 | - )) | |
| 767 | - continue | |
| 768 | - else: | |
| 769 | - # Give up after max retries - but make the message less alarming | |
| 770 | - await emit(AgentEvent( | |
| 771 | - type="response", | |
| 772 | - content="I need a bit more direction. What specifically would you like me to create or do?", | |
| 773 | - )) | |
| 774 | - break | |
| 775 | - | |
| 776 | - # Get tool calls - either native or parsed from text | |
| 777 | - if self.use_react: | |
| 778 | - # Parse tool calls from text (ReAct mode) | |
| 779 | - parsed = parse_tool_calls(content) | |
| 780 | - tool_calls = parsed.tool_calls | |
| 781 | - content = parsed.content | |
| 782 | - | |
| 783 | - # Check if this is a final answer | |
| 784 | - if parsed.is_final_answer and not tool_calls: | |
| 785 | - final_response = content | |
| 786 | - self.messages.append(Message( | |
| 787 | - role=Role.ASSISTANT, | |
| 788 | - content=response_content, # Keep original for history | |
| 789 | - )) | |
| 790 | - await emit(AgentEvent(type="response", content=final_response)) | |
| 791 | - break | |
| 792 | - | |
| 793 | - # If there are tool calls, execute them | |
| 794 | - if tool_calls: | |
| 795 | - # Debug log | |
| 796 | - try: | |
| 797 | - with open("/tmp/loader_debug.log", "a") as f: | |
| 798 | - f.write(f"[loop] executing {len(tool_calls)} tool_calls\n") | |
| 799 | - for tc in tool_calls: | |
| 800 | - f.write(f"[loop] - {tc.name}: id={tc.id}, args_keys={list(tc.arguments.keys())}\n") | |
| 801 | - except Exception: | |
| 802 | - pass | |
| 803 | - | |
| 804 | - # Add assistant message with tool calls | |
| 805 | - self.messages.append(Message( | |
| 806 | - role=Role.ASSISTANT, | |
| 807 | - content=response_content, | |
| 808 | - tool_calls=tool_calls, | |
| 809 | - )) | |
| 810 | - | |
| 811 | - # Execute each tool (with recovery logic) | |
| 812 | - for tool_call in tool_calls: | |
| 813 | - cfg = self.config.reasoning | |
| 814 | - | |
| 815 | - # Confidence scoring before execution | |
| 816 | - if cfg.confidence_scoring: | |
| 817 | - context = "\n".join( | |
| 818 | - m.content[:500] for m in self.messages[-5:] | |
| 819 | - if m.content | |
| 820 | - ) | |
| 821 | - confidence = await self._assess_confidence( | |
| 822 | - tool_call.name, | |
| 823 | - tool_call.arguments, | |
| 824 | - context, | |
| 825 | - ) | |
| 826 | - await emit(AgentEvent( | |
| 827 | - type="confidence", | |
| 828 | - content=f"Confidence: {confidence.level.name} ({confidence.score}/5)", | |
| 829 | - confidence=confidence, | |
| 830 | - tool_name=tool_call.name, | |
| 831 | - )) | |
| 832 | - | |
| 833 | - # If confidence is too low, ask LLM to reconsider | |
| 834 | - if confidence.score < cfg.min_confidence_for_action: | |
| 835 | - low_conf_msg = ( | |
| 836 | - f"[LOW CONFIDENCE WARNING] The planned action has low confidence " | |
| 837 | - f"({confidence.level.name}).\n" | |
| 838 | - f"Reasoning: {confidence.reasoning}\n" | |
| 839 | - f"Risks: {', '.join(confidence.risks)}\n" | |
| 840 | - f"Consider an alternative approach or gather more information first." | |
| 841 | - ) | |
| 842 | - self.messages.append(Message( | |
| 843 | - role=Role.USER, | |
| 844 | - content=low_conf_msg, | |
| 845 | - )) | |
| 846 | - continue # Skip this tool call, let LLM reconsider | |
| 847 | - | |
| 848 | - # Only emit tool_call if not already shown during streaming | |
| 849 | - if tool_call.id not in pending_tool_calls_seen: | |
| 850 | - try: | |
| 851 | - with open("/tmp/loader_debug.log", "a") as f: | |
| 852 | - f.write(f"[loop] emitting tool_call event for {tool_call.name}\n") | |
| 853 | - except Exception: | |
| 854 | - pass | |
| 855 | - await emit(AgentEvent( | |
| 856 | - type="tool_call", | |
| 857 | - tool_name=tool_call.name, | |
| 858 | - tool_args=tool_call.arguments, | |
| 859 | - )) | |
| 860 | - else: | |
| 861 | - try: | |
| 862 | - with open("/tmp/loader_debug.log", "a") as f: | |
| 863 | - f.write(f"[loop] SKIPPING tool_call event for {tool_call.name} (already in pending_seen)\n") | |
| 864 | - except Exception: | |
| 865 | - pass | |
| 866 | - | |
| 867 | - # Track this action for completion checking | |
| 868 | - action_desc = f"{tool_call.name}: {str(tool_call.arguments)[:100]}" | |
| 869 | - actions_taken.append(action_desc) | |
| 870 | - | |
| 871 | - # Check for duplicate actions using safeguards | |
| 872 | - is_dup, dup_reason = self.safeguards.check_duplicate( | |
| 873 | - tool_call.name, tool_call.arguments | |
| 874 | - ) | |
| 875 | - if is_dup: | |
| 876 | - try: | |
| 877 | - with open("/tmp/loader_debug.log", "a") as f: | |
| 878 | - f.write(f"[loop] SKIPPING duplicate: {dup_reason}\n") | |
| 879 | - except Exception: | |
| 880 | - pass | |
| 881 | - # Add a tool result indicating skip | |
| 882 | - self.messages.append(Message( | |
| 883 | - role=Role.TOOL, | |
| 884 | - content=f"[Skipped - duplicate action: {dup_reason}]", | |
| 885 | - tool_call_id=tool_call.id, | |
| 886 | - )) | |
| 887 | - continue # Skip to next tool call | |
| 888 | - | |
| 889 | - # Pre-action validation | |
| 890 | - validation = self.safeguards.validate_action( | |
| 891 | - tool_call.name, tool_call.arguments | |
| 892 | - ) | |
| 893 | - if not validation.valid: | |
| 894 | - try: | |
| 895 | - with open("/tmp/loader_debug.log", "a") as f: | |
| 896 | - f.write(f"[loop] BLOCKED by validation: {validation.reason}\n") | |
| 897 | - except Exception: | |
| 898 | - pass | |
| 899 | - # Add a tool result with the validation error | |
| 900 | - error_msg = f"[Blocked - {validation.reason}]" | |
| 901 | - if validation.suggestion: | |
| 902 | - error_msg += f" Suggestion: {validation.suggestion}" | |
| 903 | - self.messages.append(Message( | |
| 904 | - role=Role.TOOL, | |
| 905 | - content=error_msg, | |
| 906 | - tool_call_id=tool_call.id, | |
| 907 | - )) | |
| 908 | - await emit(AgentEvent( | |
| 909 | - type="tool_result", | |
| 910 | - content=error_msg, | |
| 911 | - tool_name=tool_call.name, | |
| 912 | - is_error=True, | |
| 913 | - )) | |
| 914 | - continue # Skip to next tool call | |
| 915 | - | |
| 916 | - # Rollback planning: create rollback action before destructive ops | |
| 917 | - if rollback_plan and is_destructive_tool(tool_call.name, tool_call.arguments): | |
| 918 | - async def read_file_for_backup(path: str) -> str: | |
| 919 | - """Read file contents for backup.""" | |
| 920 | - result = await self.registry.execute("read", file_path=path) | |
| 921 | - return result.output if not result.is_error else "" | |
| 922 | - | |
| 923 | - rollback_action = await create_rollback_plan_for_action( | |
| 924 | - tool_call.name, | |
| 925 | - tool_call.arguments, | |
| 926 | - read_file_for_backup, | |
| 927 | - ) | |
| 928 | - if rollback_action: | |
| 929 | - rollback_plan.actions.append(rollback_action) | |
| 930 | - if self.config.reasoning.show_rollback_plan: | |
| 931 | - await emit(AgentEvent( | |
| 932 | - type="rollback", | |
| 933 | - content=f"Rollback tracked: {rollback_action.description}", | |
| 934 | - rollback_action=rollback_action, | |
| 935 | - )) | |
| 936 | - | |
| 937 | - # Try to execute, handling confirmation if needed | |
| 938 | - try: | |
| 939 | - result = await self.registry.execute( | |
| 940 | - tool_call.name, | |
| 941 | - **tool_call.arguments, | |
| 942 | - ) | |
| 943 | - except ConfirmationRequired as conf: | |
| 944 | - # Emit confirmation event | |
| 945 | - await emit(AgentEvent( | |
| 946 | - type="confirmation", | |
| 947 | - tool_name=conf.tool_name, | |
| 948 | - confirm_message=conf.message, | |
| 949 | - confirm_details=conf.details, | |
| 950 | - )) | |
| 951 | - | |
| 952 | - # If we have a confirmation callback, ask user | |
| 953 | - if on_confirmation: | |
| 954 | - confirmed = await on_confirmation( | |
| 955 | - conf.tool_name, | |
| 956 | - conf.message, | |
| 957 | - conf.details, | |
| 958 | - ) | |
| 959 | - if confirmed: | |
| 960 | - # Re-execute with skip_confirmation | |
| 961 | - old_skip = self.registry.skip_confirmation | |
| 962 | - self.registry.skip_confirmation = True | |
| 963 | - try: | |
| 964 | - result = await self.registry.execute( | |
| 965 | - tool_call.name, | |
| 966 | - **tool_call.arguments, | |
| 967 | - ) | |
| 968 | - finally: | |
| 969 | - self.registry.skip_confirmation = old_skip | |
| 970 | - else: | |
| 971 | - # User declined - create a skip result | |
| 972 | - from ..tools.base import ToolResult | |
| 973 | - result = ToolResult( | |
| 974 | - output=f"Tool {tool_call.name} was declined by user", | |
| 975 | - is_error=False, | |
| 976 | - ) | |
| 977 | - else: | |
| 978 | - # No callback - treat as auto-confirmed (for non-TUI mode) | |
| 979 | - old_skip = self.registry.skip_confirmation | |
| 980 | - self.registry.skip_confirmation = True | |
| 981 | - try: | |
| 982 | - result = await self.registry.execute( | |
| 983 | - tool_call.name, | |
| 984 | - **tool_call.arguments, | |
| 985 | - ) | |
| 986 | - finally: | |
| 987 | - self.registry.skip_confirmation = old_skip | |
| 988 | - | |
| 989 | - # Handle errors with recovery | |
| 990 | - if result.is_error and self.config.auto_recover: | |
| 991 | - # Initialize or update recovery context | |
| 992 | - if self._recovery_context is None: | |
| 993 | - self._recovery_context = RecoveryContext( | |
| 994 | - original_tool=tool_call.name, | |
| 995 | - original_args=tool_call.arguments, | |
| 996 | - max_retries=self.config.max_recovery_attempts, | |
| 997 | - ) | |
| 998 | - | |
| 999 | - # Check if this or a similar call was already tried (loop detection) | |
| 1000 | - if self._recovery_context.is_similar_attempt(tool_call.name, tool_call.arguments): | |
| 1001 | - await emit(AgentEvent( | |
| 1002 | - type="error", | |
| 1003 | - content=f"Loop detected: already tried a similar command. Try a DIFFERENT approach (e.g., read a config file first).", | |
| 1004 | - tool_name=tool_call.name, | |
| 1005 | - )) | |
| 1006 | - else: | |
| 1007 | - # Record this attempt | |
| 1008 | - self._recovery_context.add_attempt( | |
| 1009 | - tool_call.name, | |
| 1010 | - tool_call.arguments, | |
| 1011 | - result.output, | |
| 1012 | - ) | |
| 1013 | - | |
| 1014 | - # Can we retry? | |
| 1015 | - if self._recovery_context.can_retry(): | |
| 1016 | - attempt_num = len(self._recovery_context.attempts) | |
| 1017 | - await emit(AgentEvent( | |
| 1018 | - type="recovery", | |
| 1019 | - content=f"Tool failed, attempting recovery ({attempt_num}/{self._recovery_context.max_retries})", | |
| 1020 | - tool_name=tool_call.name, | |
| 1021 | - recovery_attempt=attempt_num, | |
| 1022 | - )) | |
| 1023 | - | |
| 1024 | - # Add recovery prompt for LLM | |
| 1025 | - recovery_prompt = format_recovery_prompt( | |
| 1026 | - self._recovery_context, | |
| 1027 | - tool_call.name, | |
| 1028 | - tool_call.arguments, | |
| 1029 | - result.output, | |
| 1030 | - ) | |
| 1031 | - self.messages.append(Message( | |
| 1032 | - role=Role.TOOL, | |
| 1033 | - content=recovery_prompt, | |
| 1034 | - )) | |
| 1035 | - | |
| 1036 | - # Continue to let LLM try an alternative | |
| 1037 | - continue | |
| 1038 | - else: | |
| 1039 | - # Max retries exceeded | |
| 1040 | - failure_msg = format_failure_message(self._recovery_context) | |
| 1041 | - await emit(AgentEvent( | |
| 1042 | - type="error", | |
| 1043 | - content=failure_msg, | |
| 1044 | - tool_name=tool_call.name, | |
| 1045 | - )) | |
| 1046 | - self._recovery_context = None | |
| 1047 | - | |
| 1048 | - # Add the final error result | |
| 1049 | - result_text = format_tool_result( | |
| 1050 | - tool_call.name, | |
| 1051 | - failure_msg, | |
| 1052 | - is_error=True, | |
| 1053 | - ) | |
| 1054 | - self.messages.append(Message( | |
| 1055 | - role=Role.TOOL, | |
| 1056 | - content=result_text, | |
| 1057 | - )) | |
| 1058 | - continue | |
| 1059 | - else: | |
| 1060 | - # Success or no auto-recover - clear recovery context | |
| 1061 | - if not result.is_error: | |
| 1062 | - self._recovery_context = None | |
| 1063 | - # Record successful action to prevent duplicates | |
| 1064 | - self.safeguards.record_action(tool_call.name, tool_call.arguments) | |
| 1065 | - | |
| 1066 | - # Check for repetitive loop pattern | |
| 1067 | - is_loop, loop_desc = self.safeguards.detect_loop() | |
| 1068 | - if is_loop: | |
| 1069 | - await emit(AgentEvent( | |
| 1070 | - type="error", | |
| 1071 | - content=f"Loop detected: {loop_desc}. Stopping to prevent repetitive behavior.", | |
| 1072 | - )) | |
| 1073 | - final_response = "I noticed I was repeating the same actions. Let me know what you'd like me to do differently." | |
| 1074 | - self.messages.append(Message( | |
| 1075 | - role=Role.ASSISTANT, | |
| 1076 | - content=final_response, | |
| 1077 | - )) | |
| 1078 | - await emit(AgentEvent(type="response", content=final_response)) | |
| 1079 | - return final_response | |
| 1080 | - | |
| 1081 | - await emit(AgentEvent( | |
| 1082 | - type="tool_result", | |
| 1083 | - content=result.output, | |
| 1084 | - tool_name=tool_call.name, | |
| 1085 | - is_error=result.is_error, | |
| 1086 | - )) | |
| 1087 | - | |
| 1088 | - # Post-action verification | |
| 1089 | - if cfg.verification and not result.is_error: | |
| 1090 | - verification = await self._verify_action( | |
| 1091 | - tool_call.name, | |
| 1092 | - tool_call.arguments, | |
| 1093 | - result.output, | |
| 1094 | - ) | |
| 1095 | - await emit(AgentEvent( | |
| 1096 | - type="verification", | |
| 1097 | - content=f"Verified: {verification.verified}", | |
| 1098 | - verification=verification, | |
| 1099 | - tool_name=tool_call.name, | |
| 1100 | - )) | |
| 1101 | - | |
| 1102 | - if not verification.verified and verification.needs_correction: | |
| 1103 | - # Add correction suggestion for LLM | |
| 1104 | - correction_msg = ( | |
| 1105 | - f"[VERIFICATION FAILED] The action did not produce expected results.\n" | |
| 1106 | - f"Discrepancies: {', '.join(verification.discrepancies)}\n" | |
| 1107 | - f"Suggestion: {verification.correction_suggestion}" | |
| 1108 | - ) | |
| 1109 | - self.messages.append(Message( | |
| 1110 | - role=Role.USER, | |
| 1111 | - content=correction_msg, | |
| 1112 | - )) | |
| 1113 | - # Don't add the tool result - let LLM try correction | |
| 1114 | - continue | |
| 1115 | - | |
| 1116 | - # Add tool result message | |
| 1117 | - result_text = format_tool_result( | |
| 1118 | - tool_call.name, | |
| 1119 | - result.output, | |
| 1120 | - result.is_error, | |
| 1121 | - ) | |
| 1122 | - self.messages.append(Message( | |
| 1123 | - role=Role.TOOL, | |
| 1124 | - content=result_text, | |
| 1125 | - )) | |
| 1126 | - | |
| 1127 | - # Continue the loop to get next response | |
| 1128 | - continue | |
| 1129 | - | |
| 1130 | - # No tool calls - check if model outputted raw JSON tool calls as text | |
| 1131 | - # Some small models do this instead of using the proper API | |
| 1132 | - if not tool_calls: | |
| 1133 | - try: | |
| 1134 | - with open("/tmp/loader_debug.log", "a") as f: | |
| 1135 | - f.write(f"[loop] no tool_calls, checking for raw JSON/bracket format in content (len={len(content)})\n") | |
| 1136 | - except Exception: | |
| 1137 | - pass | |
| 1138 | - raw_tool_calls = self._extract_raw_json_tool_calls(content) | |
| 1139 | - try: | |
| 1140 | - with open("/tmp/loader_debug.log", "a") as f: | |
| 1141 | - f.write(f"[loop] _extract_raw_json_tool_calls returned {len(raw_tool_calls)} calls\n") | |
| 1142 | - for tc in raw_tool_calls: | |
| 1143 | - f.write(f"[loop] - {tc.name}: {list(tc.arguments.keys())}\n") | |
| 1144 | - except Exception: | |
| 1145 | - pass | |
| 1146 | - if raw_tool_calls: | |
| 1147 | - # Successfully extracted tool calls from raw JSON - use them | |
| 1148 | - tool_calls = raw_tool_calls | |
| 1149 | - # Clear the streamed content (it was raw JSON, looks ugly) | |
| 1150 | - await emit(AgentEvent(type="clear_stream")) | |
| 1151 | - | |
| 1152 | - # If we now have tool calls (from raw JSON extraction), execute them | |
| 1153 | - if tool_calls: | |
| 1154 | - extracted_iterations += 1 | |
| 1155 | - | |
| 1156 | - # Check if we've exceeded extraction limits | |
| 1157 | - if extracted_iterations > MAX_EXTRACTED_ITERATIONS: | |
| 1158 | - # Model keeps outputting bracket-format calls - stop and let user continue | |
| 1159 | - final_response = content | |
| 1160 | - self.messages.append(Message(role=Role.ASSISTANT, content=response_content)) | |
| 1161 | - await emit(AgentEvent( | |
| 1162 | - type="response", | |
| 1163 | - content=final_response + "\n\nLet me know if you'd like me to continue or make changes." | |
| 1164 | - )) | |
| 1165 | - break | |
| 1166 | - | |
| 1167 | - try: | |
| 1168 | - with open("/tmp/loader_debug.log", "a") as f: | |
| 1169 | - f.write(f"[loop] executing {len(tool_calls)} extracted tool calls (iteration {extracted_iterations})\n") | |
| 1170 | - except Exception: | |
| 1171 | - pass | |
| 1172 | - | |
| 1173 | - # Track errors in this batch | |
| 1174 | - batch_errors = 0 | |
| 1175 | - | |
| 1176 | - # This duplicates the tool execution logic above, but that's intentional | |
| 1177 | - # to handle the case where raw JSON tool calls are extracted | |
| 1178 | - for i, tc in enumerate(tool_calls): | |
| 1179 | - # Skip browser/display commands that don't work in terminal | |
| 1180 | - if tc.name == "bash": | |
| 1181 | - cmd = tc.arguments.get("command", "") | |
| 1182 | - if any(x in cmd for x in ["xdg-open", "open ", "firefox", "chrome", "browser"]): | |
| 1183 | - try: | |
| 1184 | - with open("/tmp/loader_debug.log", "a") as f: | |
| 1185 | - f.write(f"[loop] skipping browser command: {cmd[:50]}\n") | |
| 1186 | - except Exception: | |
| 1187 | - pass | |
| 1188 | - continue | |
| 1189 | - | |
| 1190 | - # Use safeguards for duplicate checking | |
| 1191 | - is_dup, dup_reason = self.safeguards.check_duplicate(tc.name, tc.arguments) | |
| 1192 | - if is_dup: | |
| 1193 | - try: | |
| 1194 | - with open("/tmp/loader_debug.log", "a") as f: | |
| 1195 | - f.write(f"[loop] skipping duplicate: {dup_reason}\n") | |
| 1196 | - except Exception: | |
| 1197 | - pass | |
| 1198 | - continue | |
| 1199 | - | |
| 1200 | - # Pre-action validation | |
| 1201 | - validation = self.safeguards.validate_action(tc.name, tc.arguments) | |
| 1202 | - if not validation.valid: | |
| 1203 | - try: | |
| 1204 | - with open("/tmp/loader_debug.log", "a") as f: | |
| 1205 | - f.write(f"[loop] BLOCKED by validation: {validation.reason}\n") | |
| 1206 | - except Exception: | |
| 1207 | - pass | |
| 1208 | - error_msg = f"[Blocked - {validation.reason}]" | |
| 1209 | - if validation.suggestion: | |
| 1210 | - error_msg += f" Suggestion: {validation.suggestion}" | |
| 1211 | - await emit(AgentEvent( | |
| 1212 | - type="tool_result", | |
| 1213 | - content=error_msg, | |
| 1214 | - tool_name=tc.name, | |
| 1215 | - is_error=True, | |
| 1216 | - )) | |
| 1217 | - self.messages.append(Message( | |
| 1218 | - role=Role.TOOL, | |
| 1219 | - content=error_msg, | |
| 1220 | - )) | |
| 1221 | - batch_errors += 1 | |
| 1222 | - continue | |
| 1223 | - | |
| 1224 | - # Small delay between tool executions for better UX | |
| 1225 | - if i > 0: | |
| 1226 | - await asyncio.sleep(0.4) | |
| 1227 | - try: | |
| 1228 | - with open("/tmp/loader_debug.log", "a") as f: | |
| 1229 | - f.write(f"[loop] executing extracted tool: {tc.name} args={tc.arguments}\n") | |
| 1230 | - except Exception: | |
| 1231 | - pass | |
| 1232 | - actions_taken.append(f"{tc.name}: {str(tc.arguments)[:50]}...") | |
| 1233 | - await emit(AgentEvent( | |
| 1234 | - type="tool_call", | |
| 1235 | - tool_name=tc.name, | |
| 1236 | - tool_args=tc.arguments, | |
| 1237 | - )) | |
| 1238 | - | |
| 1239 | - # Execute the tool | |
| 1240 | - is_error = False | |
| 1241 | - try: | |
| 1242 | - result = await self.registry.execute(tc.name, **tc.arguments) | |
| 1243 | - result_text = result.output | |
| 1244 | - is_error = result.is_error | |
| 1245 | - except ConfirmationRequired as e: | |
| 1246 | - # Emit confirmation event | |
| 1247 | - await emit(AgentEvent( | |
| 1248 | - type="confirmation", | |
| 1249 | - tool_name=e.tool_name, | |
| 1250 | - confirm_message=e.message, | |
| 1251 | - confirm_details=e.details, | |
| 1252 | - )) | |
| 1253 | - if on_confirmation: | |
| 1254 | - confirmed = await on_confirmation(tc.name, e.message, e.details) | |
| 1255 | - if confirmed: | |
| 1256 | - # Re-execute with skip_confirmation | |
| 1257 | - old_skip = self.registry.skip_confirmation | |
| 1258 | - self.registry.skip_confirmation = True | |
| 1259 | - try: | |
| 1260 | - result = await self.registry.execute(tc.name, **tc.arguments) | |
| 1261 | - result_text = result.output | |
| 1262 | - is_error = result.is_error | |
| 1263 | - finally: | |
| 1264 | - self.registry.skip_confirmation = old_skip | |
| 1265 | - else: | |
| 1266 | - result_text = "Tool execution cancelled by user." | |
| 1267 | - else: | |
| 1268 | - # No callback - auto-confirm for extracted tool calls | |
| 1269 | - old_skip = self.registry.skip_confirmation | |
| 1270 | - self.registry.skip_confirmation = True | |
| 1271 | - try: | |
| 1272 | - result = await self.registry.execute(tc.name, **tc.arguments) | |
| 1273 | - result_text = result.output | |
| 1274 | - is_error = result.is_error | |
| 1275 | - finally: | |
| 1276 | - self.registry.skip_confirmation = old_skip | |
| 1277 | - except Exception as e: | |
| 1278 | - result_text = f"Error: {e}" | |
| 1279 | - is_error = True | |
| 1280 | - | |
| 1281 | - # Track errors | |
| 1282 | - if is_error: | |
| 1283 | - batch_errors += 1 | |
| 1284 | - consecutive_errors += 1 | |
| 1285 | - else: | |
| 1286 | - consecutive_errors = 0 # Reset on success | |
| 1287 | - # Record successful action to prevent duplicates | |
| 1288 | - self.safeguards.record_action(tc.name, tc.arguments) | |
| 1289 | - | |
| 1290 | - # Check for repetitive loop pattern | |
| 1291 | - is_loop, loop_desc = self.safeguards.detect_loop() | |
| 1292 | - if is_loop: | |
| 1293 | - await emit(AgentEvent( | |
| 1294 | - type="error", | |
| 1295 | - content=f"Loop detected: {loop_desc}. Stopping to prevent repetitive behavior.", | |
| 1296 | - )) | |
| 1297 | - final_response = "I noticed I was repeating the same actions. Let me know what you'd like me to do differently." | |
| 1298 | - self.messages.append(Message( | |
| 1299 | - role=Role.ASSISTANT, | |
| 1300 | - content=final_response, | |
| 1301 | - )) | |
| 1302 | - await emit(AgentEvent(type="response", content=final_response)) | |
| 1303 | - return final_response | |
| 1304 | - | |
| 1305 | - await emit(AgentEvent( | |
| 1306 | - type="tool_result", | |
| 1307 | - content=result_text, | |
| 1308 | - tool_name=tc.name, | |
| 1309 | - is_error=is_error, | |
| 1310 | - )) | |
| 1311 | - | |
| 1312 | - self.messages.append(Message( | |
| 1313 | - role=Role.ASSISTANT, | |
| 1314 | - content=response_content, | |
| 1315 | - )) | |
| 1316 | - self.messages.append(Message( | |
| 1317 | - role=Role.TOOL, | |
| 1318 | - content=result_text, | |
| 1319 | - )) | |
| 1320 | - | |
| 1321 | - # After executing batch, check if we should stop | |
| 1322 | - # Stop if: all tools in batch failed, or we have many consecutive errors | |
| 1323 | - if batch_errors == len(tool_calls) or consecutive_errors >= 3: | |
| 1324 | - # All failed or too many consecutive errors - stop trying | |
| 1325 | - final_response = "I ran into some issues. Let me know if you'd like me to try a different approach." | |
| 1326 | - await emit(AgentEvent(type="response", content=final_response)) | |
| 1327 | - break | |
| 1328 | - | |
| 1329 | - continue | |
| 1330 | - | |
| 1331 | - # No tool calls - check if model is describing instead of acting | |
| 1332 | - # IMPORTANT: Check ORIGINAL content before safeguards filtered it! | |
| 1333 | - # Debug log | |
| 1334 | - try: | |
| 1335 | - has_unexecuted = self._contains_unexecuted_code(response_content) | |
| 1336 | - with open("/tmp/loader_debug.log", "a") as f: | |
| 1337 | - f.write(f"[chatbot-check] iterations={iterations}, has_unexecuted={has_unexecuted}\n") | |
| 1338 | - f.write(f"[chatbot-check] response_content (first 200): {response_content[:200]}\n") | |
| 1339 | - f.write(f"[chatbot-check] filtered content (first 200): {content[:200]}\n") | |
| 1340 | - except Exception: | |
| 1341 | - pass | |
| 1342 | - | |
| 1343 | - if self._contains_unexecuted_code(response_content) and iterations < self.config.max_iterations - 1: | |
| 1344 | - # Model outputted code blocks without using tools - nudge it | |
| 1345 | - try: | |
| 1346 | - with open("/tmp/loader_debug.log", "a") as f: | |
| 1347 | - f.write(f"[chatbot-check] TRIGGERING chatbot recovery\n") | |
| 1348 | - except Exception: | |
| 1349 | - pass | |
| 1350 | - # Silently steer - don't show error to user (internal correction) | |
| 1351 | - self.messages.append(Message( | |
| 1352 | - role=Role.ASSISTANT, | |
| 1353 | - content=response_content, | |
| 1354 | - )) | |
| 1355 | - self.messages.append(Message( | |
| 1356 | - role=Role.USER, | |
| 1357 | - content="CRITICAL ERROR: You are PRETENDING to use tools instead of actually using them.\n\n" | |
| 1358 | - "DO NOT write:\n" | |
| 1359 | - "- 'Used bash tool with command...' (THIS IS FAKE)\n" | |
| 1360 | - "- 'Created a file using the write tool...' (THIS IS FAKE)\n" | |
| 1361 | - "- 'Here is what I did:' followed by descriptions\n" | |
| 1362 | - "- Numbered steps or instructions\n" | |
| 1363 | - "- Code blocks for me to copy\n\n" | |
| 1364 | - "Your tool calls MUST go through the proper tool interface.\n" | |
| 1365 | - "Writing 'Used bash tool...' does NOT execute anything!\n\n" | |
| 1366 | - "ACTUALLY call the tools using the tool_call mechanism.\n" | |
| 1367 | - "DO IT NOW - stop narrating and start executing.", | |
| 1368 | - )) | |
| 1369 | - continue | |
| 1370 | - | |
| 1371 | - # No tool calls and early in the task - MAY be giving up too soon | |
| 1372 | - # But only intervene if we haven't done ANY work yet | |
| 1373 | - if not self.use_react and len(actions_taken) == 0 and iterations < self.config.max_iterations - 2: | |
| 1374 | - # Check if response looks like deflection without having done anything | |
| 1375 | - deflection_phrases = ["you can", "you should", "you could", "try running"] | |
| 1376 | - looks_like_deflection = any(p in content.lower() for p in deflection_phrases) | |
| 1377 | - | |
| 1378 | - if looks_like_deflection: | |
| 1379 | - self.messages.append(Message( | |
| 1380 | - role=Role.ASSISTANT, | |
| 1381 | - content=response_content, | |
| 1382 | - )) | |
| 1383 | - self.messages.append(Message( | |
| 1384 | - role=Role.USER, | |
| 1385 | - content="Please use your tools to execute the task rather than telling me what to do.", | |
| 1386 | - )) | |
| 1387 | - continue | |
| 1388 | - | |
| 1389 | - # Self-critique before finalizing (if enabled and response has substance) | |
| 1390 | - cfg = self.config.reasoning | |
| 1391 | - if cfg.self_critique and len(content) > 100: | |
| 1392 | - # Check if we should critique this response | |
| 1393 | - is_code_response = "```" in content or any( | |
| 1394 | - keyword in content.lower() | |
| 1395 | - for keyword in ["def ", "function ", "class ", "import "] | |
| 1396 | - ) | |
| 1397 | - if should_self_critique(content, is_code=is_code_response): | |
| 1398 | - context = task | |
| 1399 | - critique = await self._self_critique(content, context) | |
| 1400 | - | |
| 1401 | - await emit(AgentEvent( | |
| 1402 | - type="critique", | |
| 1403 | - content=f"Self-critique: {len(critique.issues_found)} issues found", | |
| 1404 | - critique=critique, | |
| 1405 | - )) | |
| 1406 | - | |
| 1407 | - if critique.can_revise(): | |
| 1408 | - # Ask for revision | |
| 1409 | - revision_msg = ( | |
| 1410 | - f"[SELF-CRITIQUE] Review your response:\n" | |
| 1411 | - f"Issues found: {', '.join(critique.issues_found)}\n" | |
| 1412 | - f"Suggestions: {', '.join(critique.suggestions)}\n\n" | |
| 1413 | - "Please provide an improved response addressing these issues." | |
| 1414 | - ) | |
| 1415 | - self.messages.append(Message( | |
| 1416 | - role=Role.ASSISTANT, | |
| 1417 | - content=response_content, | |
| 1418 | - )) | |
| 1419 | - self.messages.append(Message( | |
| 1420 | - role=Role.USER, | |
| 1421 | - content=revision_msg, | |
| 1422 | - )) | |
| 1423 | - critique.revision_count += 1 | |
| 1424 | - continue # Loop to get revised response | |
| 1425 | - | |
| 1426 | - # Check for text loop (agent repeating the same response) | |
| 1427 | - is_text_loop, text_loop_desc = self.safeguards.detect_text_loop(content) | |
| 1428 | - if is_text_loop: | |
| 1429 | - await emit(AgentEvent( | |
| 1430 | - type="error", | |
| 1431 | - content=f"Text loop detected: {text_loop_desc}. Stopping.", | |
| 1432 | - )) | |
| 1433 | - final_response = "I seem to be repeating myself. Let me know if you'd like me to try a different approach." | |
| 1434 | - self.messages.append(Message( | |
| 1435 | - role=Role.ASSISTANT, | |
| 1436 | - content=final_response, | |
| 1437 | - )) | |
| 1438 | - await emit(AgentEvent(type="response", content=final_response)) | |
| 1439 | - return final_response | |
| 1440 | - | |
| 1441 | - # Record response for future loop detection | |
| 1442 | - self.safeguards.record_response(content) | |
| 1443 | - | |
| 1444 | - # Task completion check - don't give up too early! | |
| 1445 | - # Use original_task if available (for multi-turn conversations) | |
| 1446 | - effective_task = original_task or task | |
| 1447 | - if cfg.completion_check and continuation_count < cfg.max_continuation_prompts: | |
| 1448 | - # Quick heuristic check first | |
| 1449 | - if cfg.use_quick_completion: | |
| 1450 | - is_premature = detect_premature_completion(effective_task, content, actions_taken) | |
| 1451 | - else: | |
| 1452 | - is_premature = False | |
| 1453 | - | |
| 1454 | - if is_premature: | |
| 1455 | - continuation_count += 1 | |
| 1456 | - continuation_prompt = get_continuation_prompt(effective_task, actions_taken, content) | |
| 1457 | - | |
| 1458 | - await emit(AgentEvent( | |
| 1459 | - type="completion_check", | |
| 1460 | - content=f"Task may be incomplete ({len(actions_taken)} actions taken)", | |
| 1461 | - completion_check=TaskCompletionCheck( | |
| 1462 | - original_task=effective_task, | |
| 1463 | - is_complete=False, | |
| 1464 | - accomplished=[a.split(":")[0] for a in actions_taken], | |
| 1465 | - continuation_prompt=continuation_prompt, | |
| 1466 | - ), | |
| 1467 | - )) | |
| 1468 | - | |
| 1469 | - # Add the assistant's response and nudge to continue | |
| 1470 | - self.messages.append(Message( | |
| 1471 | - role=Role.ASSISTANT, | |
| 1472 | - content=response_content, | |
| 1473 | - )) | |
| 1474 | - self.messages.append(Message( | |
| 1475 | - role=Role.USER, | |
| 1476 | - content=continuation_prompt, | |
| 1477 | - )) | |
| 1478 | - continue # Loop to get continuation | |
| 1479 | - | |
| 1480 | - # This is the final response | |
| 1481 | - final_response = content | |
| 1482 | - | |
| 1483 | - # If we completed actions, add follow-up question to encourage continued conversation | |
| 1484 | - if actions_taken and final_response.strip(): | |
| 1485 | - # Only add if the response doesn't already end with a question | |
| 1486 | - if not final_response.rstrip().endswith('?'): | |
| 1487 | - final_response = final_response.rstrip() + "\n\nWould you like me to make any changes or additions?" | |
| 1488 | - | |
| 1489 | - self.messages.append(Message( | |
| 1490 | - role=Role.ASSISTANT, | |
| 1491 | - content=response_content, | |
| 1492 | - )) | |
| 1493 | - | |
| 1494 | - # Emit rollback plan summary if we tracked any actions | |
| 1495 | - if rollback_plan and rollback_plan.actions: | |
| 1496 | - await emit(AgentEvent( | |
| 1497 | - type="rollback_summary", | |
| 1498 | - content=f"Rollback plan: {len(rollback_plan.actions)} action(s) tracked", | |
| 1499 | - rollback_plan=rollback_plan, | |
| 1500 | - )) | |
| 1501 | - | |
| 1502 | - await emit(AgentEvent(type="response", content=final_response)) | |
| 1503 | - break | |
| 1504 | - | |
| 1505 | - return final_response | |
| 586 | + runtime = ConversationRuntime(self) | |
| 587 | + self.last_turn_summary = await runtime.run_turn( | |
| 588 | + task, | |
| 589 | + emit, | |
| 590 | + on_confirmation=on_confirmation, | |
| 591 | + original_task=original_task, | |
| 592 | + ) | |
| 593 | + return self.last_turn_summary.final_response | |
| 1506 | 594 | |
| 1507 | 595 | async def run_streaming( |
| 1508 | 596 | self, |
@@ -1923,7 +1011,12 @@ class Agent: | ||
| 1923 | 1011 | def clear_history(self) -> None: |
| 1924 | 1012 | """Clear conversation history.""" |
| 1925 | 1013 | self.messages = [] |
| 1014 | + self.session = ConversationSession( | |
| 1015 | + system_message_factory=self._get_system_message, | |
| 1016 | + few_shot_factory=self._get_few_shot_examples, | |
| 1017 | + messages=self.messages, | |
| 1018 | + ) | |
| 1926 | 1019 | self._recovery_context = None |
| 1927 | 1020 | self._current_task = None |
| 1928 | - self._executed_commands = set() # Clear command dedup tracking | |
| 1021 | + self.last_turn_summary = None | |
| 1929 | 1022 | self.safeguards.reset() # Reset all runtime safeguards |
src/loader/llm/base.pymodified@@ -1,9 +1,10 @@ | ||
| 1 | 1 | """Base classes for LLM backends.""" |
| 2 | 2 | |
| 3 | 3 | from abc import ABC, abstractmethod |
| 4 | +from collections.abc import AsyncIterator | |
| 4 | 5 | from dataclasses import dataclass, field |
| 5 | 6 | from enum import Enum |
| 6 | -from typing import AsyncIterator, Any | |
| 7 | +from typing import Any | |
| 7 | 8 | |
| 8 | 9 | |
| 9 | 10 | class Role(str, Enum): |
@@ -38,6 +39,29 @@ class Message: | ||
| 38 | 39 | tool_calls: list[ToolCall] = field(default_factory=list) |
| 39 | 40 | tool_results: list[ToolResult] = field(default_factory=list) |
| 40 | 41 | |
| 42 | + @classmethod | |
| 43 | + def tool_result_message( | |
| 44 | + cls, | |
| 45 | + *, | |
| 46 | + tool_call_id: str, | |
| 47 | + display_content: str, | |
| 48 | + result_content: str, | |
| 49 | + is_error: bool = False, | |
| 50 | + ) -> "Message": | |
| 51 | + """Build a tool-result message with a typed tool result payload.""" | |
| 52 | + | |
| 53 | + return cls( | |
| 54 | + role=Role.TOOL, | |
| 55 | + content=display_content, | |
| 56 | + tool_results=[ | |
| 57 | + ToolResult( | |
| 58 | + tool_call_id=tool_call_id, | |
| 59 | + content=result_content, | |
| 60 | + is_error=is_error, | |
| 61 | + ) | |
| 62 | + ], | |
| 63 | + ) | |
| 64 | + | |
| 41 | 65 | def to_dict(self) -> dict[str, Any]: |
| 42 | 66 | """Convert to dict for API calls.""" |
| 43 | 67 | result: dict[str, Any] = { |
@@ -49,6 +73,10 @@ class Message: | ||
| 49 | 73 | {"id": tc.id, "name": tc.name, "arguments": tc.arguments} |
| 50 | 74 | for tc in self.tool_calls |
| 51 | 75 | ] |
| 76 | + if self.tool_results: | |
| 77 | + primary_result = self.tool_results[0] | |
| 78 | + result["tool_call_id"] = primary_result.tool_call_id | |
| 79 | + result["is_error"] = primary_result.is_error | |
| 52 | 80 | return result |
| 53 | 81 | |
| 54 | 82 | |
src/loader/runtime/__init__.pymodified@@ -1,6 +1,10 @@ | ||
| 1 | 1 | """Runtime primitives for Loader's turn engine.""" |
| 2 | 2 | |
| 3 | -from .capabilities import CapabilityProfile, resolve_capability_profile | |
| 3 | +from .capabilities import ( | |
| 4 | + CapabilityProfile, | |
| 5 | + resolve_backend_capability_profile, | |
| 6 | + resolve_capability_profile, | |
| 7 | +) | |
| 4 | 8 | from .events import AgentEvent, TurnSummary |
| 5 | 9 | from .session import ConversationSession |
| 6 | 10 | from .tracing import RuntimeTraceEvent, RuntimeTracer |
@@ -12,5 +16,6 @@ __all__ = [ | ||
| 12 | 16 | "RuntimeTraceEvent", |
| 13 | 17 | "RuntimeTracer", |
| 14 | 18 | "TurnSummary", |
| 19 | + "resolve_backend_capability_profile", | |
| 15 | 20 | "resolve_capability_profile", |
| 16 | 21 | ] |
src/loader/runtime/capabilities.pymodified@@ -3,12 +3,24 @@ | ||
| 3 | 3 | from __future__ import annotations |
| 4 | 4 | |
| 5 | 5 | from dataclasses import dataclass, field |
| 6 | -from typing import Any, Literal | |
| 6 | +from typing import Any, Literal, Protocol | |
| 7 | 7 | |
| 8 | 8 | ToolCallFormat = Literal["native", "json_tag", "bracket"] |
| 9 | 9 | VerificationStrictness = Literal["lax", "standard", "strict"] |
| 10 | 10 | |
| 11 | 11 | |
| 12 | +class SupportsCapabilityProfile(Protocol): | |
| 13 | + """Runtime interface for backends that can describe capabilities.""" | |
| 14 | + | |
| 15 | + def capability_profile(self) -> CapabilityProfile: ... | |
| 16 | + | |
| 17 | + | |
| 18 | +class SupportsNativeTools(Protocol): | |
| 19 | + """Runtime interface for backends that can explicitly report tool support.""" | |
| 20 | + | |
| 21 | + def supports_native_tools(self) -> bool: ... | |
| 22 | + | |
| 23 | + | |
| 12 | 24 | @dataclass(frozen=True) |
| 13 | 25 | class CapabilityProfile: |
| 14 | 26 | """Resolved model/runtime capability profile.""" |
@@ -203,3 +215,30 @@ def resolve_capability_profile( | ||
| 203 | 215 | verification_strictness="standard", |
| 204 | 216 | notes=["Unknown model family; defaulting to safe ReAct-style tool use."], |
| 205 | 217 | ) |
| 218 | + | |
| 219 | + | |
| 220 | +def resolve_backend_capability_profile(backend: Any) -> CapabilityProfile: | |
| 221 | + """Resolve capabilities from the backend first, then fall back to model heuristics.""" | |
| 222 | + | |
| 223 | + explicit_profile = getattr(backend, "capability_profile", None) | |
| 224 | + if callable(explicit_profile): | |
| 225 | + profile = explicit_profile() | |
| 226 | + if isinstance(profile, CapabilityProfile): | |
| 227 | + return profile | |
| 228 | + | |
| 229 | + model_name = getattr(backend, "model", backend.__class__.__name__) | |
| 230 | + explicit_native_tools = getattr(backend, "supports_native_tools", None) | |
| 231 | + if callable(explicit_native_tools): | |
| 232 | + supports_native_tools = bool(explicit_native_tools()) | |
| 233 | + preferred_tool_call_format: ToolCallFormat = ( | |
| 234 | + "native" if supports_native_tools else "json_tag" | |
| 235 | + ) | |
| 236 | + return _profile( | |
| 237 | + model_name, | |
| 238 | + supports_native_tools=supports_native_tools, | |
| 239 | + preferred_tool_call_format=preferred_tool_call_format, | |
| 240 | + verification_strictness="standard", | |
| 241 | + notes=["Resolved from backend capability surface."], | |
| 242 | + ) | |
| 243 | + | |
| 244 | + return resolve_capability_profile(model_name) | |
src/loader/runtime/conversation.pyadded@@ -0,0 +1,705 @@ | ||
| 1 | +"""Typed turn engine for Loader runtime execution.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +from collections.abc import Awaitable, Callable | |
| 6 | +from dataclasses import dataclass, field | |
| 7 | +from typing import Any | |
| 8 | + | |
| 9 | +from ..agent.parsing import parse_tool_calls | |
| 10 | +from ..agent.reasoning import ( | |
| 11 | + RollbackPlan, | |
| 12 | + TaskCompletionCheck, | |
| 13 | + create_rollback_plan_for_action, | |
| 14 | + detect_premature_completion, | |
| 15 | + estimate_complexity, | |
| 16 | + get_continuation_prompt, | |
| 17 | + get_token_budget, | |
| 18 | + is_destructive_tool, | |
| 19 | + should_self_critique, | |
| 20 | +) | |
| 21 | +from ..agent.recovery import RecoveryContext, format_failure_message, format_recovery_prompt | |
| 22 | +from ..llm.base import Message, Role, ToolCall | |
| 23 | +from .events import AgentEvent, TurnSummary | |
| 24 | +from .executor import ToolExecutionState, ToolExecutor | |
| 25 | +from .tracing import RuntimeTracer | |
| 26 | + | |
| 27 | +EventSink = Callable[[AgentEvent], Awaitable[None]] | |
| 28 | +ConfirmationHandler = Callable[[str, str, str], Awaitable[bool]] | None | |
| 29 | + | |
| 30 | + | |
| 31 | +@dataclass | |
| 32 | +class AssistantTurn: | |
| 33 | + """Assistant output for one iteration of the conversation loop.""" | |
| 34 | + | |
| 35 | + content: str | |
| 36 | + response_content: str | |
| 37 | + tool_calls: list[ToolCall] | |
| 38 | + pending_tool_calls_seen: set[str] = field(default_factory=set) | |
| 39 | + usage: dict[str, int] = field(default_factory=dict) | |
| 40 | + | |
| 41 | + | |
| 42 | +class ConversationRuntime: | |
| 43 | + """Runs one explicit conversation turn against the current session.""" | |
| 44 | + | |
| 45 | + def __init__(self, agent: Any) -> None: | |
| 46 | + self.agent = agent | |
| 47 | + self.tracer = RuntimeTracer() | |
| 48 | + self.executor = ToolExecutor(agent.registry, agent.safeguards, self.tracer) | |
| 49 | + | |
| 50 | + async def run_turn( | |
| 51 | + self, | |
| 52 | + task: str, | |
| 53 | + emit: EventSink, | |
| 54 | + on_confirmation: ConfirmationHandler = None, | |
| 55 | + original_task: str | None = None, | |
| 56 | + ) -> TurnSummary: | |
| 57 | + """Run one task turn and return a structured summary.""" | |
| 58 | + | |
| 59 | + iterations = 0 | |
| 60 | + final_response = "" | |
| 61 | + actions_taken: list[str] = [] | |
| 62 | + continuation_count = 0 | |
| 63 | + empty_retry_count = 0 | |
| 64 | + max_empty_retries = 5 | |
| 65 | + extracted_iterations = 0 | |
| 66 | + max_extracted_iterations = 3 | |
| 67 | + consecutive_errors = 0 | |
| 68 | + | |
| 69 | + complexity = estimate_complexity(task) | |
| 70 | + max_tokens, _ = get_token_budget(complexity) | |
| 71 | + effective_max_tokens = min(self.agent.config.max_tokens, max(max_tokens, 512)) | |
| 72 | + | |
| 73 | + rollback_plan = RollbackPlan() if self.agent.config.reasoning.rollback else None | |
| 74 | + summary = TurnSummary(final_response="") | |
| 75 | + | |
| 76 | + while iterations < self.agent.config.max_iterations: | |
| 77 | + iterations += 1 | |
| 78 | + summary.iterations = iterations | |
| 79 | + self.tracer.record("turn.iteration_started", iteration=iterations) | |
| 80 | + | |
| 81 | + if iterations == 1 and len(self.agent.messages) == 1: | |
| 82 | + task_lower = task.lower() | |
| 83 | + action_keywords = [ | |
| 84 | + "create", | |
| 85 | + "write", | |
| 86 | + "make", | |
| 87 | + "run", | |
| 88 | + "execute", | |
| 89 | + "build", | |
| 90 | + "install", | |
| 91 | + "delete", | |
| 92 | + "remove", | |
| 93 | + "add", | |
| 94 | + "edit", | |
| 95 | + "modify", | |
| 96 | + "update", | |
| 97 | + "fix", | |
| 98 | + ] | |
| 99 | + if any(keyword in task_lower for keyword in action_keywords): | |
| 100 | + self.agent.session.append(Message(role=Role.ASSISTANT, content="[")) | |
| 101 | + | |
| 102 | + steering_messages = self.agent._drain_steering_queue() | |
| 103 | + for steering_message in steering_messages: | |
| 104 | + await emit(AgentEvent(type="steering", content=steering_message)) | |
| 105 | + self.agent.session.append( | |
| 106 | + Message( | |
| 107 | + role=Role.USER, | |
| 108 | + content=f"[USER INTERRUPTION]: {steering_message}", | |
| 109 | + ) | |
| 110 | + ) | |
| 111 | + | |
| 112 | + await emit(AgentEvent(type="thinking")) | |
| 113 | + assistant_turn = await self._request_assistant_turn( | |
| 114 | + emit=emit, | |
| 115 | + max_tokens=effective_max_tokens, | |
| 116 | + ) | |
| 117 | + self._merge_usage(summary.usage, assistant_turn.usage) | |
| 118 | + | |
| 119 | + content = assistant_turn.content | |
| 120 | + response_content = assistant_turn.response_content | |
| 121 | + tool_calls = list(assistant_turn.tool_calls) | |
| 122 | + pending_tool_calls_seen = set(assistant_turn.pending_tool_calls_seen) | |
| 123 | + | |
| 124 | + if not content.strip(): | |
| 125 | + empty_retry_count += 1 | |
| 126 | + if empty_retry_count <= max_empty_retries: | |
| 127 | + task_context = original_task or task | |
| 128 | + retry_prompts = [ | |
| 129 | + "Great! Now let me proceed with the task. I'll start by using my tools.", | |
| 130 | + "I understand. Let me create that now using my tools (write, bash, etc.).", | |
| 131 | + f"Proceeding with: {task_context[:80]}. I'll use the write tool to create the files.", | |
| 132 | + "Starting now. First step: create the necessary files and directories.", | |
| 133 | + f"Let me complete this task step by step. The goal is: {task_context[:100]}", | |
| 134 | + ] | |
| 135 | + prompt = retry_prompts[min(empty_retry_count - 1, len(retry_prompts) - 1)] | |
| 136 | + self.agent.session.append(Message(role=Role.ASSISTANT, content=prompt)) | |
| 137 | + continue | |
| 138 | + | |
| 139 | + final_response = ( | |
| 140 | + "I need a bit more direction. What specifically would you like me to create or do?" | |
| 141 | + ) | |
| 142 | + summary.final_response = final_response | |
| 143 | + summary.failures.append("assistant returned empty output repeatedly") | |
| 144 | + await emit(AgentEvent(type="response", content=final_response)) | |
| 145 | + break | |
| 146 | + | |
| 147 | + if self.agent.use_react: | |
| 148 | + parsed = parse_tool_calls(content) | |
| 149 | + tool_calls = parsed.tool_calls | |
| 150 | + content = parsed.content | |
| 151 | + | |
| 152 | + if parsed.is_final_answer and not tool_calls: | |
| 153 | + assistant_message = Message(role=Role.ASSISTANT, content=response_content) | |
| 154 | + self.agent.session.append(assistant_message) | |
| 155 | + summary.assistant_messages.append(assistant_message) | |
| 156 | + final_response = content | |
| 157 | + summary.final_response = final_response | |
| 158 | + self.tracer.record("turn.completed", reason="final_answer") | |
| 159 | + await emit(AgentEvent(type="response", content=final_response)) | |
| 160 | + break | |
| 161 | + | |
| 162 | + tool_source = "native" | |
| 163 | + if not tool_calls: | |
| 164 | + raw_tool_calls = self.agent._extract_raw_json_tool_calls(response_content) | |
| 165 | + if raw_tool_calls: | |
| 166 | + tool_calls = raw_tool_calls | |
| 167 | + tool_source = "raw_text" | |
| 168 | + await emit(AgentEvent(type="clear_stream")) | |
| 169 | + | |
| 170 | + if tool_calls: | |
| 171 | + if tool_source == "raw_text": | |
| 172 | + extracted_iterations += 1 | |
| 173 | + if extracted_iterations > max_extracted_iterations: | |
| 174 | + final_response = ( | |
| 175 | + content | |
| 176 | + + "\n\nLet me know if you'd like me to continue or make changes." | |
| 177 | + ) | |
| 178 | + assistant_message = Message(role=Role.ASSISTANT, content=response_content) | |
| 179 | + self.agent.session.append(assistant_message) | |
| 180 | + summary.assistant_messages.append(assistant_message) | |
| 181 | + summary.final_response = final_response | |
| 182 | + summary.failures.append("raw tool extraction exceeded iteration budget") | |
| 183 | + await emit(AgentEvent(type="response", content=final_response)) | |
| 184 | + break | |
| 185 | + | |
| 186 | + assistant_message = Message( | |
| 187 | + role=Role.ASSISTANT, | |
| 188 | + content=response_content, | |
| 189 | + tool_calls=tool_calls, | |
| 190 | + ) | |
| 191 | + self.agent.session.append(assistant_message) | |
| 192 | + summary.assistant_messages.append(assistant_message) | |
| 193 | + self.tracer.record( | |
| 194 | + "assistant.tool_batch", | |
| 195 | + tool_count=len(tool_calls), | |
| 196 | + source=tool_source, | |
| 197 | + ) | |
| 198 | + | |
| 199 | + for tool_call in tool_calls: | |
| 200 | + cfg = self.agent.config.reasoning | |
| 201 | + | |
| 202 | + if cfg.confidence_scoring: | |
| 203 | + context = "\n".join( | |
| 204 | + message.content[:500] | |
| 205 | + for message in self.agent.messages[-5:] | |
| 206 | + if message.content | |
| 207 | + ) | |
| 208 | + confidence = await self.agent._assess_confidence( | |
| 209 | + tool_call.name, | |
| 210 | + tool_call.arguments, | |
| 211 | + context, | |
| 212 | + ) | |
| 213 | + await emit( | |
| 214 | + AgentEvent( | |
| 215 | + type="confidence", | |
| 216 | + content=f"Confidence: {confidence.level.name} ({confidence.score}/5)", | |
| 217 | + confidence=confidence, | |
| 218 | + tool_name=tool_call.name, | |
| 219 | + ) | |
| 220 | + ) | |
| 221 | + if confidence.score < cfg.min_confidence_for_action: | |
| 222 | + low_confidence_message = ( | |
| 223 | + "[LOW CONFIDENCE WARNING] The planned action has low confidence " | |
| 224 | + f"({confidence.level.name}).\n" | |
| 225 | + f"Reasoning: {confidence.reasoning}\n" | |
| 226 | + f"Risks: {', '.join(confidence.risks)}\n" | |
| 227 | + "Consider an alternative approach or gather more information first." | |
| 228 | + ) | |
| 229 | + self.agent.session.append( | |
| 230 | + Message(role=Role.USER, content=low_confidence_message) | |
| 231 | + ) | |
| 232 | + continue | |
| 233 | + | |
| 234 | + if tool_call.id not in pending_tool_calls_seen: | |
| 235 | + await emit( | |
| 236 | + AgentEvent( | |
| 237 | + type="tool_call", | |
| 238 | + tool_name=tool_call.name, | |
| 239 | + tool_args=tool_call.arguments, | |
| 240 | + ) | |
| 241 | + ) | |
| 242 | + | |
| 243 | + actions_taken.append(f"{tool_call.name}: {str(tool_call.arguments)[:100]}") | |
| 244 | + | |
| 245 | + if rollback_plan and is_destructive_tool(tool_call.name, tool_call.arguments): | |
| 246 | + | |
| 247 | + async def read_file_for_backup(path: str) -> str: | |
| 248 | + read_result = await self.agent.registry.execute("read", file_path=path) | |
| 249 | + return read_result.output if not read_result.is_error else "" | |
| 250 | + | |
| 251 | + rollback_action = await create_rollback_plan_for_action( | |
| 252 | + tool_call.name, | |
| 253 | + tool_call.arguments, | |
| 254 | + read_file_for_backup, | |
| 255 | + ) | |
| 256 | + if rollback_action: | |
| 257 | + rollback_plan.actions.append(rollback_action) | |
| 258 | + if self.agent.config.reasoning.show_rollback_plan: | |
| 259 | + await emit( | |
| 260 | + AgentEvent( | |
| 261 | + type="rollback", | |
| 262 | + content=f"Rollback tracked: {rollback_action.description}", | |
| 263 | + rollback_action=rollback_action, | |
| 264 | + ) | |
| 265 | + ) | |
| 266 | + | |
| 267 | + outcome = await self.executor.execute_tool_call( | |
| 268 | + tool_call, | |
| 269 | + on_confirmation=on_confirmation, | |
| 270 | + emit_confirmation=self._emit_confirmation(emit), | |
| 271 | + source=tool_source, | |
| 272 | + ) | |
| 273 | + | |
| 274 | + if ( | |
| 275 | + outcome.state == ToolExecutionState.EXECUTED | |
| 276 | + and outcome.is_error | |
| 277 | + and self.agent.config.auto_recover | |
| 278 | + ): | |
| 279 | + recovery_result = await self._handle_recovery(tool_call, outcome, emit) | |
| 280 | + if recovery_result is not None: | |
| 281 | + summary.tool_result_messages.append(recovery_result) | |
| 282 | + self.agent.session.append(recovery_result) | |
| 283 | + continue | |
| 284 | + | |
| 285 | + if outcome.state == ToolExecutionState.EXECUTED and not outcome.is_error: | |
| 286 | + self.agent._recovery_context = None | |
| 287 | + is_loop, loop_description = self.agent.safeguards.detect_loop() | |
| 288 | + if is_loop: | |
| 289 | + final_response = ( | |
| 290 | + "I noticed I was repeating the same actions. " | |
| 291 | + "Let me know what you'd like me to do differently." | |
| 292 | + ) | |
| 293 | + summary.final_response = final_response | |
| 294 | + summary.failures.append(loop_description) | |
| 295 | + loop_message = Message(role=Role.ASSISTANT, content=final_response) | |
| 296 | + self.agent.session.append(loop_message) | |
| 297 | + summary.assistant_messages.append(loop_message) | |
| 298 | + await emit( | |
| 299 | + AgentEvent( | |
| 300 | + type="error", | |
| 301 | + content=( | |
| 302 | + f"Loop detected: {loop_description}. " | |
| 303 | + "Stopping to prevent repetitive behavior." | |
| 304 | + ), | |
| 305 | + ) | |
| 306 | + ) | |
| 307 | + await emit(AgentEvent(type="response", content=final_response)) | |
| 308 | + return self._finalize_summary(summary) | |
| 309 | + | |
| 310 | + if outcome.is_error: | |
| 311 | + consecutive_errors += 1 | |
| 312 | + else: | |
| 313 | + consecutive_errors = 0 | |
| 314 | + | |
| 315 | + await emit( | |
| 316 | + AgentEvent( | |
| 317 | + type="tool_result", | |
| 318 | + content=outcome.event_content, | |
| 319 | + tool_name=tool_call.name, | |
| 320 | + is_error=outcome.is_error, | |
| 321 | + ) | |
| 322 | + ) | |
| 323 | + | |
| 324 | + if ( | |
| 325 | + cfg.verification | |
| 326 | + and outcome.state == ToolExecutionState.EXECUTED | |
| 327 | + and not outcome.is_error | |
| 328 | + ): | |
| 329 | + verification = await self.agent._verify_action( | |
| 330 | + tool_call.name, | |
| 331 | + tool_call.arguments, | |
| 332 | + outcome.result_output, | |
| 333 | + ) | |
| 334 | + await emit( | |
| 335 | + AgentEvent( | |
| 336 | + type="verification", | |
| 337 | + content=f"Verified: {verification.verified}", | |
| 338 | + verification=verification, | |
| 339 | + tool_name=tool_call.name, | |
| 340 | + ) | |
| 341 | + ) | |
| 342 | + if not verification.verified and verification.needs_correction: | |
| 343 | + correction_message = ( | |
| 344 | + "[VERIFICATION FAILED] The action did not produce expected results.\n" | |
| 345 | + f"Discrepancies: {', '.join(verification.discrepancies)}\n" | |
| 346 | + f"Suggestion: {verification.correction_suggestion}" | |
| 347 | + ) | |
| 348 | + self.agent.session.append( | |
| 349 | + Message(role=Role.USER, content=correction_message) | |
| 350 | + ) | |
| 351 | + continue | |
| 352 | + | |
| 353 | + self.agent.session.append(outcome.message) | |
| 354 | + summary.tool_result_messages.append(outcome.message) | |
| 355 | + | |
| 356 | + if consecutive_errors >= 3: | |
| 357 | + final_response = ( | |
| 358 | + "I ran into some issues. Let me know if you'd like me to try a different approach." | |
| 359 | + ) | |
| 360 | + summary.final_response = final_response | |
| 361 | + summary.failures.append("three consecutive tool errors") | |
| 362 | + await emit(AgentEvent(type="response", content=final_response)) | |
| 363 | + break | |
| 364 | + | |
| 365 | + continue | |
| 366 | + | |
| 367 | + if self.agent._contains_unexecuted_code(response_content): | |
| 368 | + if iterations < self.agent.config.max_iterations - 1: | |
| 369 | + self.agent.session.append(Message(role=Role.ASSISTANT, content=response_content)) | |
| 370 | + self.agent.session.append( | |
| 371 | + Message( | |
| 372 | + role=Role.USER, | |
| 373 | + content=( | |
| 374 | + "CRITICAL ERROR: You are PRETENDING to use tools instead of actually " | |
| 375 | + "using them.\n\n" | |
| 376 | + "DO NOT write:\n" | |
| 377 | + "- 'Used bash tool with command...' (THIS IS FAKE)\n" | |
| 378 | + "- 'Created a file using the write tool...' (THIS IS FAKE)\n" | |
| 379 | + "- 'Here is what I did:' followed by descriptions\n" | |
| 380 | + "- Numbered steps or instructions\n" | |
| 381 | + "- Code blocks for me to copy\n\n" | |
| 382 | + "Your tool calls MUST go through the proper tool interface.\n" | |
| 383 | + "Writing 'Used bash tool...' does NOT execute anything!\n\n" | |
| 384 | + "ACTUALLY call the tools using the tool_call mechanism.\n" | |
| 385 | + "DO IT NOW - stop narrating and start executing." | |
| 386 | + ), | |
| 387 | + ) | |
| 388 | + ) | |
| 389 | + continue | |
| 390 | + | |
| 391 | + if ( | |
| 392 | + not self.agent.use_react | |
| 393 | + and len(actions_taken) == 0 | |
| 394 | + and iterations < self.agent.config.max_iterations - 2 | |
| 395 | + ): | |
| 396 | + deflection_phrases = ["you can", "you should", "you could", "try running"] | |
| 397 | + if any(phrase in content.lower() for phrase in deflection_phrases): | |
| 398 | + self.agent.session.append(Message(role=Role.ASSISTANT, content=response_content)) | |
| 399 | + self.agent.session.append( | |
| 400 | + Message( | |
| 401 | + role=Role.USER, | |
| 402 | + content="Please use your tools to execute the task rather than telling me what to do.", | |
| 403 | + ) | |
| 404 | + ) | |
| 405 | + continue | |
| 406 | + | |
| 407 | + cfg = self.agent.config.reasoning | |
| 408 | + if cfg.self_critique and len(content) > 100: | |
| 409 | + is_code_response = "```" in content or any( | |
| 410 | + keyword in content.lower() | |
| 411 | + for keyword in ["def ", "function ", "class ", "import "] | |
| 412 | + ) | |
| 413 | + if should_self_critique(content, is_code=is_code_response): | |
| 414 | + critique = await self.agent._self_critique(content, task) | |
| 415 | + await emit( | |
| 416 | + AgentEvent( | |
| 417 | + type="critique", | |
| 418 | + content=f"Self-critique: {len(critique.issues_found)} issues found", | |
| 419 | + critique=critique, | |
| 420 | + ) | |
| 421 | + ) | |
| 422 | + if critique.can_revise(): | |
| 423 | + revision_message = ( | |
| 424 | + "[SELF-CRITIQUE] Review your response:\n" | |
| 425 | + f"Issues found: {', '.join(critique.issues_found)}\n" | |
| 426 | + f"Suggestions: {', '.join(critique.suggestions)}\n\n" | |
| 427 | + "Please provide an improved response addressing these issues." | |
| 428 | + ) | |
| 429 | + self.agent.session.append(Message(role=Role.ASSISTANT, content=response_content)) | |
| 430 | + self.agent.session.append(Message(role=Role.USER, content=revision_message)) | |
| 431 | + critique.revision_count += 1 | |
| 432 | + continue | |
| 433 | + | |
| 434 | + is_text_loop, loop_description = self.agent.safeguards.detect_text_loop(content) | |
| 435 | + if is_text_loop: | |
| 436 | + final_response = ( | |
| 437 | + "I seem to be repeating myself. Let me know if you'd like me to try a different approach." | |
| 438 | + ) | |
| 439 | + summary.final_response = final_response | |
| 440 | + summary.failures.append(loop_description) | |
| 441 | + final_message = Message(role=Role.ASSISTANT, content=final_response) | |
| 442 | + self.agent.session.append(final_message) | |
| 443 | + summary.assistant_messages.append(final_message) | |
| 444 | + await emit( | |
| 445 | + AgentEvent( | |
| 446 | + type="error", | |
| 447 | + content=f"Text loop detected: {loop_description}. Stopping.", | |
| 448 | + ) | |
| 449 | + ) | |
| 450 | + await emit(AgentEvent(type="response", content=final_response)) | |
| 451 | + return self._finalize_summary(summary) | |
| 452 | + | |
| 453 | + self.agent.safeguards.record_response(content) | |
| 454 | + effective_task = original_task or task | |
| 455 | + if cfg.completion_check and continuation_count < cfg.max_continuation_prompts: | |
| 456 | + is_premature = ( | |
| 457 | + detect_premature_completion(effective_task, content, actions_taken) | |
| 458 | + if cfg.use_quick_completion | |
| 459 | + else False | |
| 460 | + ) | |
| 461 | + if is_premature: | |
| 462 | + continuation_count += 1 | |
| 463 | + continuation_prompt = get_continuation_prompt( | |
| 464 | + effective_task, | |
| 465 | + actions_taken, | |
| 466 | + content, | |
| 467 | + ) | |
| 468 | + await emit( | |
| 469 | + AgentEvent( | |
| 470 | + type="completion_check", | |
| 471 | + content=f"Task may be incomplete ({len(actions_taken)} actions taken)", | |
| 472 | + completion_check=TaskCompletionCheck( | |
| 473 | + original_task=effective_task, | |
| 474 | + is_complete=False, | |
| 475 | + accomplished=[action.split(":")[0] for action in actions_taken], | |
| 476 | + continuation_prompt=continuation_prompt, | |
| 477 | + ), | |
| 478 | + ) | |
| 479 | + ) | |
| 480 | + self.agent.session.append(Message(role=Role.ASSISTANT, content=response_content)) | |
| 481 | + self.agent.session.append(Message(role=Role.USER, content=continuation_prompt)) | |
| 482 | + continue | |
| 483 | + | |
| 484 | + final_response = content | |
| 485 | + if actions_taken and final_response.strip() and not final_response.rstrip().endswith("?"): | |
| 486 | + final_response = ( | |
| 487 | + final_response.rstrip() | |
| 488 | + + "\n\nWould you like me to make any changes or additions?" | |
| 489 | + ) | |
| 490 | + | |
| 491 | + final_message = Message(role=Role.ASSISTANT, content=response_content) | |
| 492 | + self.agent.session.append(final_message) | |
| 493 | + summary.assistant_messages.append(final_message) | |
| 494 | + | |
| 495 | + if rollback_plan and rollback_plan.actions: | |
| 496 | + await emit( | |
| 497 | + AgentEvent( | |
| 498 | + type="rollback_summary", | |
| 499 | + content=f"Rollback plan: {len(rollback_plan.actions)} action(s) tracked", | |
| 500 | + rollback_plan=rollback_plan, | |
| 501 | + ) | |
| 502 | + ) | |
| 503 | + | |
| 504 | + summary.final_response = final_response | |
| 505 | + await emit(AgentEvent(type="response", content=final_response)) | |
| 506 | + break | |
| 507 | + | |
| 508 | + return self._finalize_summary(summary) | |
| 509 | + | |
    async def _request_assistant_turn(
        self,
        *,
        emit: EventSink,
        max_tokens: int,
    ) -> AssistantTurn:
        """Request one assistant completion from the backend and collect the turn.

        Handles both the streaming and blocking paths: emits ``stream`` and early
        ``tool_call`` events while streaming, applies the safeguards content
        filter, and forwards any steering message into the agent's steering
        queue. Returns an :class:`AssistantTurn` with the content and tool calls.
        """
        # Reset the per-turn code filter so stream filtering starts fresh.
        self.agent.safeguards.code_filter.reset()
        # In ReAct mode tool schemas live in the prompt, so none are passed to the API.
        tools = None if self.agent.use_react else self.agent.registry.get_schemas()
        self.tracer.record(
            "assistant.requested",
            use_react=self.agent.use_react,
            stream=self.agent.config.stream,
            max_tokens=max_tokens,
        )

        if self.agent.config.stream:
            full_content = ""
            full_content_unfiltered = ""
            tool_calls: list[ToolCall] = []
            pending_tool_calls_seen: set[str] = set()

            async for chunk in self.agent.backend.stream(
                messages=self.agent.session.build_request_messages(),
                tools=tools,
                temperature=self.agent.config.temperature,
                max_tokens=max_tokens,
            ):
                filtered_content = ""
                if chunk.content:
                    filtered_content = self.agent.safeguards.filter_stream_chunk(chunk.content)
                    full_content_unfiltered += chunk.content

                # Emit filtered text as it arrives; always emit the terminal chunk
                # so consumers observe is_stream_end even when its content is empty.
                if filtered_content or chunk.is_done:
                    await emit(
                        AgentEvent(
                            type="stream",
                            content=filtered_content,
                            is_stream_end=chunk.is_done,
                        )
                    )

                # Mid-stream steering: hand any pending guidance to the agent's queue.
                if self.agent.safeguards.should_steer():
                    steering_message = self.agent.safeguards.get_steering_message()
                    if steering_message:
                        self.agent._steering_queue.put_nowait(steering_message)

                # Surface each tool call once, as soon as the backend announces it.
                if chunk.pending_tool_call and chunk.pending_tool_call.id not in pending_tool_calls_seen:
                    pending_tool_calls_seen.add(chunk.pending_tool_call.id)
                    await emit(
                        AgentEvent(
                            type="tool_call",
                            tool_name=chunk.pending_tool_call.name,
                            tool_args=chunk.pending_tool_call.arguments,
                        )
                    )

                if chunk.is_done:
                    # Prefer the backend's assembled transcript; fall back to our
                    # own accumulation of raw chunk content.
                    full_content = chunk.full_content or full_content_unfiltered
                    tool_calls = chunk.tool_calls

            self.tracer.record(
                "assistant.responded",
                stream=True,
                tool_call_count=len(tool_calls),
                content_length=len(full_content),
            )
            # NOTE(review): in the streaming path both content fields carry the
            # unfiltered transcript (filtering was applied per-chunk for display
            # only), while the blocking path below returns filtered `content` --
            # confirm this asymmetry is intended.
            return AssistantTurn(
                content=full_content,
                response_content=full_content,
                tool_calls=tool_calls,
                pending_tool_calls_seen=pending_tool_calls_seen,
            )

        # Blocking (non-streaming) completion path.
        response = await self.agent.backend.complete(
            messages=self.agent.session.build_request_messages(),
            tools=tools,
            temperature=self.agent.config.temperature,
            max_tokens=max_tokens,
        )
        # Preserve the raw content separately from the filtered copy.
        response_content = response.content
        content = self.agent.safeguards.filter_complete_content(response.content)
        # ReAct mode parses tool calls out of the text elsewhere, so drop API ones.
        tool_calls = response.tool_calls if not self.agent.use_react else []
        if self.agent.safeguards.should_steer():
            steering_message = self.agent.safeguards.get_steering_message()
            if steering_message:
                self.agent._steering_queue.put_nowait(steering_message)
        self.tracer.record(
            "assistant.responded",
            stream=False,
            tool_call_count=len(tool_calls),
            content_length=len(content),
        )
        return AssistantTurn(
            content=content,
            response_content=response_content,
            tool_calls=tool_calls,
            usage=response.usage,
        )
| 608 | + | |
| 609 | + async def _handle_recovery( | |
| 610 | + self, | |
| 611 | + tool_call: ToolCall, | |
| 612 | + outcome, | |
| 613 | + emit: EventSink, | |
| 614 | + ) -> Message | None: | |
| 615 | + if self.agent._recovery_context is None: | |
| 616 | + self.agent._recovery_context = RecoveryContext( | |
| 617 | + original_tool=tool_call.name, | |
| 618 | + original_args=tool_call.arguments, | |
| 619 | + max_retries=self.agent.config.max_recovery_attempts, | |
| 620 | + ) | |
| 621 | + | |
| 622 | + if self.agent._recovery_context.is_similar_attempt(tool_call.name, tool_call.arguments): | |
| 623 | + await emit( | |
| 624 | + AgentEvent( | |
| 625 | + type="error", | |
| 626 | + content=( | |
| 627 | + "Loop detected: already tried a similar command. " | |
| 628 | + "Try a DIFFERENT approach (e.g., read a config file first)." | |
| 629 | + ), | |
| 630 | + tool_name=tool_call.name, | |
| 631 | + ) | |
| 632 | + ) | |
| 633 | + else: | |
| 634 | + self.agent._recovery_context.add_attempt( | |
| 635 | + tool_call.name, | |
| 636 | + tool_call.arguments, | |
| 637 | + outcome.result_output, | |
| 638 | + ) | |
| 639 | + | |
| 640 | + if self.agent._recovery_context.can_retry(): | |
| 641 | + attempt_number = len(self.agent._recovery_context.attempts) | |
| 642 | + await emit( | |
| 643 | + AgentEvent( | |
| 644 | + type="recovery", | |
| 645 | + content=( | |
| 646 | + "Tool failed, attempting recovery " | |
| 647 | + f"({attempt_number}/{self.agent._recovery_context.max_retries})" | |
| 648 | + ), | |
| 649 | + tool_name=tool_call.name, | |
| 650 | + recovery_attempt=attempt_number, | |
| 651 | + ) | |
| 652 | + ) | |
| 653 | + recovery_prompt = format_recovery_prompt( | |
| 654 | + self.agent._recovery_context, | |
| 655 | + tool_call.name, | |
| 656 | + tool_call.arguments, | |
| 657 | + outcome.result_output, | |
| 658 | + ) | |
| 659 | + return Message.tool_result_message( | |
| 660 | + tool_call_id=tool_call.id, | |
| 661 | + display_content=recovery_prompt, | |
| 662 | + result_content=recovery_prompt, | |
| 663 | + is_error=True, | |
| 664 | + ) | |
| 665 | + | |
| 666 | + failure_message = format_failure_message(self.agent._recovery_context) | |
| 667 | + await emit( | |
| 668 | + AgentEvent( | |
| 669 | + type="error", | |
| 670 | + content=failure_message, | |
| 671 | + tool_name=tool_call.name, | |
| 672 | + ) | |
| 673 | + ) | |
| 674 | + self.agent._recovery_context = None | |
| 675 | + return Message.tool_result_message( | |
| 676 | + tool_call_id=tool_call.id, | |
| 677 | + display_content=( | |
| 678 | + f"Observation [{tool_call.name}]: Error: {failure_message}" | |
| 679 | + ), | |
| 680 | + result_content=failure_message, | |
| 681 | + is_error=True, | |
| 682 | + ) | |
| 683 | + | |
| 684 | + def _finalize_summary(self, summary: TurnSummary) -> TurnSummary: | |
| 685 | + summary.trace = list(self.tracer.events) | |
| 686 | + return summary | |
| 687 | + | |
| 688 | + @staticmethod | |
| 689 | + def _merge_usage(target: dict[str, int], update: dict[str, int]) -> None: | |
| 690 | + for key, value in update.items(): | |
| 691 | + target[key] = target.get(key, 0) + value | |
| 692 | + | |
| 693 | + @staticmethod | |
| 694 | + def _emit_confirmation(emit: EventSink): | |
| 695 | + async def _emit(tool_name: str, message: str, details: str) -> None: | |
| 696 | + await emit( | |
| 697 | + AgentEvent( | |
| 698 | + type="confirmation", | |
| 699 | + tool_name=tool_name, | |
| 700 | + confirm_message=message, | |
| 701 | + confirm_details=details, | |
| 702 | + ) | |
| 703 | + ) | |
| 704 | + | |
| 705 | + return _emit | |
src/loader/runtime/executor.py (added) @@ -0,0 +1,217 @@
| 1 | +"""Unified tool execution path for runtime turns.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +from collections.abc import Awaitable, Callable | |
| 6 | +from dataclasses import dataclass | |
| 7 | +from enum import StrEnum | |
| 8 | + | |
| 9 | +from ..agent.parsing import format_tool_result | |
| 10 | +from ..agent.recovery import ErrorCategory, categorize_error | |
| 11 | +from ..llm.base import Message, ToolCall | |
| 12 | +from ..tools.base import ConfirmationRequired, ToolRegistry | |
| 13 | +from ..tools.base import ToolResult as RegistryToolResult | |
| 14 | +from .tracing import RuntimeTracer | |
| 15 | + | |
# Optional async approval callback: (tool_name, message, details) -> confirmed?
BrowserConfirmation = Callable[[str, str, str], Awaitable[bool]] | None
# Optional async notifier used to surface a confirmation prompt to the UI.
ConfirmationEmitter = Callable[[str, str, str], Awaitable[None]] | None
| 18 | + | |
| 19 | + | |
class ToolExecutionState(StrEnum):
    """Outcome states for one tool call.

    String-valued so states serialize cleanly into trace events and logs.
    """

    EXECUTED = "executed"    # ran through the registry (may still be an error result)
    DUPLICATE = "duplicate"  # skipped: safeguards flagged it as a repeat action
    BLOCKED = "blocked"      # refused before execution (validation or browser command)
    DECLINED = "declined"    # user rejected the confirmation prompt
| 27 | + | |
| 28 | + | |
@dataclass
class ToolExecutionOutcome:
    """Structured outcome for one tool call."""

    # The call that was (or was not) executed.
    tool_call: ToolCall
    # How the call terminated; see ToolExecutionState.
    state: ToolExecutionState
    # Tool-result message ready to append to the conversation session.
    message: Message
    # Raw text to attach to the emitted tool_result event.
    event_content: str
    # True when the underlying result (or the block/validation) was an error.
    is_error: bool
    # Plain tool output (or the block/skip message when nothing executed).
    result_output: str
    # Error classification; only set when is_error is True.
    error_category: ErrorCategory | None = None
    # The registry's original result; None when execution never happened.
    registry_result: RegistryToolResult | None = None
| 41 | + | |
| 42 | + | |
class ToolExecutor:
    """Centralizes duplicate checks, validation, execution, and result messages.

    Every tool call flows through :meth:`execute_tool_call`, which applies the
    safeguard gates (browser-command block, duplicate check, validation), runs
    the registry, and packages the result into a :class:`ToolExecutionOutcome`,
    tracing each step along the way.
    """

    def __init__(self, registry: ToolRegistry, safeguards, tracer: RuntimeTracer) -> None:
        # `safeguards` is duck-typed: it must provide check_duplicate,
        # validate_action, and record_action (presumably RuntimeSafeguards
        # from agent.safeguards -- TODO confirm and annotate).
        self.registry = registry
        self.safeguards = safeguards
        self.tracer = tracer

    async def execute_tool_call(
        self,
        tool_call: ToolCall,
        *,
        on_confirmation: BrowserConfirmation = None,
        emit_confirmation: ConfirmationEmitter = None,
        source: str,
    ) -> ToolExecutionOutcome:
        """Execute a tool call through one consistent runtime path.

        Args:
            tool_call: The call to run.
            on_confirmation: Optional async approval callback; when absent, a
                confirmation request is auto-approved.
            emit_confirmation: Optional async notifier fired before approval.
            source: Label recorded in the trace for where the call came from.

        Returns:
            A ToolExecutionOutcome describing what happened; never raises for
            tool-level failures (those surface as is_error=True).
        """

        self.tracer.record(
            "tool.received",
            tool_name=tool_call.name,
            tool_call_id=tool_call.id,
            source=source,
        )

        # Gate 1: refuse bash commands that would open a browser/display.
        browser_block = self._browser_command_message(tool_call)
        if browser_block is not None:
            return self._blocked_outcome(tool_call, browser_block)

        # Gate 2: skip actions the safeguards consider exact repeats.
        is_duplicate, duplicate_reason = self.safeguards.check_duplicate(
            tool_call.name,
            tool_call.arguments,
        )
        if is_duplicate:
            self.tracer.record(
                "tool.duplicate",
                tool_name=tool_call.name,
                tool_call_id=tool_call.id,
                reason=duplicate_reason,
            )
            duplicate_message = f"[Skipped - duplicate action: {duplicate_reason}]"
            # Duplicates are reported as non-errors so the model is informed
            # without triggering error-recovery paths.
            return ToolExecutionOutcome(
                tool_call=tool_call,
                state=ToolExecutionState.DUPLICATE,
                message=Message.tool_result_message(
                    tool_call_id=tool_call.id,
                    display_content=duplicate_message,
                    result_content=duplicate_message,
                ),
                event_content=duplicate_message,
                is_error=False,
                result_output=duplicate_message,
            )

        # Gate 3: structural validation of the action itself.
        validation = self.safeguards.validate_action(tool_call.name, tool_call.arguments)
        if not validation.valid:
            error_message = f"[Blocked - {validation.reason}]"
            if validation.suggestion:
                error_message += f" Suggestion: {validation.suggestion}"
            self.tracer.record(
                "tool.blocked",
                tool_name=tool_call.name,
                tool_call_id=tool_call.id,
                reason=validation.reason,
            )
            return self._blocked_outcome(tool_call, error_message)

        # All gates passed: actually run the tool (with confirmation handling).
        result = await self._execute_registry(
            tool_call,
            on_confirmation,
            emit_confirmation,
        )
        result_text = format_tool_result(
            tool_call.name,
            result.output,
            result.is_error,
        )
        if not result.is_error:
            # NOTE(review): a declined result is also not an error, so declined
            # calls get recorded here and may later be skipped as duplicates --
            # confirm that is intended.
            self.safeguards.record_action(tool_call.name, tool_call.arguments)

        category = categorize_error(result.output) if result.is_error else None
        state = ToolExecutionState.EXECUTED
        # Decline detection matches the exact sentinel string produced by
        # _execute_registry below; keep the two in sync.
        if result.output == f"Tool {tool_call.name} was declined by user":
            state = ToolExecutionState.DECLINED

        self.tracer.record(
            "tool.executed",
            tool_name=tool_call.name,
            tool_call_id=tool_call.id,
            state=state,
            is_error=result.is_error,
        )
        return ToolExecutionOutcome(
            tool_call=tool_call,
            state=state,
            message=Message.tool_result_message(
                tool_call_id=tool_call.id,
                display_content=result_text,
                result_content=result.output,
                is_error=result.is_error,
            ),
            event_content=result.output,
            is_error=result.is_error,
            result_output=result.output,
            error_category=category,
            registry_result=result,
        )

    def _blocked_outcome(self, tool_call: ToolCall, message: str) -> ToolExecutionOutcome:
        """Build the error outcome for a call refused before execution."""
        return ToolExecutionOutcome(
            tool_call=tool_call,
            state=ToolExecutionState.BLOCKED,
            message=Message.tool_result_message(
                tool_call_id=tool_call.id,
                display_content=message,
                result_content=message,
                is_error=True,
            ),
            event_content=message,
            is_error=True,
            result_output=message,
            error_category=categorize_error(message),
        )

    async def _execute_registry(
        self,
        tool_call: ToolCall,
        on_confirmation: BrowserConfirmation,
        emit_confirmation: ConfirmationEmitter,
    ) -> RegistryToolResult:
        """Run the tool via the registry, resolving any confirmation request.

        Without an on_confirmation callback the request is auto-approved.
        A decline is reported as a non-error result with a sentinel message.
        """
        try:
            return await self.registry.execute(tool_call.name, **tool_call.arguments)
        except ConfirmationRequired as confirmation:
            self.tracer.record(
                "tool.confirmation_requested",
                tool_name=confirmation.tool_name,
                tool_call_id=tool_call.id,
            )
            # Notify listeners before asking for the decision.
            if emit_confirmation:
                await emit_confirmation(
                    confirmation.tool_name,
                    confirmation.message,
                    confirmation.details,
                )
            if on_confirmation:
                confirmed = await on_confirmation(
                    confirmation.tool_name,
                    confirmation.message,
                    confirmation.details,
                )
            else:
                confirmed = True

            if not confirmed:
                return RegistryToolResult(
                    output=f"Tool {tool_call.name} was declined by user",
                    is_error=False,
                )

            # Re-run with confirmation suppressed, restoring the flag even on
            # failure so one approval never leaks into later calls.
            previous_skip = self.registry.skip_confirmation
            self.registry.skip_confirmation = True
            try:
                return await self.registry.execute(tool_call.name, **tool_call.arguments)
            finally:
                self.registry.skip_confirmation = previous_skip

    def _browser_command_message(self, tool_call: ToolCall) -> str | None:
        """Return a block message when a bash call looks like a browser launch.

        Returns None for non-bash tools and for commands with no browser terms.
        """
        if tool_call.name != "bash":
            return None

        command = str(tool_call.arguments.get("command", ""))
        # Substring match is deliberately broad; note it can false-positive on
        # words containing "open " or "chrome" (e.g. paths) -- accepted as a
        # conservative block.
        browser_terms = ["xdg-open", "open ", "firefox", "chrome", "browser"]
        if any(term in command for term in browser_terms):
            return "[Blocked - Browser/display commands are not supported in the terminal runtime]"
        return None