tenseleyflow/loader / 3c5be85

Browse files

Refactor Loader into a typed turn runtime

Authored by espadonne
SHA
3c5be8521db3e1a409a1d619ae6c3e45bb0f2c26
Parents
51c2c2e
Tree
4283571

6 changed files

StatusFile+-
M src/loader/agent/loop.py 32 939
M src/loader/llm/base.py 29 1
M src/loader/runtime/__init__.py 6 1
M src/loader/runtime/capabilities.py 40 1
A src/loader/runtime/conversation.py 705 0
A src/loader/runtime/executor.py 217 0
src/loader/agent/loop.pymodified
1024 lines changed — click to load
@@ -8,6 +8,10 @@ from typing import AsyncIterator, Awaitable, Callable
88
 from ..llm.base import LLMBackend, Message, Role, ToolCall
99
 from ..tools.base import ToolRegistry, create_default_registry, ConfirmationRequired
1010
 from ..context.project import ProjectContext, detect_project
11
+from ..runtime.capabilities import resolve_backend_capability_profile
12
+from ..runtime.conversation import ConversationRuntime
13
+from ..runtime.events import AgentEvent, TurnSummary
14
+from ..runtime.session import ConversationSession
1115
 from .prompts import build_system_prompt
1216
 from .parsing import parse_tool_calls, format_tool_result
1317
 from .planner import Plan, parse_plan, should_plan, format_step_prompt, PLANNING_PROMPT, SHOULD_PLAN_PROMPT
@@ -45,7 +49,7 @@ from .reasoning import (
4549
     estimate_complexity,
4650
     get_token_budget,
4751
 )
48
-from .safeguards import RuntimeSafeguards, ValidationResult
52
+from .safeguards import RuntimeSafeguards
4953
 
5054
 
5155
 @dataclass
@@ -99,34 +103,6 @@ class AgentConfig:
99103
             self.reasoning = ReasoningConfig()
100104
 
101105
 
102
-@dataclass
103
-class AgentEvent:
104
-    """Event emitted during agent execution."""
105
-    # Event types: thinking, tool_call, tool_result, response, error, plan, step,
106
-    # recovery, stream, confirmation, steering, decomposition, subtask, critique,
107
-    # confidence, verification
108
-    type: str
109
-    content: str = ""
110
-    tool_name: str | None = None
111
-    tool_args: dict | None = None
112
-    step_info: str | None = None  # For step progress like "[2/5] Doing X"
113
-    recovery_attempt: int | None = None  # For recovery events
114
-    is_stream_end: bool = False  # For stream events - indicates final chunk
115
-    confirm_message: str | None = None  # For confirmation events
116
-    confirm_details: str | None = None  # For confirmation events
117
-    is_error: bool = False  # For tool_result events
118
-
119
-    # Reasoning events
120
-    decomposition: TaskDecomposition | None = None  # For decomposition events
121
-    subtask: Subtask | None = None  # For subtask events
122
-    critique: SelfCritique | None = None  # For critique events
123
-    confidence: ConfidenceAssessment | None = None  # For confidence events
124
-    verification: ActionVerification | None = None  # For verification events
125
-    completion_check: TaskCompletionCheck | None = None  # For completion events
126
-    rollback_plan: RollbackPlan | None = None  # For rollback events
127
-    rollback_action: RollbackAction | None = None  # For individual rollback action
128
-
129
-
130106
 class Agent:
131107
     """The main agent that orchestrates the LLM and tools."""
132108
 
@@ -141,8 +117,15 @@ class Agent:
141117
         self.registry = registry or create_default_registry()
142118
         self.config = config or AgentConfig()
143119
         self.messages: list[Message] = []
120
+        self.session = ConversationSession(
121
+            system_message_factory=self._get_system_message,
122
+            few_shot_factory=self._get_few_shot_examples,
123
+            messages=self.messages,
124
+        )
144125
         self._system_message: Message | None = None
145126
         self._use_react: bool | None = None
127
+        self.capability_profile = resolve_backend_capability_profile(self.backend)
128
+        self.last_turn_summary: TurnSummary | None = None
146129
 
147130
         # Recovery tracking
148131
         self._recovery_context: RecoveryContext | None = None
@@ -199,19 +182,7 @@ class Agent:
199182
             self._use_react = True
200183
             return True
201184
 
202
-        # Check if backend supports native tools
203
-        if hasattr(self.backend, "supports_native_tools"):
204
-            supports_native = self.backend.supports_native_tools()
205
-            self._use_react = not supports_native
206
-            # Debug log
207
-            try:
208
-                with open("/tmp/loader_debug.log", "a") as f:
209
-                    f.write(f"[loop] use_react: supports_native={supports_native}, use_react={self._use_react}\n")
210
-            except Exception:
211
-                pass
212
-        else:
213
-            # Default to ReAct for unknown backends
214
-            self._use_react = True
185
+        self._use_react = not self.capability_profile.supports_native_tools
215186
 
216187
         return self._use_react
217188
 
@@ -234,14 +205,13 @@ class Agent:
234205
 
235206
     def _build_messages(self) -> list[Message]:
236207
         """Build the full message list for the LLM."""
237
-        messages = [self._get_system_message()]
208
+        return self.session.build_request_messages()
238209
 
239
-        # Add few-shot examples if this is a fresh conversation
240
-        if len(self.messages) <= 2:  # User message + maybe prefill
241
-            messages.extend(self._get_few_shot_examples())
210
+    def refresh_capability_profile(self) -> None:
211
+        """Refresh the runtime capability profile from the current backend."""
242212
 
243
-        messages.extend(self.messages)
244
-        return messages
213
+        self.capability_profile = resolve_backend_capability_profile(self.backend)
214
+        self._use_react = None
245215
 
246216
     def _get_few_shot_examples(self) -> list[Message]:
247217
         """Get few-shot examples demonstrating proper tool use."""
@@ -612,897 +582,15 @@ class Agent:
612582
         original_task: str | None = None,
613583
     ) -> str:
614584
         """Inner execution loop without planning."""
615
-        iterations = 0
616
-        final_response = ""
617
-        actions_taken: list[str] = []  # Track what we've done
618
-        continuation_count = 0  # How many times we've nudged to continue
619
-        empty_retry_count = 0  # How many times we've retried on empty response
620
-        MAX_EMPTY_RETRIES = 5  # More retries before giving up - small models need patience
621
-        extracted_iterations = 0  # How many times we've extracted bracket-format tool calls
622
-        MAX_EXTRACTED_ITERATIONS = 3  # Limit extracted tool call loops
623
-        consecutive_errors = 0  # Track consecutive tool errors
624
-
625
-        # Adaptive token budgeting based on task complexity
626
-        complexity = estimate_complexity(task)
627
-        max_tokens, _ = get_token_budget(complexity)
628
-        # Use configured max_tokens as ceiling, complexity as floor
629
-        effective_max_tokens = min(self.config.max_tokens, max(max_tokens, 512))
630
-
631
-        # Rollback planning
632
-        rollback_plan = RollbackPlan() if self.config.reasoning.rollback else None
633
-
634
-        while iterations < self.config.max_iterations:
635
-            iterations += 1
636585
 
637
-            # On first iteration, add assistant prefilling to guide tool use
638
-            if iterations == 1 and len(self.messages) == 1:  # Just the user's message
639
-                # Check if task looks like it needs immediate action
640
-                task_lower = task.lower()
641
-                action_keywords = ['create', 'write', 'make', 'run', 'execute', 'build', 'install', 'delete', 'remove', 'add', 'edit', 'modify', 'update', 'fix']
642
-                if any(kw in task_lower for kw in action_keywords):
643
-                    # Prime with partial assistant response - start of tool call
644
-                    self.messages.append(Message(
645
-                        role=Role.ASSISTANT,
646
-                        content="[",
647
-                    ))
648
-                    try:
649
-                        with open("/tmp/loader_debug.log", "a") as f:
650
-                            f.write(f"[loop] Added assistant prefill '[' for action task\n")
651
-                    except Exception:
652
-                        pass
653
-
654
-            # Check for steering messages from user
655
-            steering_messages = self._drain_steering_queue()
656
-            for steer_msg in steering_messages:
657
-                await emit(AgentEvent(type="steering", content=steer_msg))
658
-                self.messages.append(Message(
659
-                    role=Role.USER,
660
-                    content=f"[USER INTERRUPTION]: {steer_msg}",
661
-                ))
662
-
663
-            # Get completion from LLM
664
-            await emit(AgentEvent(type="thinking"))
665
-
666
-            # Reset code block filter state for this LLM call
667
-            self.safeguards.code_filter.reset()
668
-
669
-            # Pass tools only for native tool calling
670
-            tools = None if self.use_react else self.registry.get_schemas()
671
-
672
-            # Use streaming or regular completion
673
-            pending_tool_calls_seen: set[str] = set()  # Track IDs of pending tool calls shown
674
-            if self.config.stream:
675
-                full_content = ""
676
-                full_content_unfiltered = ""  # Keep original for history
677
-                tool_calls: list[ToolCall] = []
678
-
679
-                async for chunk in self.backend.stream(
680
-                    messages=self._build_messages(),
681
-                    tools=tools,
682
-                    temperature=self.config.temperature,
683
-                    max_tokens=effective_max_tokens,
684
-                ):
685
-                    # Filter content through safeguards (removes code blocks)
686
-                    filtered_content = ""
687
-                    if chunk.content:
688
-                        filtered_content = self.safeguards.filter_stream_chunk(chunk.content)
689
-                        full_content_unfiltered += chunk.content
690
-
691
-                    # Emit stream events for filtered content OR for final chunk (to signal end)
692
-                    if filtered_content or chunk.is_done:
693
-                        await emit(AgentEvent(
694
-                            type="stream",
695
-                            content=filtered_content,
696
-                            is_stream_end=chunk.is_done,
697
-                        ))
698
-
699
-                    # Check if we should inject steering (bad patterns detected)
700
-                    if self.safeguards.should_steer():
701
-                        steering_msg = self.safeguards.get_steering_message()
702
-                        if steering_msg:
703
-                            # Queue steering for next iteration
704
-                            self._steering_queue.put_nowait(steering_msg)
705
-
706
-                    # Show pending tool calls as they're detected (ReAct mode interleaving)
707
-                    if chunk.pending_tool_call and chunk.pending_tool_call.id not in pending_tool_calls_seen:
708
-                        pending_tool_calls_seen.add(chunk.pending_tool_call.id)
709
-                        await emit(AgentEvent(
710
-                            type="tool_call",
711
-                            tool_name=chunk.pending_tool_call.name,
712
-                            tool_args=chunk.pending_tool_call.arguments,
713
-                        ))
714
-                    if chunk.is_done:
715
-                        full_content = chunk.full_content or full_content_unfiltered
716
-                        tool_calls = chunk.tool_calls
717
-                        # Debug log
718
-                        try:
719
-                            with open("/tmp/loader_debug.log", "a") as f:
720
-                                f.write(f"[loop] chunk.is_done: got {len(tool_calls)} tool_calls\n")
721
-                        except Exception:
722
-                            pass
723
-
724
-                content = full_content
725
-                response_content = full_content
726
-            else:
727
-                response = await self.backend.complete(
728
-                    messages=self._build_messages(),
729
-                    tools=tools,
730
-                    temperature=self.config.temperature,
731
-                    max_tokens=effective_max_tokens,
732
-                )
733
-                # Filter content through safeguards (removes code blocks)
734
-                response_content = response.content  # Keep original for history
735
-                content = self.safeguards.filter_complete_content(response.content)
736
-                tool_calls = response.tool_calls if not self.use_react else []
737
-
738
-                # Check if we should inject steering (bad patterns detected)
739
-                if self.safeguards.should_steer():
740
-                    steering_msg = self.safeguards.get_steering_message()
741
-                    if steering_msg:
742
-                        self._steering_queue.put_nowait(steering_msg)
743
-
744
-            # Handle empty responses (common with small models after clarifications)
745
-            if not content.strip():
746
-                empty_retry_count += 1
747
-                if empty_retry_count <= MAX_EMPTY_RETRIES:
748
-                    # Use progressively more direct prompts
749
-                    task_context = original_task or task
750
-                    retry_prompts = [
751
-                        # Retry 1: Gentle nudge with action focus
752
-                        f"Great! Now let me proceed with the task. I'll start by using my tools.",
753
-                        # Retry 2: More explicit about what to do
754
-                        f"I understand. Let me create that now using my tools (write, bash, etc.).",
755
-                        # Retry 3: Very direct instruction
756
-                        f"Proceeding with: {task_context[:80]}. I'll use the write tool to create the files.",
757
-                        # Retry 4: Action-first prompt
758
-                        f"Starting now. First step: create the necessary files and directories.",
759
-                        # Retry 5: Last attempt with full context
760
-                        f"Let me complete this task step by step. The goal is: {task_context[:100]}",
761
-                    ]
762
-                    prompt = retry_prompts[min(empty_retry_count - 1, len(retry_prompts) - 1)]
763
-                    self.messages.append(Message(
764
-                        role=Role.ASSISTANT,
765
-                        content=prompt,  # Add as assistant message to give model a "running start"
766
-                    ))
767
-                    continue
768
-                else:
769
-                    # Give up after max retries - but make the message less alarming
770
-                    await emit(AgentEvent(
771
-                        type="response",
772
-                        content="I need a bit more direction. What specifically would you like me to create or do?",
773
-                    ))
774
-                    break
775
-
776
-            # Get tool calls - either native or parsed from text
777
-            if self.use_react:
778
-                # Parse tool calls from text (ReAct mode)
779
-                parsed = parse_tool_calls(content)
780
-                tool_calls = parsed.tool_calls
781
-                content = parsed.content
782
-
783
-                # Check if this is a final answer
784
-                if parsed.is_final_answer and not tool_calls:
785
-                    final_response = content
786
-                    self.messages.append(Message(
787
-                        role=Role.ASSISTANT,
788
-                        content=response_content,  # Keep original for history
789
-                    ))
790
-                    await emit(AgentEvent(type="response", content=final_response))
791
-                    break
792
-
793
-            # If there are tool calls, execute them
794
-            if tool_calls:
795
-                # Debug log
796
-                try:
797
-                    with open("/tmp/loader_debug.log", "a") as f:
798
-                        f.write(f"[loop] executing {len(tool_calls)} tool_calls\n")
799
-                        for tc in tool_calls:
800
-                            f.write(f"[loop]   - {tc.name}: id={tc.id}, args_keys={list(tc.arguments.keys())}\n")
801
-                except Exception:
802
-                    pass
803
-
804
-                # Add assistant message with tool calls
805
-                self.messages.append(Message(
806
-                    role=Role.ASSISTANT,
807
-                    content=response_content,
808
-                    tool_calls=tool_calls,
809
-                ))
810
-
811
-                # Execute each tool (with recovery logic)
812
-                for tool_call in tool_calls:
813
-                    cfg = self.config.reasoning
814
-
815
-                    # Confidence scoring before execution
816
-                    if cfg.confidence_scoring:
817
-                        context = "\n".join(
818
-                            m.content[:500] for m in self.messages[-5:]
819
-                            if m.content
820
-                        )
821
-                        confidence = await self._assess_confidence(
822
-                            tool_call.name,
823
-                            tool_call.arguments,
824
-                            context,
825
-                        )
826
-                        await emit(AgentEvent(
827
-                            type="confidence",
828
-                            content=f"Confidence: {confidence.level.name} ({confidence.score}/5)",
829
-                            confidence=confidence,
830
-                            tool_name=tool_call.name,
831
-                        ))
832
-
833
-                        # If confidence is too low, ask LLM to reconsider
834
-                        if confidence.score < cfg.min_confidence_for_action:
835
-                            low_conf_msg = (
836
-                                f"[LOW CONFIDENCE WARNING] The planned action has low confidence "
837
-                                f"({confidence.level.name}).\n"
838
-                                f"Reasoning: {confidence.reasoning}\n"
839
-                                f"Risks: {', '.join(confidence.risks)}\n"
840
-                                f"Consider an alternative approach or gather more information first."
841
-                            )
842
-                            self.messages.append(Message(
843
-                                role=Role.USER,
844
-                                content=low_conf_msg,
845
-                            ))
846
-                            continue  # Skip this tool call, let LLM reconsider
847
-
848
-                    # Only emit tool_call if not already shown during streaming
849
-                    if tool_call.id not in pending_tool_calls_seen:
850
-                        try:
851
-                            with open("/tmp/loader_debug.log", "a") as f:
852
-                                f.write(f"[loop] emitting tool_call event for {tool_call.name}\n")
853
-                        except Exception:
854
-                            pass
855
-                        await emit(AgentEvent(
856
-                            type="tool_call",
857
-                            tool_name=tool_call.name,
858
-                            tool_args=tool_call.arguments,
859
-                        ))
860
-                    else:
861
-                        try:
862
-                            with open("/tmp/loader_debug.log", "a") as f:
863
-                                f.write(f"[loop] SKIPPING tool_call event for {tool_call.name} (already in pending_seen)\n")
864
-                        except Exception:
865
-                            pass
866
-
867
-                    # Track this action for completion checking
868
-                    action_desc = f"{tool_call.name}: {str(tool_call.arguments)[:100]}"
869
-                    actions_taken.append(action_desc)
870
-
871
-                    # Check for duplicate actions using safeguards
872
-                    is_dup, dup_reason = self.safeguards.check_duplicate(
873
-                        tool_call.name, tool_call.arguments
874
-                    )
875
-                    if is_dup:
876
-                        try:
877
-                            with open("/tmp/loader_debug.log", "a") as f:
878
-                                f.write(f"[loop] SKIPPING duplicate: {dup_reason}\n")
879
-                        except Exception:
880
-                            pass
881
-                        # Add a tool result indicating skip
882
-                        self.messages.append(Message(
883
-                            role=Role.TOOL,
884
-                            content=f"[Skipped - duplicate action: {dup_reason}]",
885
-                            tool_call_id=tool_call.id,
886
-                        ))
887
-                        continue  # Skip to next tool call
888
-
889
-                    # Pre-action validation
890
-                    validation = self.safeguards.validate_action(
891
-                        tool_call.name, tool_call.arguments
892
-                    )
893
-                    if not validation.valid:
894
-                        try:
895
-                            with open("/tmp/loader_debug.log", "a") as f:
896
-                                f.write(f"[loop] BLOCKED by validation: {validation.reason}\n")
897
-                        except Exception:
898
-                            pass
899
-                        # Add a tool result with the validation error
900
-                        error_msg = f"[Blocked - {validation.reason}]"
901
-                        if validation.suggestion:
902
-                            error_msg += f" Suggestion: {validation.suggestion}"
903
-                        self.messages.append(Message(
904
-                            role=Role.TOOL,
905
-                            content=error_msg,
906
-                            tool_call_id=tool_call.id,
907
-                        ))
908
-                        await emit(AgentEvent(
909
-                            type="tool_result",
910
-                            content=error_msg,
911
-                            tool_name=tool_call.name,
912
-                            is_error=True,
913
-                        ))
914
-                        continue  # Skip to next tool call
915
-
916
-                    # Rollback planning: create rollback action before destructive ops
917
-                    if rollback_plan and is_destructive_tool(tool_call.name, tool_call.arguments):
918
-                        async def read_file_for_backup(path: str) -> str:
919
-                            """Read file contents for backup."""
920
-                            result = await self.registry.execute("read", file_path=path)
921
-                            return result.output if not result.is_error else ""
922
-
923
-                        rollback_action = await create_rollback_plan_for_action(
924
-                            tool_call.name,
925
-                            tool_call.arguments,
926
-                            read_file_for_backup,
927
-                        )
928
-                        if rollback_action:
929
-                            rollback_plan.actions.append(rollback_action)
930
-                            if self.config.reasoning.show_rollback_plan:
931
-                                await emit(AgentEvent(
932
-                                    type="rollback",
933
-                                    content=f"Rollback tracked: {rollback_action.description}",
934
-                                    rollback_action=rollback_action,
935
-                                ))
936
-
937
-                    # Try to execute, handling confirmation if needed
938
-                    try:
939
-                        result = await self.registry.execute(
940
-                            tool_call.name,
941
-                            **tool_call.arguments,
942
-                        )
943
-                    except ConfirmationRequired as conf:
944
-                        # Emit confirmation event
945
-                        await emit(AgentEvent(
946
-                            type="confirmation",
947
-                            tool_name=conf.tool_name,
948
-                            confirm_message=conf.message,
949
-                            confirm_details=conf.details,
950
-                        ))
951
-
952
-                        # If we have a confirmation callback, ask user
953
-                        if on_confirmation:
954
-                            confirmed = await on_confirmation(
955
-                                conf.tool_name,
956
-                                conf.message,
957
-                                conf.details,
958
-                            )
959
-                            if confirmed:
960
-                                # Re-execute with skip_confirmation
961
-                                old_skip = self.registry.skip_confirmation
962
-                                self.registry.skip_confirmation = True
963
-                                try:
964
-                                    result = await self.registry.execute(
965
-                                        tool_call.name,
966
-                                        **tool_call.arguments,
967
-                                    )
968
-                                finally:
969
-                                    self.registry.skip_confirmation = old_skip
970
-                            else:
971
-                                # User declined - create a skip result
972
-                                from ..tools.base import ToolResult
973
-                                result = ToolResult(
974
-                                    output=f"Tool {tool_call.name} was declined by user",
975
-                                    is_error=False,
976
-                                )
977
-                        else:
978
-                            # No callback - treat as auto-confirmed (for non-TUI mode)
979
-                            old_skip = self.registry.skip_confirmation
980
-                            self.registry.skip_confirmation = True
981
-                            try:
982
-                                result = await self.registry.execute(
983
-                                    tool_call.name,
984
-                                    **tool_call.arguments,
985
-                                )
986
-                            finally:
987
-                                self.registry.skip_confirmation = old_skip
988
-
989
-                    # Handle errors with recovery
990
-                    if result.is_error and self.config.auto_recover:
991
-                        # Initialize or update recovery context
992
-                        if self._recovery_context is None:
993
-                            self._recovery_context = RecoveryContext(
994
-                                original_tool=tool_call.name,
995
-                                original_args=tool_call.arguments,
996
-                                max_retries=self.config.max_recovery_attempts,
997
-                            )
998
-
999
-                        # Check if this or a similar call was already tried (loop detection)
1000
-                        if self._recovery_context.is_similar_attempt(tool_call.name, tool_call.arguments):
1001
-                            await emit(AgentEvent(
1002
-                                type="error",
1003
-                                content=f"Loop detected: already tried a similar command. Try a DIFFERENT approach (e.g., read a config file first).",
1004
-                                tool_name=tool_call.name,
1005
-                            ))
1006
-                        else:
1007
-                            # Record this attempt
1008
-                            self._recovery_context.add_attempt(
1009
-                                tool_call.name,
1010
-                                tool_call.arguments,
1011
-                                result.output,
1012
-                            )
1013
-
1014
-                        # Can we retry?
1015
-                        if self._recovery_context.can_retry():
1016
-                            attempt_num = len(self._recovery_context.attempts)
1017
-                            await emit(AgentEvent(
1018
-                                type="recovery",
1019
-                                content=f"Tool failed, attempting recovery ({attempt_num}/{self._recovery_context.max_retries})",
1020
-                                tool_name=tool_call.name,
1021
-                                recovery_attempt=attempt_num,
1022
-                            ))
1023
-
1024
-                            # Add recovery prompt for LLM
1025
-                            recovery_prompt = format_recovery_prompt(
1026
-                                self._recovery_context,
1027
-                                tool_call.name,
1028
-                                tool_call.arguments,
1029
-                                result.output,
1030
-                            )
1031
-                            self.messages.append(Message(
1032
-                                role=Role.TOOL,
1033
-                                content=recovery_prompt,
1034
-                            ))
1035
-
1036
-                            # Continue to let LLM try an alternative
1037
-                            continue
1038
-                        else:
1039
-                            # Max retries exceeded
1040
-                            failure_msg = format_failure_message(self._recovery_context)
1041
-                            await emit(AgentEvent(
1042
-                                type="error",
1043
-                                content=failure_msg,
1044
-                                tool_name=tool_call.name,
1045
-                            ))
1046
-                            self._recovery_context = None
1047
-
1048
-                            # Add the final error result
1049
-                            result_text = format_tool_result(
1050
-                                tool_call.name,
1051
-                                failure_msg,
1052
-                                is_error=True,
1053
-                            )
1054
-                            self.messages.append(Message(
1055
-                                role=Role.TOOL,
1056
-                                content=result_text,
1057
-                            ))
1058
-                            continue
1059
-                    else:
1060
-                        # Success or no auto-recover - clear recovery context
1061
-                        if not result.is_error:
1062
-                            self._recovery_context = None
1063
-                            # Record successful action to prevent duplicates
1064
-                            self.safeguards.record_action(tool_call.name, tool_call.arguments)
1065
-
1066
-                            # Check for repetitive loop pattern
1067
-                            is_loop, loop_desc = self.safeguards.detect_loop()
1068
-                            if is_loop:
1069
-                                await emit(AgentEvent(
1070
-                                    type="error",
1071
-                                    content=f"Loop detected: {loop_desc}. Stopping to prevent repetitive behavior.",
1072
-                                ))
1073
-                                final_response = "I noticed I was repeating the same actions. Let me know what you'd like me to do differently."
1074
-                                self.messages.append(Message(
1075
-                                    role=Role.ASSISTANT,
1076
-                                    content=final_response,
1077
-                                ))
1078
-                                await emit(AgentEvent(type="response", content=final_response))
1079
-                                return final_response
1080
-
1081
-                    await emit(AgentEvent(
1082
-                        type="tool_result",
1083
-                        content=result.output,
1084
-                        tool_name=tool_call.name,
1085
-                        is_error=result.is_error,
1086
-                    ))
1087
-
1088
-                    # Post-action verification
1089
-                    if cfg.verification and not result.is_error:
1090
-                        verification = await self._verify_action(
1091
-                            tool_call.name,
1092
-                            tool_call.arguments,
1093
-                            result.output,
1094
-                        )
1095
-                        await emit(AgentEvent(
1096
-                            type="verification",
1097
-                            content=f"Verified: {verification.verified}",
1098
-                            verification=verification,
1099
-                            tool_name=tool_call.name,
1100
-                        ))
1101
-
1102
-                        if not verification.verified and verification.needs_correction:
1103
-                            # Add correction suggestion for LLM
1104
-                            correction_msg = (
1105
-                                f"[VERIFICATION FAILED] The action did not produce expected results.\n"
1106
-                                f"Discrepancies: {', '.join(verification.discrepancies)}\n"
1107
-                                f"Suggestion: {verification.correction_suggestion}"
1108
-                            )
1109
-                            self.messages.append(Message(
1110
-                                role=Role.USER,
1111
-                                content=correction_msg,
1112
-                            ))
1113
-                            # Don't add the tool result - let LLM try correction
1114
-                            continue
1115
-
1116
-                    # Add tool result message
1117
-                    result_text = format_tool_result(
1118
-                        tool_call.name,
1119
-                        result.output,
1120
-                        result.is_error,
1121
-                    )
1122
-                    self.messages.append(Message(
1123
-                        role=Role.TOOL,
1124
-                        content=result_text,
1125
-                    ))
1126
-
1127
-                # Continue the loop to get next response
1128
-                continue
1129
-
1130
-            # No tool calls - check if model outputted raw JSON tool calls as text
1131
-            # Some small models do this instead of using the proper API
1132
-            if not tool_calls:
1133
-                try:
1134
-                    with open("/tmp/loader_debug.log", "a") as f:
1135
-                        f.write(f"[loop] no tool_calls, checking for raw JSON/bracket format in content (len={len(content)})\n")
1136
-                except Exception:
1137
-                    pass
1138
-                raw_tool_calls = self._extract_raw_json_tool_calls(content)
1139
-                try:
1140
-                    with open("/tmp/loader_debug.log", "a") as f:
1141
-                        f.write(f"[loop] _extract_raw_json_tool_calls returned {len(raw_tool_calls)} calls\n")
1142
-                        for tc in raw_tool_calls:
1143
-                            f.write(f"[loop]   - {tc.name}: {list(tc.arguments.keys())}\n")
1144
-                except Exception:
1145
-                    pass
1146
-                if raw_tool_calls:
1147
-                    # Successfully extracted tool calls from raw JSON - use them
1148
-                    tool_calls = raw_tool_calls
1149
-                    # Clear the streamed content (it was raw JSON, looks ugly)
1150
-                    await emit(AgentEvent(type="clear_stream"))
1151
-
1152
-            # If we now have tool calls (from raw JSON extraction), execute them
1153
-            if tool_calls:
1154
-                extracted_iterations += 1
1155
-
1156
-                # Check if we've exceeded extraction limits
1157
-                if extracted_iterations > MAX_EXTRACTED_ITERATIONS:
1158
-                    # Model keeps outputting bracket-format calls - stop and let user continue
1159
-                    final_response = content
1160
-                    self.messages.append(Message(role=Role.ASSISTANT, content=response_content))
1161
-                    await emit(AgentEvent(
1162
-                        type="response",
1163
-                        content=final_response + "\n\nLet me know if you'd like me to continue or make changes."
1164
-                    ))
1165
-                    break
1166
-
1167
-                try:
1168
-                    with open("/tmp/loader_debug.log", "a") as f:
1169
-                        f.write(f"[loop] executing {len(tool_calls)} extracted tool calls (iteration {extracted_iterations})\n")
1170
-                except Exception:
1171
-                    pass
1172
-
1173
-                # Track errors in this batch
1174
-                batch_errors = 0
1175
-
1176
-                # This duplicates the tool execution logic above, but that's intentional
1177
-                # to handle the case where raw JSON tool calls are extracted
1178
-                for i, tc in enumerate(tool_calls):
1179
-                    # Skip browser/display commands that don't work in terminal
1180
-                    if tc.name == "bash":
1181
-                        cmd = tc.arguments.get("command", "")
1182
-                        if any(x in cmd for x in ["xdg-open", "open ", "firefox", "chrome", "browser"]):
1183
-                            try:
1184
-                                with open("/tmp/loader_debug.log", "a") as f:
1185
-                                    f.write(f"[loop] skipping browser command: {cmd[:50]}\n")
1186
-                            except Exception:
1187
-                                pass
1188
-                            continue
1189
-
1190
-                    # Use safeguards for duplicate checking
1191
-                    is_dup, dup_reason = self.safeguards.check_duplicate(tc.name, tc.arguments)
1192
-                    if is_dup:
1193
-                        try:
1194
-                            with open("/tmp/loader_debug.log", "a") as f:
1195
-                                f.write(f"[loop] skipping duplicate: {dup_reason}\n")
1196
-                        except Exception:
1197
-                            pass
1198
-                        continue
1199
-
1200
-                    # Pre-action validation
1201
-                    validation = self.safeguards.validate_action(tc.name, tc.arguments)
1202
-                    if not validation.valid:
1203
-                        try:
1204
-                            with open("/tmp/loader_debug.log", "a") as f:
1205
-                                f.write(f"[loop] BLOCKED by validation: {validation.reason}\n")
1206
-                        except Exception:
1207
-                            pass
1208
-                        error_msg = f"[Blocked - {validation.reason}]"
1209
-                        if validation.suggestion:
1210
-                            error_msg += f" Suggestion: {validation.suggestion}"
1211
-                        await emit(AgentEvent(
1212
-                            type="tool_result",
1213
-                            content=error_msg,
1214
-                            tool_name=tc.name,
1215
-                            is_error=True,
1216
-                        ))
1217
-                        self.messages.append(Message(
1218
-                            role=Role.TOOL,
1219
-                            content=error_msg,
1220
-                        ))
1221
-                        batch_errors += 1
1222
-                        continue
1223
-
1224
-                    # Small delay between tool executions for better UX
1225
-                    if i > 0:
1226
-                        await asyncio.sleep(0.4)
1227
-                    try:
1228
-                        with open("/tmp/loader_debug.log", "a") as f:
1229
-                            f.write(f"[loop] executing extracted tool: {tc.name} args={tc.arguments}\n")
1230
-                    except Exception:
1231
-                        pass
1232
-                    actions_taken.append(f"{tc.name}: {str(tc.arguments)[:50]}...")
1233
-                    await emit(AgentEvent(
1234
-                        type="tool_call",
1235
-                        tool_name=tc.name,
1236
-                        tool_args=tc.arguments,
1237
-                    ))
1238
-
1239
-                    # Execute the tool
1240
-                    is_error = False
1241
-                    try:
1242
-                        result = await self.registry.execute(tc.name, **tc.arguments)
1243
-                        result_text = result.output
1244
-                        is_error = result.is_error
1245
-                    except ConfirmationRequired as e:
1246
-                        # Emit confirmation event
1247
-                        await emit(AgentEvent(
1248
-                            type="confirmation",
1249
-                            tool_name=e.tool_name,
1250
-                            confirm_message=e.message,
1251
-                            confirm_details=e.details,
1252
-                        ))
1253
-                        if on_confirmation:
1254
-                            confirmed = await on_confirmation(tc.name, e.message, e.details)
1255
-                            if confirmed:
1256
-                                # Re-execute with skip_confirmation
1257
-                                old_skip = self.registry.skip_confirmation
1258
-                                self.registry.skip_confirmation = True
1259
-                                try:
1260
-                                    result = await self.registry.execute(tc.name, **tc.arguments)
1261
-                                    result_text = result.output
1262
-                                    is_error = result.is_error
1263
-                                finally:
1264
-                                    self.registry.skip_confirmation = old_skip
1265
-                            else:
1266
-                                result_text = "Tool execution cancelled by user."
1267
-                        else:
1268
-                            # No callback - auto-confirm for extracted tool calls
1269
-                            old_skip = self.registry.skip_confirmation
1270
-                            self.registry.skip_confirmation = True
1271
-                            try:
1272
-                                result = await self.registry.execute(tc.name, **tc.arguments)
1273
-                                result_text = result.output
1274
-                                is_error = result.is_error
1275
-                            finally:
1276
-                                self.registry.skip_confirmation = old_skip
1277
-                    except Exception as e:
1278
-                        result_text = f"Error: {e}"
1279
-                        is_error = True
1280
-
1281
-                    # Track errors
1282
-                    if is_error:
1283
-                        batch_errors += 1
1284
-                        consecutive_errors += 1
1285
-                    else:
1286
-                        consecutive_errors = 0  # Reset on success
1287
-                        # Record successful action to prevent duplicates
1288
-                        self.safeguards.record_action(tc.name, tc.arguments)
1289
-
1290
-                        # Check for repetitive loop pattern
1291
-                        is_loop, loop_desc = self.safeguards.detect_loop()
1292
-                        if is_loop:
1293
-                            await emit(AgentEvent(
1294
-                                type="error",
1295
-                                content=f"Loop detected: {loop_desc}. Stopping to prevent repetitive behavior.",
1296
-                            ))
1297
-                            final_response = "I noticed I was repeating the same actions. Let me know what you'd like me to do differently."
1298
-                            self.messages.append(Message(
1299
-                                role=Role.ASSISTANT,
1300
-                                content=final_response,
1301
-                            ))
1302
-                            await emit(AgentEvent(type="response", content=final_response))
1303
-                            return final_response
1304
-
1305
-                    await emit(AgentEvent(
1306
-                        type="tool_result",
1307
-                        content=result_text,
1308
-                        tool_name=tc.name,
1309
-                        is_error=is_error,
1310
-                    ))
1311
-
1312
-                    self.messages.append(Message(
1313
-                        role=Role.ASSISTANT,
1314
-                        content=response_content,
1315
-                    ))
1316
-                    self.messages.append(Message(
1317
-                        role=Role.TOOL,
1318
-                        content=result_text,
1319
-                    ))
1320
-
1321
-                # After executing batch, check if we should stop
1322
-                # Stop if: all tools in batch failed, or we have many consecutive errors
1323
-                if batch_errors == len(tool_calls) or consecutive_errors >= 3:
1324
-                    # All failed or too many consecutive errors - stop trying
1325
-                    final_response = "I ran into some issues. Let me know if you'd like me to try a different approach."
1326
-                    await emit(AgentEvent(type="response", content=final_response))
1327
-                    break
1328
-
1329
-                continue
1330
-
1331
-            # No tool calls - check if model is describing instead of acting
1332
-            # IMPORTANT: Check ORIGINAL content before safeguards filtered it!
1333
-            # Debug log
1334
-            try:
1335
-                has_unexecuted = self._contains_unexecuted_code(response_content)
1336
-                with open("/tmp/loader_debug.log", "a") as f:
1337
-                    f.write(f"[chatbot-check] iterations={iterations}, has_unexecuted={has_unexecuted}\n")
1338
-                    f.write(f"[chatbot-check] response_content (first 200): {response_content[:200]}\n")
1339
-                    f.write(f"[chatbot-check] filtered content (first 200): {content[:200]}\n")
1340
-            except Exception:
1341
-                pass
1342
-
1343
-            if self._contains_unexecuted_code(response_content) and iterations < self.config.max_iterations - 1:
1344
-                # Model outputted code blocks without using tools - nudge it
1345
-                try:
1346
-                    with open("/tmp/loader_debug.log", "a") as f:
1347
-                        f.write(f"[chatbot-check] TRIGGERING chatbot recovery\n")
1348
-                except Exception:
1349
-                    pass
1350
-                # Silently steer - don't show error to user (internal correction)
1351
-                self.messages.append(Message(
1352
-                    role=Role.ASSISTANT,
1353
-                    content=response_content,
1354
-                ))
1355
-                self.messages.append(Message(
1356
-                    role=Role.USER,
1357
-                    content="CRITICAL ERROR: You are PRETENDING to use tools instead of actually using them.\n\n"
1358
-                            "DO NOT write:\n"
1359
-                            "- 'Used bash tool with command...' (THIS IS FAKE)\n"
1360
-                            "- 'Created a file using the write tool...' (THIS IS FAKE)\n"
1361
-                            "- 'Here is what I did:' followed by descriptions\n"
1362
-                            "- Numbered steps or instructions\n"
1363
-                            "- Code blocks for me to copy\n\n"
1364
-                            "Your tool calls MUST go through the proper tool interface.\n"
1365
-                            "Writing 'Used bash tool...' does NOT execute anything!\n\n"
1366
-                            "ACTUALLY call the tools using the tool_call mechanism.\n"
1367
-                            "DO IT NOW - stop narrating and start executing.",
1368
-                ))
1369
-                continue
1370
-
1371
-            # No tool calls and early in the task - MAY be giving up too soon
1372
-            # But only intervene if we haven't done ANY work yet
1373
-            if not self.use_react and len(actions_taken) == 0 and iterations < self.config.max_iterations - 2:
1374
-                # Check if response looks like deflection without having done anything
1375
-                deflection_phrases = ["you can", "you should", "you could", "try running"]
1376
-                looks_like_deflection = any(p in content.lower() for p in deflection_phrases)
1377
-
1378
-                if looks_like_deflection:
1379
-                    self.messages.append(Message(
1380
-                        role=Role.ASSISTANT,
1381
-                        content=response_content,
1382
-                    ))
1383
-                    self.messages.append(Message(
1384
-                        role=Role.USER,
1385
-                        content="Please use your tools to execute the task rather than telling me what to do.",
1386
-                    ))
1387
-                    continue
1388
-
1389
-            # Self-critique before finalizing (if enabled and response has substance)
1390
-            cfg = self.config.reasoning
1391
-            if cfg.self_critique and len(content) > 100:
1392
-                # Check if we should critique this response
1393
-                is_code_response = "```" in content or any(
1394
-                    keyword in content.lower()
1395
-                    for keyword in ["def ", "function ", "class ", "import "]
1396
-                )
1397
-                if should_self_critique(content, is_code=is_code_response):
1398
-                    context = task
1399
-                    critique = await self._self_critique(content, context)
1400
-
1401
-                    await emit(AgentEvent(
1402
-                        type="critique",
1403
-                        content=f"Self-critique: {len(critique.issues_found)} issues found",
1404
-                        critique=critique,
1405
-                    ))
1406
-
1407
-                    if critique.can_revise():
1408
-                        # Ask for revision
1409
-                        revision_msg = (
1410
-                            f"[SELF-CRITIQUE] Review your response:\n"
1411
-                            f"Issues found: {', '.join(critique.issues_found)}\n"
1412
-                            f"Suggestions: {', '.join(critique.suggestions)}\n\n"
1413
-                            "Please provide an improved response addressing these issues."
1414
-                        )
1415
-                        self.messages.append(Message(
1416
-                            role=Role.ASSISTANT,
1417
-                            content=response_content,
1418
-                        ))
1419
-                        self.messages.append(Message(
1420
-                            role=Role.USER,
1421
-                            content=revision_msg,
1422
-                        ))
1423
-                        critique.revision_count += 1
1424
-                        continue  # Loop to get revised response
1425
-
1426
-            # Check for text loop (agent repeating the same response)
1427
-            is_text_loop, text_loop_desc = self.safeguards.detect_text_loop(content)
1428
-            if is_text_loop:
1429
-                await emit(AgentEvent(
1430
-                    type="error",
1431
-                    content=f"Text loop detected: {text_loop_desc}. Stopping.",
1432
-                ))
1433
-                final_response = "I seem to be repeating myself. Let me know if you'd like me to try a different approach."
1434
-                self.messages.append(Message(
1435
-                    role=Role.ASSISTANT,
1436
-                    content=final_response,
1437
-                ))
1438
-                await emit(AgentEvent(type="response", content=final_response))
1439
-                return final_response
1440
-
1441
-            # Record response for future loop detection
1442
-            self.safeguards.record_response(content)
1443
-
1444
-            # Task completion check - don't give up too early!
1445
-            # Use original_task if available (for multi-turn conversations)
1446
-            effective_task = original_task or task
1447
-            if cfg.completion_check and continuation_count < cfg.max_continuation_prompts:
1448
-                # Quick heuristic check first
1449
-                if cfg.use_quick_completion:
1450
-                    is_premature = detect_premature_completion(effective_task, content, actions_taken)
1451
-                else:
1452
-                    is_premature = False
1453
-
1454
-                if is_premature:
1455
-                    continuation_count += 1
1456
-                    continuation_prompt = get_continuation_prompt(effective_task, actions_taken, content)
1457
-
1458
-                    await emit(AgentEvent(
1459
-                        type="completion_check",
1460
-                        content=f"Task may be incomplete ({len(actions_taken)} actions taken)",
1461
-                        completion_check=TaskCompletionCheck(
1462
-                            original_task=effective_task,
1463
-                            is_complete=False,
1464
-                            accomplished=[a.split(":")[0] for a in actions_taken],
1465
-                            continuation_prompt=continuation_prompt,
1466
-                        ),
1467
-                    ))
1468
-
1469
-                    # Add the assistant's response and nudge to continue
1470
-                    self.messages.append(Message(
1471
-                        role=Role.ASSISTANT,
1472
-                        content=response_content,
1473
-                    ))
1474
-                    self.messages.append(Message(
1475
-                        role=Role.USER,
1476
-                        content=continuation_prompt,
1477
-                    ))
1478
-                    continue  # Loop to get continuation
1479
-
1480
-            # This is the final response
1481
-            final_response = content
1482
-
1483
-            # If we completed actions, add follow-up question to encourage continued conversation
1484
-            if actions_taken and final_response.strip():
1485
-                # Only add if the response doesn't already end with a question
1486
-                if not final_response.rstrip().endswith('?'):
1487
-                    final_response = final_response.rstrip() + "\n\nWould you like me to make any changes or additions?"
1488
-
1489
-            self.messages.append(Message(
1490
-                role=Role.ASSISTANT,
1491
-                content=response_content,
1492
-            ))
1493
-
1494
-            # Emit rollback plan summary if we tracked any actions
1495
-            if rollback_plan and rollback_plan.actions:
1496
-                await emit(AgentEvent(
1497
-                    type="rollback_summary",
1498
-                    content=f"Rollback plan: {len(rollback_plan.actions)} action(s) tracked",
1499
-                    rollback_plan=rollback_plan,
1500
-                ))
1501
-
1502
-            await emit(AgentEvent(type="response", content=final_response))
1503
-            break
1504
-
1505
-        return final_response
586
+        runtime = ConversationRuntime(self)
587
+        self.last_turn_summary = await runtime.run_turn(
588
+            task,
589
+            emit,
590
+            on_confirmation=on_confirmation,
591
+            original_task=original_task,
592
+        )
593
+        return self.last_turn_summary.final_response
1506594
 
1507595
     async def run_streaming(
1508596
         self,
@@ -1923,7 +1011,12 @@ class Agent:
19231011
     def clear_history(self) -> None:
19241012
         """Clear conversation history."""
19251013
         self.messages = []
1014
+        self.session = ConversationSession(
1015
+            system_message_factory=self._get_system_message,
1016
+            few_shot_factory=self._get_few_shot_examples,
1017
+            messages=self.messages,
1018
+        )
19261019
         self._recovery_context = None
19271020
         self._current_task = None
1928
-        self._executed_commands = set()  # Clear command dedup tracking
1021
+        self.last_turn_summary = None
19291022
         self.safeguards.reset()  # Reset all runtime safeguards
src/loader/llm/base.pymodified
@@ -1,9 +1,10 @@
11
 """Base classes for LLM backends."""
22
 
33
 from abc import ABC, abstractmethod
4
+from collections.abc import AsyncIterator
45
 from dataclasses import dataclass, field
56
 from enum import Enum
6
-from typing import AsyncIterator, Any
7
+from typing import Any
78
 
89
 
910
 class Role(str, Enum):
@@ -38,6 +39,29 @@ class Message:
3839
     tool_calls: list[ToolCall] = field(default_factory=list)
3940
     tool_results: list[ToolResult] = field(default_factory=list)
4041
 
42
+    @classmethod
43
+    def tool_result_message(
44
+        cls,
45
+        *,
46
+        tool_call_id: str,
47
+        display_content: str,
48
+        result_content: str,
49
+        is_error: bool = False,
50
+    ) -> "Message":
51
+        """Build a tool-result message with a typed tool result payload."""
52
+
53
+        return cls(
54
+            role=Role.TOOL,
55
+            content=display_content,
56
+            tool_results=[
57
+                ToolResult(
58
+                    tool_call_id=tool_call_id,
59
+                    content=result_content,
60
+                    is_error=is_error,
61
+                )
62
+            ],
63
+        )
64
+
4165
     def to_dict(self) -> dict[str, Any]:
4266
         """Convert to dict for API calls."""
4367
         result: dict[str, Any] = {
@@ -49,6 +73,10 @@ class Message:
4973
                 {"id": tc.id, "name": tc.name, "arguments": tc.arguments}
5074
                 for tc in self.tool_calls
5175
             ]
76
+        if self.tool_results:
77
+            primary_result = self.tool_results[0]
78
+            result["tool_call_id"] = primary_result.tool_call_id
79
+            result["is_error"] = primary_result.is_error
5280
         return result
5381
 
5482
 
src/loader/runtime/__init__.pymodified
@@ -1,6 +1,10 @@
11
 """Runtime primitives for Loader's turn engine."""
22
 
3
-from .capabilities import CapabilityProfile, resolve_capability_profile
3
+from .capabilities import (
4
+    CapabilityProfile,
5
+    resolve_backend_capability_profile,
6
+    resolve_capability_profile,
7
+)
48
 from .events import AgentEvent, TurnSummary
59
 from .session import ConversationSession
610
 from .tracing import RuntimeTraceEvent, RuntimeTracer
@@ -12,5 +16,6 @@ __all__ = [
1216
     "RuntimeTraceEvent",
1317
     "RuntimeTracer",
1418
     "TurnSummary",
19
+    "resolve_backend_capability_profile",
1520
     "resolve_capability_profile",
1621
 ]
src/loader/runtime/capabilities.pymodified
@@ -3,12 +3,24 @@
33
 from __future__ import annotations
44
 
55
 from dataclasses import dataclass, field
6
-from typing import Any, Literal
6
+from typing import Any, Literal, Protocol
77
 
88
 ToolCallFormat = Literal["native", "json_tag", "bracket"]
99
 VerificationStrictness = Literal["lax", "standard", "strict"]
1010
 
1111
 
12
+class SupportsCapabilityProfile(Protocol):
13
+    """Runtime interface for backends that can describe capabilities."""
14
+
15
+    def capability_profile(self) -> CapabilityProfile: ...
16
+
17
+
18
+class SupportsNativeTools(Protocol):
19
+    """Runtime interface for backends that can explicitly report tool support."""
20
+
21
+    def supports_native_tools(self) -> bool: ...
22
+
23
+
1224
 @dataclass(frozen=True)
1325
 class CapabilityProfile:
1426
     """Resolved model/runtime capability profile."""
@@ -203,3 +215,30 @@ def resolve_capability_profile(
203215
         verification_strictness="standard",
204216
         notes=["Unknown model family; defaulting to safe ReAct-style tool use."],
205217
     )
218
+
219
+
220
+def resolve_backend_capability_profile(backend: Any) -> CapabilityProfile:
221
+    """Resolve capabilities from the backend first, then fall back to model heuristics."""
222
+
223
+    explicit_profile = getattr(backend, "capability_profile", None)
224
+    if callable(explicit_profile):
225
+        profile = explicit_profile()
226
+        if isinstance(profile, CapabilityProfile):
227
+            return profile
228
+
229
+    model_name = getattr(backend, "model", backend.__class__.__name__)
230
+    explicit_native_tools = getattr(backend, "supports_native_tools", None)
231
+    if callable(explicit_native_tools):
232
+        supports_native_tools = bool(explicit_native_tools())
233
+        preferred_tool_call_format: ToolCallFormat = (
234
+            "native" if supports_native_tools else "json_tag"
235
+        )
236
+        return _profile(
237
+            model_name,
238
+            supports_native_tools=supports_native_tools,
239
+            preferred_tool_call_format=preferred_tool_call_format,
240
+            verification_strictness="standard",
241
+            notes=["Resolved from backend capability surface."],
242
+        )
243
+
244
+    return resolve_capability_profile(model_name)
src/loader/runtime/conversation.pyadded
@@ -0,0 +1,705 @@
1
+"""Typed turn engine for Loader runtime execution."""
2
+
3
+from __future__ import annotations
4
+
5
+from collections.abc import Awaitable, Callable
6
+from dataclasses import dataclass, field
7
+from typing import Any
8
+
9
+from ..agent.parsing import parse_tool_calls
10
+from ..agent.reasoning import (
11
+    RollbackPlan,
12
+    TaskCompletionCheck,
13
+    create_rollback_plan_for_action,
14
+    detect_premature_completion,
15
+    estimate_complexity,
16
+    get_continuation_prompt,
17
+    get_token_budget,
18
+    is_destructive_tool,
19
+    should_self_critique,
20
+)
21
+from ..agent.recovery import RecoveryContext, format_failure_message, format_recovery_prompt
22
+from ..llm.base import Message, Role, ToolCall
23
+from .events import AgentEvent, TurnSummary
24
+from .executor import ToolExecutionState, ToolExecutor
25
+from .tracing import RuntimeTracer
26
+
27
+EventSink = Callable[[AgentEvent], Awaitable[None]]
28
+ConfirmationHandler = Callable[[str, str, str], Awaitable[bool]] | None
29
+
30
+
31
+@dataclass
32
+class AssistantTurn:
33
+    """Assistant output for one iteration of the conversation loop."""
34
+
35
+    content: str
36
+    response_content: str
37
+    tool_calls: list[ToolCall]
38
+    pending_tool_calls_seen: set[str] = field(default_factory=set)
39
+    usage: dict[str, int] = field(default_factory=dict)
40
+
41
+
42
+class ConversationRuntime:
43
+    """Runs one explicit conversation turn against the current session."""
44
+
45
+    def __init__(self, agent: Any) -> None:
46
+        self.agent = agent
47
+        self.tracer = RuntimeTracer()
48
+        self.executor = ToolExecutor(agent.registry, agent.safeguards, self.tracer)
49
+
50
+    async def run_turn(
51
+        self,
52
+        task: str,
53
+        emit: EventSink,
54
+        on_confirmation: ConfirmationHandler = None,
55
+        original_task: str | None = None,
56
+    ) -> TurnSummary:
57
+        """Run one task turn and return a structured summary."""
58
+
59
+        iterations = 0
60
+        final_response = ""
61
+        actions_taken: list[str] = []
62
+        continuation_count = 0
63
+        empty_retry_count = 0
64
+        max_empty_retries = 5
65
+        extracted_iterations = 0
66
+        max_extracted_iterations = 3
67
+        consecutive_errors = 0
68
+
69
+        complexity = estimate_complexity(task)
70
+        max_tokens, _ = get_token_budget(complexity)
71
+        effective_max_tokens = min(self.agent.config.max_tokens, max(max_tokens, 512))
72
+
73
+        rollback_plan = RollbackPlan() if self.agent.config.reasoning.rollback else None
74
+        summary = TurnSummary(final_response="")
75
+
76
+        while iterations < self.agent.config.max_iterations:
77
+            iterations += 1
78
+            summary.iterations = iterations
79
+            self.tracer.record("turn.iteration_started", iteration=iterations)
80
+
81
+            if iterations == 1 and len(self.agent.messages) == 1:
82
+                task_lower = task.lower()
83
+                action_keywords = [
84
+                    "create",
85
+                    "write",
86
+                    "make",
87
+                    "run",
88
+                    "execute",
89
+                    "build",
90
+                    "install",
91
+                    "delete",
92
+                    "remove",
93
+                    "add",
94
+                    "edit",
95
+                    "modify",
96
+                    "update",
97
+                    "fix",
98
+                ]
99
+                if any(keyword in task_lower for keyword in action_keywords):
100
+                    self.agent.session.append(Message(role=Role.ASSISTANT, content="["))
101
+
102
+            steering_messages = self.agent._drain_steering_queue()
103
+            for steering_message in steering_messages:
104
+                await emit(AgentEvent(type="steering", content=steering_message))
105
+                self.agent.session.append(
106
+                    Message(
107
+                        role=Role.USER,
108
+                        content=f"[USER INTERRUPTION]: {steering_message}",
109
+                    )
110
+                )
111
+
112
+            await emit(AgentEvent(type="thinking"))
113
+            assistant_turn = await self._request_assistant_turn(
114
+                emit=emit,
115
+                max_tokens=effective_max_tokens,
116
+            )
117
+            self._merge_usage(summary.usage, assistant_turn.usage)
118
+
119
+            content = assistant_turn.content
120
+            response_content = assistant_turn.response_content
121
+            tool_calls = list(assistant_turn.tool_calls)
122
+            pending_tool_calls_seen = set(assistant_turn.pending_tool_calls_seen)
123
+
124
+            if not content.strip():
125
+                empty_retry_count += 1
126
+                if empty_retry_count <= max_empty_retries:
127
+                    task_context = original_task or task
128
+                    retry_prompts = [
129
+                        "Great! Now let me proceed with the task. I'll start by using my tools.",
130
+                        "I understand. Let me create that now using my tools (write, bash, etc.).",
131
+                        f"Proceeding with: {task_context[:80]}. I'll use the write tool to create the files.",
132
+                        "Starting now. First step: create the necessary files and directories.",
133
+                        f"Let me complete this task step by step. The goal is: {task_context[:100]}",
134
+                    ]
135
+                    prompt = retry_prompts[min(empty_retry_count - 1, len(retry_prompts) - 1)]
136
+                    self.agent.session.append(Message(role=Role.ASSISTANT, content=prompt))
137
+                    continue
138
+
139
+                final_response = (
140
+                    "I need a bit more direction. What specifically would you like me to create or do?"
141
+                )
142
+                summary.final_response = final_response
143
+                summary.failures.append("assistant returned empty output repeatedly")
144
+                await emit(AgentEvent(type="response", content=final_response))
145
+                break
146
+
147
+            if self.agent.use_react:
148
+                parsed = parse_tool_calls(content)
149
+                tool_calls = parsed.tool_calls
150
+                content = parsed.content
151
+
152
+                if parsed.is_final_answer and not tool_calls:
153
+                    assistant_message = Message(role=Role.ASSISTANT, content=response_content)
154
+                    self.agent.session.append(assistant_message)
155
+                    summary.assistant_messages.append(assistant_message)
156
+                    final_response = content
157
+                    summary.final_response = final_response
158
+                    self.tracer.record("turn.completed", reason="final_answer")
159
+                    await emit(AgentEvent(type="response", content=final_response))
160
+                    break
161
+
162
+            tool_source = "native"
163
+            if not tool_calls:
164
+                raw_tool_calls = self.agent._extract_raw_json_tool_calls(response_content)
165
+                if raw_tool_calls:
166
+                    tool_calls = raw_tool_calls
167
+                    tool_source = "raw_text"
168
+                    await emit(AgentEvent(type="clear_stream"))
169
+
170
+            if tool_calls:
171
+                if tool_source == "raw_text":
172
+                    extracted_iterations += 1
173
+                    if extracted_iterations > max_extracted_iterations:
174
+                        final_response = (
175
+                            content
176
+                            + "\n\nLet me know if you'd like me to continue or make changes."
177
+                        )
178
+                        assistant_message = Message(role=Role.ASSISTANT, content=response_content)
179
+                        self.agent.session.append(assistant_message)
180
+                        summary.assistant_messages.append(assistant_message)
181
+                        summary.final_response = final_response
182
+                        summary.failures.append("raw tool extraction exceeded iteration budget")
183
+                        await emit(AgentEvent(type="response", content=final_response))
184
+                        break
185
+
186
+                assistant_message = Message(
187
+                    role=Role.ASSISTANT,
188
+                    content=response_content,
189
+                    tool_calls=tool_calls,
190
+                )
191
+                self.agent.session.append(assistant_message)
192
+                summary.assistant_messages.append(assistant_message)
193
+                self.tracer.record(
194
+                    "assistant.tool_batch",
195
+                    tool_count=len(tool_calls),
196
+                    source=tool_source,
197
+                )
198
+
199
+                for tool_call in tool_calls:
200
+                    cfg = self.agent.config.reasoning
201
+
202
+                    if cfg.confidence_scoring:
203
+                        context = "\n".join(
204
+                            message.content[:500]
205
+                            for message in self.agent.messages[-5:]
206
+                            if message.content
207
+                        )
208
+                        confidence = await self.agent._assess_confidence(
209
+                            tool_call.name,
210
+                            tool_call.arguments,
211
+                            context,
212
+                        )
213
+                        await emit(
214
+                            AgentEvent(
215
+                                type="confidence",
216
+                                content=f"Confidence: {confidence.level.name} ({confidence.score}/5)",
217
+                                confidence=confidence,
218
+                                tool_name=tool_call.name,
219
+                            )
220
+                        )
221
+                        if confidence.score < cfg.min_confidence_for_action:
222
+                            low_confidence_message = (
223
+                                "[LOW CONFIDENCE WARNING] The planned action has low confidence "
224
+                                f"({confidence.level.name}).\n"
225
+                                f"Reasoning: {confidence.reasoning}\n"
226
+                                f"Risks: {', '.join(confidence.risks)}\n"
227
+                                "Consider an alternative approach or gather more information first."
228
+                            )
229
+                            self.agent.session.append(
230
+                                Message(role=Role.USER, content=low_confidence_message)
231
+                            )
232
+                            continue
233
+
234
+                    if tool_call.id not in pending_tool_calls_seen:
235
+                        await emit(
236
+                            AgentEvent(
237
+                                type="tool_call",
238
+                                tool_name=tool_call.name,
239
+                                tool_args=tool_call.arguments,
240
+                            )
241
+                        )
242
+
243
+                    actions_taken.append(f"{tool_call.name}: {str(tool_call.arguments)[:100]}")
244
+
245
+                    if rollback_plan and is_destructive_tool(tool_call.name, tool_call.arguments):
246
+
247
+                        async def read_file_for_backup(path: str) -> str:
248
+                            read_result = await self.agent.registry.execute("read", file_path=path)
249
+                            return read_result.output if not read_result.is_error else ""
250
+
251
+                        rollback_action = await create_rollback_plan_for_action(
252
+                            tool_call.name,
253
+                            tool_call.arguments,
254
+                            read_file_for_backup,
255
+                        )
256
+                        if rollback_action:
257
+                            rollback_plan.actions.append(rollback_action)
258
+                            if self.agent.config.reasoning.show_rollback_plan:
259
+                                await emit(
260
+                                    AgentEvent(
261
+                                        type="rollback",
262
+                                        content=f"Rollback tracked: {rollback_action.description}",
263
+                                        rollback_action=rollback_action,
264
+                                    )
265
+                                )
266
+
267
+                    outcome = await self.executor.execute_tool_call(
268
+                        tool_call,
269
+                        on_confirmation=on_confirmation,
270
+                        emit_confirmation=self._emit_confirmation(emit),
271
+                        source=tool_source,
272
+                    )
273
+
274
+                    if (
275
+                        outcome.state == ToolExecutionState.EXECUTED
276
+                        and outcome.is_error
277
+                        and self.agent.config.auto_recover
278
+                    ):
279
+                        recovery_result = await self._handle_recovery(tool_call, outcome, emit)
280
+                        if recovery_result is not None:
281
+                            summary.tool_result_messages.append(recovery_result)
282
+                            self.agent.session.append(recovery_result)
283
+                            continue
284
+
285
+                    if outcome.state == ToolExecutionState.EXECUTED and not outcome.is_error:
286
+                        self.agent._recovery_context = None
287
+                        is_loop, loop_description = self.agent.safeguards.detect_loop()
288
+                        if is_loop:
289
+                            final_response = (
290
+                                "I noticed I was repeating the same actions. "
291
+                                "Let me know what you'd like me to do differently."
292
+                            )
293
+                            summary.final_response = final_response
294
+                            summary.failures.append(loop_description)
295
+                            loop_message = Message(role=Role.ASSISTANT, content=final_response)
296
+                            self.agent.session.append(loop_message)
297
+                            summary.assistant_messages.append(loop_message)
298
+                            await emit(
299
+                                AgentEvent(
300
+                                    type="error",
301
+                                    content=(
302
+                                        f"Loop detected: {loop_description}. "
303
+                                        "Stopping to prevent repetitive behavior."
304
+                                    ),
305
+                                )
306
+                            )
307
+                            await emit(AgentEvent(type="response", content=final_response))
308
+                            return self._finalize_summary(summary)
309
+
310
+                    if outcome.is_error:
311
+                        consecutive_errors += 1
312
+                    else:
313
+                        consecutive_errors = 0
314
+
315
+                    await emit(
316
+                        AgentEvent(
317
+                            type="tool_result",
318
+                            content=outcome.event_content,
319
+                            tool_name=tool_call.name,
320
+                            is_error=outcome.is_error,
321
+                        )
322
+                    )
323
+
324
+                    if (
325
+                        cfg.verification
326
+                        and outcome.state == ToolExecutionState.EXECUTED
327
+                        and not outcome.is_error
328
+                    ):
329
+                        verification = await self.agent._verify_action(
330
+                            tool_call.name,
331
+                            tool_call.arguments,
332
+                            outcome.result_output,
333
+                        )
334
+                        await emit(
335
+                            AgentEvent(
336
+                                type="verification",
337
+                                content=f"Verified: {verification.verified}",
338
+                                verification=verification,
339
+                                tool_name=tool_call.name,
340
+                            )
341
+                        )
342
+                        if not verification.verified and verification.needs_correction:
343
+                            correction_message = (
344
+                                "[VERIFICATION FAILED] The action did not produce expected results.\n"
345
+                                f"Discrepancies: {', '.join(verification.discrepancies)}\n"
346
+                                f"Suggestion: {verification.correction_suggestion}"
347
+                            )
348
+                            self.agent.session.append(
349
+                                Message(role=Role.USER, content=correction_message)
350
+                            )
351
+                            continue
352
+
353
+                    self.agent.session.append(outcome.message)
354
+                    summary.tool_result_messages.append(outcome.message)
355
+
356
+                if consecutive_errors >= 3:
357
+                    final_response = (
358
+                        "I ran into some issues. Let me know if you'd like me to try a different approach."
359
+                    )
360
+                    summary.final_response = final_response
361
+                    summary.failures.append("three consecutive tool errors")
362
+                    await emit(AgentEvent(type="response", content=final_response))
363
+                    break
364
+
365
+                continue
366
+
367
+            if self.agent._contains_unexecuted_code(response_content):
368
+                if iterations < self.agent.config.max_iterations - 1:
369
+                    self.agent.session.append(Message(role=Role.ASSISTANT, content=response_content))
370
+                    self.agent.session.append(
371
+                        Message(
372
+                            role=Role.USER,
373
+                            content=(
374
+                                "CRITICAL ERROR: You are PRETENDING to use tools instead of actually "
375
+                                "using them.\n\n"
376
+                                "DO NOT write:\n"
377
+                                "- 'Used bash tool with command...' (THIS IS FAKE)\n"
378
+                                "- 'Created a file using the write tool...' (THIS IS FAKE)\n"
379
+                                "- 'Here is what I did:' followed by descriptions\n"
380
+                                "- Numbered steps or instructions\n"
381
+                                "- Code blocks for me to copy\n\n"
382
+                                "Your tool calls MUST go through the proper tool interface.\n"
383
+                                "Writing 'Used bash tool...' does NOT execute anything!\n\n"
384
+                                "ACTUALLY call the tools using the tool_call mechanism.\n"
385
+                                "DO IT NOW - stop narrating and start executing."
386
+                            ),
387
+                        )
388
+                    )
389
+                    continue
390
+
391
+            if (
392
+                not self.agent.use_react
393
+                and len(actions_taken) == 0
394
+                and iterations < self.agent.config.max_iterations - 2
395
+            ):
396
+                deflection_phrases = ["you can", "you should", "you could", "try running"]
397
+                if any(phrase in content.lower() for phrase in deflection_phrases):
398
+                    self.agent.session.append(Message(role=Role.ASSISTANT, content=response_content))
399
+                    self.agent.session.append(
400
+                        Message(
401
+                            role=Role.USER,
402
+                            content="Please use your tools to execute the task rather than telling me what to do.",
403
+                        )
404
+                    )
405
+                    continue
406
+
407
+            cfg = self.agent.config.reasoning
408
+            if cfg.self_critique and len(content) > 100:
409
+                is_code_response = "```" in content or any(
410
+                    keyword in content.lower()
411
+                    for keyword in ["def ", "function ", "class ", "import "]
412
+                )
413
+                if should_self_critique(content, is_code=is_code_response):
414
+                    critique = await self.agent._self_critique(content, task)
415
+                    await emit(
416
+                        AgentEvent(
417
+                            type="critique",
418
+                            content=f"Self-critique: {len(critique.issues_found)} issues found",
419
+                            critique=critique,
420
+                        )
421
+                    )
422
+                    if critique.can_revise():
423
+                        revision_message = (
424
+                            "[SELF-CRITIQUE] Review your response:\n"
425
+                            f"Issues found: {', '.join(critique.issues_found)}\n"
426
+                            f"Suggestions: {', '.join(critique.suggestions)}\n\n"
427
+                            "Please provide an improved response addressing these issues."
428
+                        )
429
+                        self.agent.session.append(Message(role=Role.ASSISTANT, content=response_content))
430
+                        self.agent.session.append(Message(role=Role.USER, content=revision_message))
431
+                        critique.revision_count += 1
432
+                        continue
433
+
434
+            is_text_loop, loop_description = self.agent.safeguards.detect_text_loop(content)
435
+            if is_text_loop:
436
+                final_response = (
437
+                    "I seem to be repeating myself. Let me know if you'd like me to try a different approach."
438
+                )
439
+                summary.final_response = final_response
440
+                summary.failures.append(loop_description)
441
+                final_message = Message(role=Role.ASSISTANT, content=final_response)
442
+                self.agent.session.append(final_message)
443
+                summary.assistant_messages.append(final_message)
444
+                await emit(
445
+                    AgentEvent(
446
+                        type="error",
447
+                        content=f"Text loop detected: {loop_description}. Stopping.",
448
+                    )
449
+                )
450
+                await emit(AgentEvent(type="response", content=final_response))
451
+                return self._finalize_summary(summary)
452
+
453
+            self.agent.safeguards.record_response(content)
454
+            effective_task = original_task or task
455
+            if cfg.completion_check and continuation_count < cfg.max_continuation_prompts:
456
+                is_premature = (
457
+                    detect_premature_completion(effective_task, content, actions_taken)
458
+                    if cfg.use_quick_completion
459
+                    else False
460
+                )
461
+                if is_premature:
462
+                    continuation_count += 1
463
+                    continuation_prompt = get_continuation_prompt(
464
+                        effective_task,
465
+                        actions_taken,
466
+                        content,
467
+                    )
468
+                    await emit(
469
+                        AgentEvent(
470
+                            type="completion_check",
471
+                            content=f"Task may be incomplete ({len(actions_taken)} actions taken)",
472
+                            completion_check=TaskCompletionCheck(
473
+                                original_task=effective_task,
474
+                                is_complete=False,
475
+                                accomplished=[action.split(":")[0] for action in actions_taken],
476
+                                continuation_prompt=continuation_prompt,
477
+                            ),
478
+                        )
479
+                    )
480
+                    self.agent.session.append(Message(role=Role.ASSISTANT, content=response_content))
481
+                    self.agent.session.append(Message(role=Role.USER, content=continuation_prompt))
482
+                    continue
483
+
484
+            final_response = content
485
+            if actions_taken and final_response.strip() and not final_response.rstrip().endswith("?"):
486
+                final_response = (
487
+                    final_response.rstrip()
488
+                    + "\n\nWould you like me to make any changes or additions?"
489
+                )
490
+
491
+            final_message = Message(role=Role.ASSISTANT, content=response_content)
492
+            self.agent.session.append(final_message)
493
+            summary.assistant_messages.append(final_message)
494
+
495
+            if rollback_plan and rollback_plan.actions:
496
+                await emit(
497
+                    AgentEvent(
498
+                        type="rollback_summary",
499
+                        content=f"Rollback plan: {len(rollback_plan.actions)} action(s) tracked",
500
+                        rollback_plan=rollback_plan,
501
+                    )
502
+                )
503
+
504
+            summary.final_response = final_response
505
+            await emit(AgentEvent(type="response", content=final_response))
506
+            break
507
+
508
+        return self._finalize_summary(summary)
509
+
510
+    async def _request_assistant_turn(
511
+        self,
512
+        *,
513
+        emit: EventSink,
514
+        max_tokens: int,
515
+    ) -> AssistantTurn:
516
+        self.agent.safeguards.code_filter.reset()
517
+        tools = None if self.agent.use_react else self.agent.registry.get_schemas()
518
+        self.tracer.record(
519
+            "assistant.requested",
520
+            use_react=self.agent.use_react,
521
+            stream=self.agent.config.stream,
522
+            max_tokens=max_tokens,
523
+        )
524
+
525
+        if self.agent.config.stream:
526
+            full_content = ""
527
+            full_content_unfiltered = ""
528
+            tool_calls: list[ToolCall] = []
529
+            pending_tool_calls_seen: set[str] = set()
530
+
531
+            async for chunk in self.agent.backend.stream(
532
+                messages=self.agent.session.build_request_messages(),
533
+                tools=tools,
534
+                temperature=self.agent.config.temperature,
535
+                max_tokens=max_tokens,
536
+            ):
537
+                filtered_content = ""
538
+                if chunk.content:
539
+                    filtered_content = self.agent.safeguards.filter_stream_chunk(chunk.content)
540
+                    full_content_unfiltered += chunk.content
541
+
542
+                if filtered_content or chunk.is_done:
543
+                    await emit(
544
+                        AgentEvent(
545
+                            type="stream",
546
+                            content=filtered_content,
547
+                            is_stream_end=chunk.is_done,
548
+                        )
549
+                    )
550
+
551
+                if self.agent.safeguards.should_steer():
552
+                    steering_message = self.agent.safeguards.get_steering_message()
553
+                    if steering_message:
554
+                        self.agent._steering_queue.put_nowait(steering_message)
555
+
556
+                if chunk.pending_tool_call and chunk.pending_tool_call.id not in pending_tool_calls_seen:
557
+                    pending_tool_calls_seen.add(chunk.pending_tool_call.id)
558
+                    await emit(
559
+                        AgentEvent(
560
+                            type="tool_call",
561
+                            tool_name=chunk.pending_tool_call.name,
562
+                            tool_args=chunk.pending_tool_call.arguments,
563
+                        )
564
+                    )
565
+
566
+                if chunk.is_done:
567
+                    full_content = chunk.full_content or full_content_unfiltered
568
+                    tool_calls = chunk.tool_calls
569
+
570
+            self.tracer.record(
571
+                "assistant.responded",
572
+                stream=True,
573
+                tool_call_count=len(tool_calls),
574
+                content_length=len(full_content),
575
+            )
576
+            return AssistantTurn(
577
+                content=full_content,
578
+                response_content=full_content,
579
+                tool_calls=tool_calls,
580
+                pending_tool_calls_seen=pending_tool_calls_seen,
581
+            )
582
+
583
+        response = await self.agent.backend.complete(
584
+            messages=self.agent.session.build_request_messages(),
585
+            tools=tools,
586
+            temperature=self.agent.config.temperature,
587
+            max_tokens=max_tokens,
588
+        )
589
+        response_content = response.content
590
+        content = self.agent.safeguards.filter_complete_content(response.content)
591
+        tool_calls = response.tool_calls if not self.agent.use_react else []
592
+        if self.agent.safeguards.should_steer():
593
+            steering_message = self.agent.safeguards.get_steering_message()
594
+            if steering_message:
595
+                self.agent._steering_queue.put_nowait(steering_message)
596
+        self.tracer.record(
597
+            "assistant.responded",
598
+            stream=False,
599
+            tool_call_count=len(tool_calls),
600
+            content_length=len(content),
601
+        )
602
+        return AssistantTurn(
603
+            content=content,
604
+            response_content=response_content,
605
+            tool_calls=tool_calls,
606
+            usage=response.usage,
607
+        )
608
+
609
+    async def _handle_recovery(
610
+        self,
611
+        tool_call: ToolCall,
612
+        outcome,
613
+        emit: EventSink,
614
+    ) -> Message | None:
615
+        if self.agent._recovery_context is None:
616
+            self.agent._recovery_context = RecoveryContext(
617
+                original_tool=tool_call.name,
618
+                original_args=tool_call.arguments,
619
+                max_retries=self.agent.config.max_recovery_attempts,
620
+            )
621
+
622
+        if self.agent._recovery_context.is_similar_attempt(tool_call.name, tool_call.arguments):
623
+            await emit(
624
+                AgentEvent(
625
+                    type="error",
626
+                    content=(
627
+                        "Loop detected: already tried a similar command. "
628
+                        "Try a DIFFERENT approach (e.g., read a config file first)."
629
+                    ),
630
+                    tool_name=tool_call.name,
631
+                )
632
+            )
633
+        else:
634
+            self.agent._recovery_context.add_attempt(
635
+                tool_call.name,
636
+                tool_call.arguments,
637
+                outcome.result_output,
638
+            )
639
+
640
+        if self.agent._recovery_context.can_retry():
641
+            attempt_number = len(self.agent._recovery_context.attempts)
642
+            await emit(
643
+                AgentEvent(
644
+                    type="recovery",
645
+                    content=(
646
+                        "Tool failed, attempting recovery "
647
+                        f"({attempt_number}/{self.agent._recovery_context.max_retries})"
648
+                    ),
649
+                    tool_name=tool_call.name,
650
+                    recovery_attempt=attempt_number,
651
+                )
652
+            )
653
+            recovery_prompt = format_recovery_prompt(
654
+                self.agent._recovery_context,
655
+                tool_call.name,
656
+                tool_call.arguments,
657
+                outcome.result_output,
658
+            )
659
+            return Message.tool_result_message(
660
+                tool_call_id=tool_call.id,
661
+                display_content=recovery_prompt,
662
+                result_content=recovery_prompt,
663
+                is_error=True,
664
+            )
665
+
666
+        failure_message = format_failure_message(self.agent._recovery_context)
667
+        await emit(
668
+            AgentEvent(
669
+                type="error",
670
+                content=failure_message,
671
+                tool_name=tool_call.name,
672
+            )
673
+        )
674
+        self.agent._recovery_context = None
675
+        return Message.tool_result_message(
676
+            tool_call_id=tool_call.id,
677
+            display_content=(
678
+                f"Observation [{tool_call.name}]: Error: {failure_message}"
679
+            ),
680
+            result_content=failure_message,
681
+            is_error=True,
682
+        )
683
+
684
+    def _finalize_summary(self, summary: TurnSummary) -> TurnSummary:
685
+        summary.trace = list(self.tracer.events)
686
+        return summary
687
+
688
+    @staticmethod
689
+    def _merge_usage(target: dict[str, int], update: dict[str, int]) -> None:
690
+        for key, value in update.items():
691
+            target[key] = target.get(key, 0) + value
692
+
693
+    @staticmethod
694
+    def _emit_confirmation(emit: EventSink):
695
+        async def _emit(tool_name: str, message: str, details: str) -> None:
696
+            await emit(
697
+                AgentEvent(
698
+                    type="confirmation",
699
+                    tool_name=tool_name,
700
+                    confirm_message=message,
701
+                    confirm_details=details,
702
+                )
703
+            )
704
+
705
+        return _emit
src/loader/runtime/executor.pyadded
@@ -0,0 +1,217 @@
1
+"""Unified tool execution path for runtime turns."""
2
+
3
+from __future__ import annotations
4
+
5
+from collections.abc import Awaitable, Callable
6
+from dataclasses import dataclass
7
+from enum import StrEnum
8
+
9
+from ..agent.parsing import format_tool_result
10
+from ..agent.recovery import ErrorCategory, categorize_error
11
+from ..llm.base import Message, ToolCall
12
+from ..tools.base import ConfirmationRequired, ToolRegistry
13
+from ..tools.base import ToolResult as RegistryToolResult
14
+from .tracing import RuntimeTracer
15
+
16
+BrowserConfirmation = Callable[[str, str, str], Awaitable[bool]] | None
17
+ConfirmationEmitter = Callable[[str, str, str], Awaitable[None]] | None
18
+
19
+
20
+class ToolExecutionState(StrEnum):
21
+    """Outcome states for one tool call."""
22
+
23
+    EXECUTED = "executed"
24
+    DUPLICATE = "duplicate"
25
+    BLOCKED = "blocked"
26
+    DECLINED = "declined"
27
+
28
+
29
+@dataclass
30
+class ToolExecutionOutcome:
31
+    """Structured outcome for one tool call."""
32
+
33
+    tool_call: ToolCall
34
+    state: ToolExecutionState
35
+    message: Message
36
+    event_content: str
37
+    is_error: bool
38
+    result_output: str
39
+    error_category: ErrorCategory | None = None
40
+    registry_result: RegistryToolResult | None = None
41
+
42
+
43
+class ToolExecutor:
44
+    """Centralizes duplicate checks, validation, execution, and result messages."""
45
+
46
+    def __init__(self, registry: ToolRegistry, safeguards, tracer: RuntimeTracer) -> None:
47
+        self.registry = registry
48
+        self.safeguards = safeguards
49
+        self.tracer = tracer
50
+
51
+    async def execute_tool_call(
52
+        self,
53
+        tool_call: ToolCall,
54
+        *,
55
+        on_confirmation: BrowserConfirmation = None,
56
+        emit_confirmation: ConfirmationEmitter = None,
57
+        source: str,
58
+    ) -> ToolExecutionOutcome:
59
+        """Execute a tool call through one consistent runtime path."""
60
+
61
+        self.tracer.record(
62
+            "tool.received",
63
+            tool_name=tool_call.name,
64
+            tool_call_id=tool_call.id,
65
+            source=source,
66
+        )
67
+
68
+        browser_block = self._browser_command_message(tool_call)
69
+        if browser_block is not None:
70
+            return self._blocked_outcome(tool_call, browser_block)
71
+
72
+        is_duplicate, duplicate_reason = self.safeguards.check_duplicate(
73
+            tool_call.name,
74
+            tool_call.arguments,
75
+        )
76
+        if is_duplicate:
77
+            self.tracer.record(
78
+                "tool.duplicate",
79
+                tool_name=tool_call.name,
80
+                tool_call_id=tool_call.id,
81
+                reason=duplicate_reason,
82
+            )
83
+            duplicate_message = f"[Skipped - duplicate action: {duplicate_reason}]"
84
+            return ToolExecutionOutcome(
85
+                tool_call=tool_call,
86
+                state=ToolExecutionState.DUPLICATE,
87
+                message=Message.tool_result_message(
88
+                    tool_call_id=tool_call.id,
89
+                    display_content=duplicate_message,
90
+                    result_content=duplicate_message,
91
+                ),
92
+                event_content=duplicate_message,
93
+                is_error=False,
94
+                result_output=duplicate_message,
95
+            )
96
+
97
+        validation = self.safeguards.validate_action(tool_call.name, tool_call.arguments)
98
+        if not validation.valid:
99
+            error_message = f"[Blocked - {validation.reason}]"
100
+            if validation.suggestion:
101
+                error_message += f" Suggestion: {validation.suggestion}"
102
+            self.tracer.record(
103
+                "tool.blocked",
104
+                tool_name=tool_call.name,
105
+                tool_call_id=tool_call.id,
106
+                reason=validation.reason,
107
+            )
108
+            return self._blocked_outcome(tool_call, error_message)
109
+
110
+        result = await self._execute_registry(
111
+            tool_call,
112
+            on_confirmation,
113
+            emit_confirmation,
114
+        )
115
+        result_text = format_tool_result(
116
+            tool_call.name,
117
+            result.output,
118
+            result.is_error,
119
+        )
120
+        if not result.is_error:
121
+            self.safeguards.record_action(tool_call.name, tool_call.arguments)
122
+
123
+        category = categorize_error(result.output) if result.is_error else None
124
+        state = ToolExecutionState.EXECUTED
125
+        if result.output == f"Tool {tool_call.name} was declined by user":
126
+            state = ToolExecutionState.DECLINED
127
+
128
+        self.tracer.record(
129
+            "tool.executed",
130
+            tool_name=tool_call.name,
131
+            tool_call_id=tool_call.id,
132
+            state=state,
133
+            is_error=result.is_error,
134
+        )
135
+        return ToolExecutionOutcome(
136
+            tool_call=tool_call,
137
+            state=state,
138
+            message=Message.tool_result_message(
139
+                tool_call_id=tool_call.id,
140
+                display_content=result_text,
141
+                result_content=result.output,
142
+                is_error=result.is_error,
143
+            ),
144
+            event_content=result.output,
145
+            is_error=result.is_error,
146
+            result_output=result.output,
147
+            error_category=category,
148
+            registry_result=result,
149
+        )
150
+
151
+    def _blocked_outcome(self, tool_call: ToolCall, message: str) -> ToolExecutionOutcome:
152
+        return ToolExecutionOutcome(
153
+            tool_call=tool_call,
154
+            state=ToolExecutionState.BLOCKED,
155
+            message=Message.tool_result_message(
156
+                tool_call_id=tool_call.id,
157
+                display_content=message,
158
+                result_content=message,
159
+                is_error=True,
160
+            ),
161
+            event_content=message,
162
+            is_error=True,
163
+            result_output=message,
164
+            error_category=categorize_error(message),
165
+        )
166
+
167
+    async def _execute_registry(
168
+        self,
169
+        tool_call: ToolCall,
170
+        on_confirmation: BrowserConfirmation,
171
+        emit_confirmation: ConfirmationEmitter,
172
+    ) -> RegistryToolResult:
173
+        try:
174
+            return await self.registry.execute(tool_call.name, **tool_call.arguments)
175
+        except ConfirmationRequired as confirmation:
176
+            self.tracer.record(
177
+                "tool.confirmation_requested",
178
+                tool_name=confirmation.tool_name,
179
+                tool_call_id=tool_call.id,
180
+            )
181
+            if emit_confirmation:
182
+                await emit_confirmation(
183
+                    confirmation.tool_name,
184
+                    confirmation.message,
185
+                    confirmation.details,
186
+                )
187
+            if on_confirmation:
188
+                confirmed = await on_confirmation(
189
+                    confirmation.tool_name,
190
+                    confirmation.message,
191
+                    confirmation.details,
192
+                )
193
+            else:
194
+                confirmed = True
195
+
196
+            if not confirmed:
197
+                return RegistryToolResult(
198
+                    output=f"Tool {tool_call.name} was declined by user",
199
+                    is_error=False,
200
+                )
201
+
202
+            previous_skip = self.registry.skip_confirmation
203
+            self.registry.skip_confirmation = True
204
+            try:
205
+                return await self.registry.execute(tool_call.name, **tool_call.arguments)
206
+            finally:
207
+                self.registry.skip_confirmation = previous_skip
208
+
209
+    def _browser_command_message(self, tool_call: ToolCall) -> str | None:
210
+        if tool_call.name != "bash":
211
+            return None
212
+
213
+        command = str(tool_call.arguments.get("command", ""))
214
+        browser_terms = ["xdg-open", "open ", "firefox", "chrome", "browser"]
215
+        if any(term in command for term in browser_terms):
216
+            return "[Blocked - Browser/display commands are not supported in the terminal runtime]"
217
+        return None