| 1 | """Public Loader agent facade. |
| 2 | |
| 3 | The long-term shell boundary is now intentional: |
| 4 | |
| 5 | - `Agent` owns public construction, workspace/config bootstrap, and compatibility |
| 6 | properties used by the CLI, UI, and tests. |
| 7 | - `Agent` delegates prompt/session lifecycle plus runtime entrypoints to |
| 8 | `loader.runtime.public_shell`. |
| 9 | - `Agent` does not directly own turn orchestration, workflow routing, completion |
| 10 | policy, or explore runtime behavior. |
| 11 | """ |
| 12 | |
from collections.abc import AsyncIterator, Awaitable, Callable
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any

from ..context.project import ProjectContext, detect_project
from ..llm.base import LLMBackend, Message
from ..runtime.capabilities import resolve_backend_capability_profile
from ..runtime.events import AgentEvent, TurnSummary
from ..runtime.permissions import (
    PermissionMode,
    build_permission_policy,
    load_permission_rules,
)
from ..runtime.public_shell import (
    SteeringMailbox,
    build_fresh_runtime_session_install,
    clear_runtime_shell_history,
    get_runtime_shell_few_shot_examples,
    get_runtime_shell_system_message,
    refresh_runtime_shell_capability_profile,
    resolve_runtime_shell_use_react,
    resume_runtime_shell_session,
    run_runtime_shell,
    run_runtime_shell_explore,
    set_runtime_shell_workflow_mode,
    stream_runtime_shell,
)
from ..runtime.safeguards import RuntimeSafeguards
from ..runtime.steering import SteeringDirective
from ..runtime.workflow import WorkflowMode
from ..tools.base import ToolRegistry, create_default_registry
| 45 | |
| 46 | |
@dataclass
class ReasoningConfig:
    """Switches and limits for the optional reasoning stages.

    Every field carries a default, so ``ReasoningConfig()`` gives the stock
    setup: completion checking and rollback tracking enabled, every other
    stage disabled.
    """

    # --- Decomposition: break complex tasks into atomic subtasks ---
    decomposition: bool = False
    # Word-count threshold for auto-decomposition.
    decomposition_threshold: int = 30

    # --- Self-critique: review output before finalizing ---
    self_critique: bool = False
    max_critique_revisions: int = 2

    # --- Confidence scoring: rate certainty before actions ---
    confidence_scoring: bool = False
    # Minimum ConfidenceLevel value required to proceed.
    min_confidence_for_action: int = 2
    # Try heuristics before asking the LLM.
    use_quick_confidence: bool = True

    # --- Post-action verification: check results after execution ---
    verification: bool = False
    # Try heuristics before asking the LLM.
    use_quick_verification: bool = True

    # --- Task completion: prevent premature stopping ---
    # ON by default - prevents "giving up".
    completion_check: bool = True
    # Try heuristics before asking the LLM.
    use_quick_completion: bool = True
    # Maximum number of times to nudge the agent to continue.
    max_continuation_prompts: int = 5

    # --- Rollback planning: track how to undo destructive actions ---
    # ON by default - track undo capability.
    rollback: bool = True
    # Show the rollback plan in output (verbose).
    show_rollback_plan: bool = False
| 75 | |
| 76 | |
@dataclass
class AgentConfig:
    """Configuration for the agent.

    All fields have defaults, so ``AgentConfig()`` is a valid configuration.
    ``reasoning`` receives a fresh ``ReasoningConfig`` per instance; passing
    ``reasoning=None`` explicitly is still accepted and is normalised to a
    default ``ReasoningConfig`` in ``__post_init__``.
    """

    # High cap; the text loop detector is the real termination mechanism.
    max_iterations: int = 200
    # Low for better instruction following.
    temperature: float = 0.3
    # Reduced from 4096; most responses are shorter.
    max_tokens: int = 2048
    # Force ReAct even if the model supports native tools.
    force_react: bool = False
    # Auto-detect project context on startup.
    auto_context: bool = True
    # Auto-plan complex tasks (disabled by default - confuses smaller models).
    auto_plan: bool = False
    # Auto-recover from tool errors.
    auto_recover: bool = True
    # Reduced from 3.
    max_recovery_attempts: int = 2
    # Retry budget for the verify/fix loop.
    verification_retry_budget: int = 3
    # Bounded clarify depth before carrying ambiguity forward.
    clarify_max_rounds: int = 2
    permission_mode: PermissionMode = PermissionMode.WORKSPACE_WRITE
    workflow_mode_override: str | None = None
    # Stream LLM responses for real-time output.
    stream: bool = True
    session_rotate_after_bytes: int = 256 * 1024
    # Upper bound; the runtime clamps to model capacity.
    session_auto_compaction_input_tokens_threshold: int = 100_000
    session_compaction_keep_last_messages: int = 4

    # Reasoning stages configuration. A default_factory gives each instance its
    # own ReasoningConfig without a ``None`` default that contradicts the
    # annotation.
    reasoning: ReasoningConfig = field(default_factory=ReasoningConfig)

    def __post_init__(self) -> None:
        """Normalise an explicitly passed ``reasoning=None`` to the default."""
        # Kept for backward compatibility with callers that pass None by hand.
        if self.reasoning is None:
            self.reasoning = ReasoningConfig()
| 103 | |
| 104 | |
class Agent:
    """Thin public facade over the runtime-owned shell and launcher.

    The constructor wires up the backend, tool registry, permission policy,
    session, and (optionally) the detected project context. Nearly every
    other method forwards to a helper from ``loader.runtime.public_shell``
    or to the ``SteeringMailbox`` held in ``self.steering``.
    """

    def __init__(
        self,
        backend: LLMBackend,
        registry: ToolRegistry | None = None,
        config: AgentConfig | None = None,
        project_root: Path | str | None = None,
    ) -> None:
        """Build an agent bound to one backend and one workspace root.

        Args:
            backend: LLM backend used by the runtime.
            registry: Tool registry to use. When omitted (``None``), a default
                registry is created for the resolved project root.
            config: Agent configuration. When omitted (``None``), a default
                ``AgentConfig`` is used.
            project_root: Workspace root. Falls back to the current directory
                when omitted or empty; ``~`` is expanded and the path resolved.

        Raises:
            ValueError: If the permission rules loaded for the project root
                are reported as invalid.
        """
        self.backend = backend
        # Fall back only when the argument was omitted. A truthiness test
        # (``x or default``) would also discard a caller-supplied object that
        # happens to evaluate as false, e.g. an empty container-like registry.
        self.config = AgentConfig() if config is None else config
        self.project_root = Path(project_root or ".").expanduser().resolve()
        self.registry = (
            create_default_registry(self.project_root)
            if registry is None
            else registry
        )
        self.registry.configure_workspace_root(self.project_root)

        self.permission_config_status = load_permission_rules(self.project_root)
        if not self.permission_config_status.valid:
            raise ValueError(
                "Invalid permission policy configuration at "
                f"{self.permission_config_status.source_path}: "
                f"{self.permission_config_status.error}"
            )
        self.permission_policy = build_permission_policy(
            active_mode=self.config.permission_mode,
            workspace_root=self.project_root,
            tool_requirements=self.registry.get_tool_requirements(),
            rules=self.permission_config_status.rules,
        )

        self.workflow_mode = WorkflowMode.EXECUTE.value
        self.messages: list[Message] = []
        self.prompt_format: str | None = None
        self.prompt_sections: list[str] = []
        self._system_message: Message | None = None
        self._use_react: bool | None = None
        self.capability_profile = resolve_backend_capability_profile(self.backend)
        self.last_turn_summary: TurnSummary | None = None
        self.steering = SteeringMailbox()

        # Track original task for multi-turn conversations
        self._current_task: str | None = None

        # Runtime safeguards for filtering, steering, and deduplication
        self.safeguards = RuntimeSafeguards()

        # The agent itself is handed to the session installer, so every
        # attribute assigned above is in place by the time it runs.
        self.session = build_fresh_runtime_session_install(self).session

        # Load project context if enabled
        self.project_context: ProjectContext | None = None
        if self.config.auto_context:
            self.project_context = detect_project(self.project_root)

    def resume_session(self, session_id: str | None = None) -> bool:
        """Resume the latest or named persisted session."""
        return resume_runtime_shell_session(self, session_id=session_id)

    def steer(self, message: str) -> bool:
        """Send a steering message to the agent during execution.

        Returns True if the agent is running and the message was queued,
        False if the agent is not running.
        """
        return self.steering.steer(message)

    @property
    def is_running(self) -> bool:
        """Check if the agent is currently running."""
        return self.steering.is_running

    @property
    def current_task(self) -> str | None:
        """Expose the current top-level task through the bootstrap contract."""
        return self._current_task

    @current_task.setter
    def current_task(self, value: str | None) -> None:
        """Store a new task, drop the cached system message, and sync the session.

        Assigning the value that is already stored is a no-op.
        """
        if self._current_task == value:
            return
        self._current_task = value
        # Reset the cached system message whenever the task changes.
        self._system_message = None
        # NOTE(review): the attribute guard presumably covers assignments made
        # before ``__init__`` has installed ``self.session`` - confirm.
        session = getattr(self, "session", None)
        if session is not None:
            session.update_runtime_state(current_task=value)

    @property
    def active_permission_mode(self) -> str:
        """Return the current runtime permission mode."""
        return self.permission_policy.active_mode.as_str()

    @property
    def active_permission_rule_counts(self) -> dict[str, int]:
        """Return rule counts for the active permission policy."""
        return self.permission_policy.rule_counts()

    @property
    def use_react(self) -> bool:
        """Determine whether to use ReAct prompting or native tools."""
        return resolve_runtime_shell_use_react(self)

    def _get_system_message(self) -> Message:
        """Get the system message with current context."""
        return get_runtime_shell_system_message(self)

    def set_workflow_mode(self, workflow_mode: str) -> None:
        """Update the active workflow mode used by the system prompt."""
        set_runtime_shell_workflow_mode(self, workflow_mode)

    def refresh_capability_profile(self) -> None:
        """Refresh the runtime capability profile from the current backend."""
        refresh_runtime_shell_capability_profile(self)

    def queue_steering_message(self, message: str) -> None:
        """Queue one runtime steering message."""
        self.steering.queue(message)

    def queue_ephemeral_steering_message(self, message: str) -> None:
        """Queue one UI-only runtime steering message."""
        self.steering.queue_ephemeral(message)

    def drain_steering_messages(self) -> list[SteeringDirective]:
        """Drain queued runtime steering messages."""
        return self.steering.drain()

    def _get_few_shot_examples(self) -> list[Message]:
        """Get few-shot examples demonstrating proper tool use."""
        return get_runtime_shell_few_shot_examples(self)

    async def run(
        self,
        user_message: str,
        on_event: Callable[[AgentEvent], None] | Callable[[AgentEvent], Awaitable[None]] | None = None,
        on_confirmation: Callable[
            [str, str, str, dict[str, Any] | None],
            Awaitable[bool],
        ] | None = None,
        on_user_question: Callable[[str, list[str] | None], Awaitable[str]] | None = None,
        use_plan: bool | None = None,
    ) -> str:
        """Run the agent with a user message.

        Args:
            user_message: The user's input
            on_event: Optional callback for streaming events (sync or async)
            on_confirmation: Optional callback for tool confirmation. Takes
                (tool_name, message, details, preview) and returns True to confirm.
            on_user_question: Optional callback for AskUserQuestion. Takes
                (question, options) and returns the answer.
            use_plan: Force planning on/off. None = auto-detect.

        Returns:
            The final response text
        """
        return await run_runtime_shell(
            self,
            user_message,
            on_event=on_event,
            on_confirmation=on_confirmation,
            on_user_question=on_user_question,
            use_plan=use_plan,
        )

    async def run_streaming(
        self,
        user_message: str,
    ) -> AsyncIterator[AgentEvent]:
        """Run the agent with streaming output from the primary runtime path."""
        async for event in stream_runtime_shell(self, user_message):
            yield event

    async def run_explore(
        self,
        user_message: str,
        on_event: Callable[[AgentEvent], None] | Callable[[AgentEvent], Awaitable[None]] | None = None,
        *,
        fresh: bool = False,
    ) -> str:
        """Run one read-only explore query outside the main workflow runtime."""
        return await run_runtime_shell_explore(
            self,
            user_message,
            on_event=on_event,
            fresh=fresh,
        )

    def clear_history(self) -> None:
        """Clear conversation history."""
        clear_runtime_shell_history(self)