@@ -7,16 +7,14 @@ from dataclasses import dataclass |
| 7 | 7 | from pathlib import Path |
| 8 | 8 | |
| 9 | 9 | from ..context.project import ProjectContext, detect_project |
| 10 | | -from ..llm.base import LLMBackend, Message, Role, ToolCall |
| 10 | +from ..llm.base import LLMBackend, Message, Role |
| 11 | 11 | from ..runtime.bootstrap import build_runtime_context |
| 12 | 12 | from ..runtime.capabilities import resolve_backend_capability_profile |
| 13 | 13 | from ..runtime.context import RuntimeContext |
| 14 | 14 | from ..runtime.conversation import ConversationRuntime |
| 15 | 15 | from ..runtime.deliberation import ( |
| 16 | 16 | DECOMPOSITION_PROMPT, |
| 17 | | - SELF_CRITIQUE_PROMPT, |
| 18 | 17 | parse_decomposition, |
| 19 | | - parse_self_critique, |
| 20 | 18 | should_decompose, |
| 21 | 19 | ) |
| 22 | 20 | from ..runtime.dod import DefinitionOfDoneStore |
@@ -28,19 +26,11 @@ from ..runtime.permissions import ( |
| 28 | 26 | load_permission_rules, |
| 29 | 27 | ) |
| 30 | 28 | from ..runtime.prompt_history import PromptSnapshot |
| 31 | | -from ..runtime.reasoning_types import SelfCritique, TaskDecomposition |
| 29 | +from ..runtime.reasoning_types import TaskDecomposition |
| 32 | 30 | from ..runtime.session import ConversationSession |
| 33 | 31 | from ..runtime.task_classification import is_conversational |
| 34 | 32 | from ..runtime.workflow import WorkflowMode |
| 35 | 33 | from ..tools.base import ToolRegistry, create_default_registry |
| 36 | | -from .parsing import parse_tool_calls |
| 37 | | -from .planner import ( |
| 38 | | - PLANNING_PROMPT, |
| 39 | | - SHOULD_PLAN_PROMPT, |
| 40 | | - Plan, |
| 41 | | - parse_plan, |
| 42 | | - should_plan, |
| 43 | | -) |
| 44 | 34 | from .prompts import build_system_prompt_result |
| 45 | 35 | from .safeguards import RuntimeSafeguards |
| 46 | 36 | |
@@ -377,34 +367,6 @@ class Agent: |
| 377 | 367 | Message(role=Role.ASSISTANT, content="Done."), |
| 378 | 368 | ] |
| 379 | 369 | |
| 380 | | - async def _should_plan(self, task: str) -> bool: |
| 381 | | - """Ask LLM if this task needs planning.""" |
| 382 | | - if not self.config.auto_plan: |
| 383 | | - return False |
| 384 | | - |
| 385 | | - prompt = SHOULD_PLAN_PROMPT.format(task=task) |
| 386 | | - response = await self.backend.complete( |
| 387 | | - messages=[Message(role=Role.USER, content=prompt)], |
| 388 | | - tools=None, |
| 389 | | - temperature=0.3, |
| 390 | | - max_tokens=20, |
| 391 | | - ) |
| 392 | | - return should_plan(response.content) |
| 393 | | - |
| 394 | | - async def _create_plan(self, task: str) -> Plan: |
| 395 | | - """Generate a plan for the task.""" |
| 396 | | - prompt = PLANNING_PROMPT.format(task=task) |
| 397 | | - response = await self.backend.complete( |
| 398 | | - messages=[ |
| 399 | | - self._get_system_message(), |
| 400 | | - Message(role=Role.USER, content=prompt), |
| 401 | | - ], |
| 402 | | - tools=None, |
| 403 | | - temperature=0.5, |
| 404 | | - max_tokens=500, |
| 405 | | - ) |
| 406 | | - return parse_plan(response.content, goal=task) |
| 407 | | - |
| 408 | 370 | # === Reasoning Stage Methods === |
| 409 | 371 | |
| 410 | 372 | async def _decompose_task(self, task: str) -> TaskDecomposition: |
@@ -421,17 +383,6 @@ class Agent: |
| 421 | 383 | ) |
| 422 | 384 | return parse_decomposition(response.content, task) |
| 423 | 385 | |
| 424 | | - async def _self_critique(self, response: str, context: str) -> SelfCritique: |
| 425 | | - """Perform self-critique on a response.""" |
| 426 | | - prompt = SELF_CRITIQUE_PROMPT.format(response=response, context=context) |
| 427 | | - critique_response = await self.backend.complete( |
| 428 | | - messages=[Message(role=Role.USER, content=prompt)], |
| 429 | | - tools=None, |
| 430 | | - temperature=0.3, |
| 431 | | - max_tokens=500, |
| 432 | | - ) |
| 433 | | - return parse_self_critique(critique_response.content, response) |
| 434 | | - |
| 435 | 386 | async def _handle_conversational( |
| 436 | 387 | self, |
| 437 | 388 | user_message: str, |
@@ -711,340 +662,6 @@ class Agent: |
| 711 | 662 | self.last_turn_summary = await runtime.run_query(user_message, emit) |
| 712 | 663 | return self.last_turn_summary.final_response |
| 713 | 664 | |
| 714 | | - def _contains_unexecuted_code(self, content: str) -> bool: |
| 715 | | - """Detect if response contains code blocks that should be tool calls. |
| 716 | | - |
| 717 | | - Returns True if the response looks like chatbot-style advice with |
| 718 | | - code blocks, rather than an actual final answer. |
| 719 | | - """ |
| 720 | | - import re |
| 721 | | - |
| 722 | | - # Check for raw JSON tool call attempts (model outputting tool calls as text) |
| 723 | | - # This happens when small models try to call tools but output JSON instead |
| 724 | | - json_tool_patterns = [ |
| 725 | | - r'\{"name"\s*:\s*"(write|read|edit|bash|glob|grep)"', # Tool call JSON |
| 726 | | - r'"name"\s*:\s*"(write|read|edit|bash|glob|grep)".*"(?:parameters|arguments)"', |
| 727 | | - ] |
| 728 | | - for pattern in json_tool_patterns: |
| 729 | | - if re.search(pattern, content): |
| 730 | | - return True |
| 731 | | - |
| 732 | | - # Check for bracket format: [calls bash tool with: ...] or [USE write tool: ...] |
| 733 | | - bracket_patterns = [ |
| 734 | | - r'\[calls?\s+\w+\s+tool\s+with:', |
| 735 | | - r'\[USE\s+\w+\s+tool:', |
| 736 | | - ] |
| 737 | | - for pattern in bracket_patterns: |
| 738 | | - if re.search(pattern, content, re.IGNORECASE): |
| 739 | | - return True |
| 740 | | - |
| 741 | | - # Check for hallucinated/narrated tool uses - model DESCRIBES using tools |
| 742 | | - # but doesn't actually call them (past tense narration) |
| 743 | | - hallucination_patterns = [ |
| 744 | | - r'used\s+`?(?:bash|write|read|edit|glob|grep)`?\s+tool', # "Used bash tool..." |
| 745 | | - r'used\s+the\s+`?(?:bash|write|read|edit|glob|grep)`?\s+tool', # "Used the bash tool..." |
| 746 | | - r'using\s+the\s+`?(?:bash|write|read|edit|glob|grep)`?\s+tool', # "...using the write tool" |
| 747 | | - r'with\s+file_path\s*=\s*[`\'"]', # "with file_path=`..." (narrated parameter) |
| 748 | | - r'with\s+command\s*[`\'"]', # "with command `..." (narrated bash) |
| 749 | | - r'i\s+(ran|executed|created|wrote|read)\s+(the\s+)?(command|file)', # "I ran the command" |
| 750 | | - r'\*\s*used\s+`', # "* Used `bash`..." (bullet point narration) |
| 751 | | - r'here\s+is\s+what\s+i\s+did:', # "Here is what I did:" |
| 752 | | - ] |
| 753 | | - for pattern in hallucination_patterns: |
| 754 | | - if re.search(pattern, content, re.IGNORECASE): |
| 755 | | - return True |
| 756 | | - |
| 757 | | - # Look for markdown code blocks |
| 758 | | - code_blocks = re.findall(r'```(\w*)\n(.*?)```', content, re.DOTALL) |
| 759 | | - |
| 760 | | - if not code_blocks: |
| 761 | | - return False |
| 762 | | - |
| 763 | | - # Check if any code blocks look like commands or file contents |
| 764 | | - action_indicators = [ |
| 765 | | - 'bash', 'sh', 'shell', 'cmd', 'powershell', # Shell code |
| 766 | | - 'mkdir', 'cd ', 'npm ', 'pip ', 'git ', # Commands in code |
| 767 | | - 'python', 'html', 'css', 'javascript', 'js', 'ts', # File content |
| 768 | | - ] |
| 769 | | - |
| 770 | | - chatbot_phrases = [ |
| 771 | | - 'you can run', 'you can create', 'you can use', |
| 772 | | - 'run this', 'create this', 'save this', |
| 773 | | - 'here\'s how', 'here is', 'copy this', |
| 774 | | - 'execute this', 'paste this', |
| 775 | | - ] |
| 776 | | - |
| 777 | | - # Tutorial/instruction patterns |
| 778 | | - tutorial_patterns = [ |
| 779 | | - r'^\s*\d+\.\s+(open|create|navigate|run|execute|make)', # Numbered instructions |
| 780 | | - r'(first|second|third|next|then),?\s+(open|create|navigate)', # Sequenced steps |
| 781 | | - r'open your (terminal|command|shell)', # Tutorial starter |
| 782 | | - r'navigate to (the|your|~/)', # Navigation instruction |
| 783 | | - r'here\'s how you can (quickly|easily)?', # How-to preamble |
| 784 | | - r'you can (start by|begin by|follow these)', # Tutorial start |
| 785 | | - ] |
| 786 | | - |
| 787 | | - content_lower = content.lower() |
| 788 | | - |
| 789 | | - # Check for tutorial patterns |
| 790 | | - for pattern in tutorial_patterns: |
| 791 | | - if re.search(pattern, content_lower, re.MULTILINE | re.IGNORECASE): |
| 792 | | - return True |
| 793 | | - |
| 794 | | - # If chatbot phrases present with code blocks, it's describing not doing |
| 795 | | - for phrase in chatbot_phrases: |
| 796 | | - if phrase in content_lower: |
| 797 | | - return True |
| 798 | | - |
| 799 | | - # Check code block languages that suggest action needed |
| 800 | | - for lang, _ in code_blocks: |
| 801 | | - if lang.lower() in action_indicators: |
| 802 | | - return True |
| 803 | | - |
| 804 | | - return False |
| 805 | | - |
| 806 | | - def _extract_raw_json_tool_calls(self, content: str) -> list[ToolCall]: |
| 807 | | - """Try to extract tool calls from raw JSON or bracket format in content. |
| 808 | | - |
| 809 | | - Some small models output tool calls as raw JSON text or bracket format |
| 810 | | - instead of using the proper tool calling API. This method tries to |
| 811 | | - parse and recover them. |
| 812 | | - """ |
| 813 | | - import json |
| 814 | | - import os |
| 815 | | - import re |
| 816 | | - |
| 817 | | - allowed_tool_names = [tool.name for tool in self.registry.list_tools()] |
| 818 | | - parsed = parse_tool_calls( |
| 819 | | - content, |
| 820 | | - allowed_tool_names=allowed_tool_names, |
| 821 | | - ) |
| 822 | | - if parsed.tool_calls: |
| 823 | | - return parsed.tool_calls |
| 824 | | - |
| 825 | | - tool_calls = [] |
| 826 | | - tool_names = [name.casefold() for name in allowed_tool_names] |
| 827 | | - |
| 828 | | - # Debug log |
| 829 | | - def debug(msg): |
| 830 | | - try: |
| 831 | | - with open("/tmp/loader_debug.log", "a") as f: |
| 832 | | - f.write(f"[extract] {msg}\n") |
| 833 | | - except Exception: |
| 834 | | - pass |
| 835 | | - |
| 836 | | - debug(f"checking content len={len(content)}") |
| 837 | | - |
| 838 | | - # First, try to extract bracket format: [calls bash tool with: ...] |
| 839 | | - # or [USE bash tool: ...] or similar variations |
| 840 | | - # Note: Using (.+?) with re.DOTALL to capture content that may span patterns |
| 841 | | - # The ] at end acts as anchor, but we need to handle ] inside content |
| 842 | | - # Also handle formats without colon: [calls bash tool with command="..."] |
| 843 | | - bracket_patterns = [ |
| 844 | | - # With colon after "with" |
| 845 | | - r'\[calls?\s+(\w+)\s+tool\s+with:\s*(.+?)\](?=\s*(?:\n|$|[A-Z]|Done|Created|Error))', |
| 846 | | - r'\[USE\s+(\w+)\s+tool:\s*(.+?)\](?=\s*(?:\n|$|[A-Z]|Done|Created|Error))', |
| 847 | | - r'\[calls?\s+(\w+)\s+tool\s+with:\s*([^\]]+)\]', |
| 848 | | - r'\[USE\s+(\w+)\s+tool:\s*([^\]]+)\]', |
| 849 | | - # Without colon - direct key=value format: [calls bash tool with command="..."] |
| 850 | | - r'\[calls?\s+(\w+)\s+tool\s+with\s+(\w+\s*=.+?)\](?=\s*(?:\n|$|[A-Z]|Done|Created|Error|Directly))', |
| 851 | | - r'\[calls?\s+(\w+)\s+tool\s+with\s+([^\]]+)\]', |
| 852 | | - # Inline format: [calls write tool with file_path="..." and inline content "..."] |
| 853 | | - r'\[calls?\s+(\w+)\s+tool\s+with\s+(.+?)\](?=\s*(?:\n|$|Directly|Done))', |
| 854 | | - ] |
| 855 | | - |
| 856 | | - for pattern in bracket_patterns: |
| 857 | | - debug(f"trying pattern: {pattern}") |
| 858 | | - for match in re.finditer(pattern, content, re.IGNORECASE): |
| 859 | | - tool_name = match.group(1).lower() |
| 860 | | - args_str = match.group(2).strip() |
| 861 | | - debug(f" matched: tool={tool_name}, args={args_str[:50]}...") |
| 862 | | - |
| 863 | | - if tool_name.casefold() not in tool_names: |
| 864 | | - debug(f" skipping - tool_name '{tool_name}' not in tool_names") |
| 865 | | - continue |
| 866 | | - |
| 867 | | - # Skip if we already have a tool call at this position (avoid duplicates) |
| 868 | | - match_start = match.start() |
| 869 | | - if any(tc.id.endswith(f"_pos{match_start}") for tc in tool_calls): |
| 870 | | - debug(f" skipping - already extracted at position {match_start}") |
| 871 | | - continue |
| 872 | | - |
| 873 | | - try: |
| 874 | | - # Parse the arguments based on tool type |
| 875 | | - if tool_name == "bash": |
| 876 | | - # bash tool: extract command, handling various formats |
| 877 | | - # Model might output: "mkdir -p /foo" or "command='mkdir -p /foo'" |
| 878 | | - cmd = args_str |
| 879 | | - # If it has command= prefix, extract just the command value |
| 880 | | - cmd_match = re.search(r'command\s*[=:]\s*["\']?([^"\']+)["\']?', args_str) |
| 881 | | - if cmd_match: |
| 882 | | - cmd = cmd_match.group(1).strip() |
| 883 | | - # Also handle case where model outputs "cmd, command='cmd'" - take first part |
| 884 | | - elif ',' in args_str and 'command=' in args_str: |
| 885 | | - cmd = args_str.split(',')[0].strip() |
| 886 | | - # Expand ~ in command |
| 887 | | - cmd = os.path.expanduser(cmd) |
| 888 | | - tool_calls.append(ToolCall( |
| 889 | | - id=f"bracket_{tool_name}_{len(tool_calls)}_pos{match_start}", |
| 890 | | - name=tool_name, |
| 891 | | - arguments={"command": cmd}, |
| 892 | | - )) |
| 893 | | - elif tool_name == "write": |
| 894 | | - # write tool: file_path=..., content="..." |
| 895 | | - # Handle quoted file paths |
| 896 | | - file_path_match = re.search(r'file_path[=:]\s*["\']?([^"\'`,\s]+)["\']?', args_str) |
| 897 | | - |
| 898 | | - # For content, find the content= part and extract everything after it |
| 899 | | - # Handle both quoted and unquoted content |
| 900 | | - # Also handle "inline content" format: and inline content "..." |
| 901 | | - content_start = re.search(r'(?:inline\s+)?content[=:]\s*', args_str, re.IGNORECASE) |
| 902 | | - if not content_start: |
| 903 | | - # Also try: and inline content "..." |
| 904 | | - content_start = re.search(r'and\s+inline\s+content\s+', args_str, re.IGNORECASE) |
| 905 | | - |
| 906 | | - file_content = "" |
| 907 | | - if content_start: |
| 908 | | - rest = args_str[content_start.end():] |
| 909 | | - # Check if content starts with a quote |
| 910 | | - if rest.startswith('"'): |
| 911 | | - # Find matching end quote (handle escaped quotes) |
| 912 | | - end_idx = len(rest) - 1 |
| 913 | | - # Walk backward to find the last quote |
| 914 | | - while end_idx > 0 and rest[end_idx] != '"': |
| 915 | | - end_idx -= 1 |
| 916 | | - if end_idx > 0: |
| 917 | | - file_content = rest[1:end_idx] |
| 918 | | - elif rest.startswith("'"): |
| 919 | | - end_idx = len(rest) - 1 |
| 920 | | - while end_idx > 0 and rest[end_idx] != "'": |
| 921 | | - end_idx -= 1 |
| 922 | | - if end_idx > 0: |
| 923 | | - file_content = rest[1:end_idx] |
| 924 | | - else: |
| 925 | | - # No quotes - take everything |
| 926 | | - file_content = rest.strip() |
| 927 | | - |
| 928 | | - debug(f" write: file_path={file_path_match.group(1) if file_path_match else None}, content_len={len(file_content)}") |
| 929 | | - |
| 930 | | - if file_path_match: |
| 931 | | - file_path = file_path_match.group(1).strip('"\'') |
| 932 | | - file_path = os.path.expanduser(file_path) # Expand ~ |
| 933 | | - tool_calls.append(ToolCall( |
| 934 | | - id=f"bracket_{tool_name}_{len(tool_calls)}_pos{match_start}", |
| 935 | | - name=tool_name, |
| 936 | | - arguments={"file_path": file_path, "content": file_content}, |
| 937 | | - )) |
| 938 | | - elif tool_name == "read": |
| 939 | | - # read tool: file_path |
| 940 | | - file_path = args_str.split(',')[0].split('=')[-1].strip().strip('"\'') |
| 941 | | - file_path = os.path.expanduser(file_path) |
| 942 | | - tool_calls.append(ToolCall( |
| 943 | | - id=f"bracket_{tool_name}_{len(tool_calls)}_pos{match_start}", |
| 944 | | - name=tool_name, |
| 945 | | - arguments={"file_path": file_path}, |
| 946 | | - )) |
| 947 | | - elif tool_name == "edit": |
| 948 | | - # edit tool: file_path=..., old_string="...", new_string="..." |
| 949 | | - file_path_match = re.search(r'file_path[=:]\s*["\']?([^"\'`,]+)["\']?', args_str) |
| 950 | | - old_match = re.search(r'old_string[=:]\s*["\'](.+?)["\']', args_str) |
| 951 | | - new_match = re.search(r'new_string[=:]\s*["\'](.+?)["\']', args_str) |
| 952 | | - |
| 953 | | - if file_path_match and old_match and new_match: |
| 954 | | - file_path = os.path.expanduser(file_path_match.group(1).strip('"\'')) |
| 955 | | - tool_calls.append(ToolCall( |
| 956 | | - id=f"bracket_{tool_name}_{len(tool_calls)}_pos{match_start}", |
| 957 | | - name=tool_name, |
| 958 | | - arguments={ |
| 959 | | - "file_path": file_path, |
| 960 | | - "old_string": old_match.group(1), |
| 961 | | - "new_string": new_match.group(1), |
| 962 | | - }, |
| 963 | | - )) |
| 964 | | - elif tool_name in ("glob", "grep"): |
| 965 | | - # glob/grep: pattern - expand ~ if it looks like a path |
| 966 | | - pattern = args_str |
| 967 | | - if '~' in pattern: |
| 968 | | - pattern = os.path.expanduser(pattern) |
| 969 | | - tool_calls.append(ToolCall( |
| 970 | | - id=f"bracket_{tool_name}_{len(tool_calls)}_pos{match_start}", |
| 971 | | - name=tool_name, |
| 972 | | - arguments={"pattern": pattern}, |
| 973 | | - )) |
| 974 | | - except Exception: |
| 975 | | - continue |
| 976 | | - |
| 977 | | - # If we found bracket-format calls, return them |
| 978 | | - if tool_calls: |
| 979 | | - return tool_calls |
| 980 | | - |
| 981 | | - # Otherwise, try to find JSON objects starting with {"name": "tool_name" |
| 982 | | - # This is tricky because the content field may contain arbitrary text |
| 983 | | - |
| 984 | | - for tool_name in tool_names: |
| 985 | | - # Look for the start of a tool call JSON |
| 986 | | - pattern = rf'\{{\s*"name"\s*:\s*"{tool_name}"\s*,\s*"(?:parameters|arguments)"\s*:\s*\{{' |
| 987 | | - for match in re.finditer(pattern, content): |
| 988 | | - start = match.start() |
| 989 | | - |
| 990 | | - # Try to find the matching closing braces by parsing |
| 991 | | - # Start from the beginning of the JSON object |
| 992 | | - try: |
| 993 | | - # Find the complete JSON by tracking brace depth |
| 994 | | - brace_count = 0 |
| 995 | | - in_string = False |
| 996 | | - escape_next = False |
| 997 | | - end = start |
| 998 | | - |
| 999 | | - for i, char in enumerate(content[start:], start): |
| 1000 | | - if escape_next: |
| 1001 | | - escape_next = False |
| 1002 | | - continue |
| 1003 | | - |
| 1004 | | - if char == '\\' and in_string: |
| 1005 | | - escape_next = True |
| 1006 | | - continue |
| 1007 | | - |
| 1008 | | - if char == '"' and not escape_next: |
| 1009 | | - in_string = not in_string |
| 1010 | | - continue |
| 1011 | | - |
| 1012 | | - if not in_string: |
| 1013 | | - if char == '{': |
| 1014 | | - brace_count += 1 |
| 1015 | | - elif char == '}': |
| 1016 | | - brace_count -= 1 |
| 1017 | | - if brace_count == 0: |
| 1018 | | - end = i + 1 |
| 1019 | | - break |
| 1020 | | - |
| 1021 | | - if brace_count == 0 and end > start: |
| 1022 | | - json_str = content[start:end] |
| 1023 | | - try: |
| 1024 | | - # Try to parse as-is first |
| 1025 | | - data = json.loads(json_str) |
| 1026 | | - except json.JSONDecodeError: |
| 1027 | | - # Model may have output literal newlines in strings |
| 1028 | | - # Escape them so JSON parser accepts it |
| 1029 | | - try: |
| 1030 | | - fixed = json_str.replace('\n', '\\n').replace('\r', '\\r').replace('\t', '\\t') |
| 1031 | | - data = json.loads(fixed) |
| 1032 | | - except json.JSONDecodeError: |
| 1033 | | - continue |
| 1034 | | - |
| 1035 | | - if "name" in data and ("parameters" in data or "arguments" in data): |
| 1036 | | - args = data.get("arguments") or data.get("parameters", {}) |
| 1037 | | - tool_calls.append(ToolCall( |
| 1038 | | - id=f"raw_{data['name']}_{len(tool_calls)}", |
| 1039 | | - name=data["name"], |
| 1040 | | - arguments=args, |
| 1041 | | - )) |
| 1042 | | - |
| 1043 | | - except Exception: |
| 1044 | | - continue |
| 1045 | | - |
| 1046 | | - return tool_calls |
| 1047 | | - |
| 1048 | 665 | def clear_history(self) -> None: |
| 1049 | 666 | """Clear conversation history.""" |
| 1050 | 667 | self.messages = [] |