@@ -588,6 +588,9 @@ class Agent: |
| 588 | 588 | continuation_count = 0 # How many times we've nudged to continue |
| 589 | 589 | empty_retry_count = 0 # How many times we've retried on empty response |
| 590 | 590 | MAX_EMPTY_RETRIES = 5 # More retries before giving up - small models need patience |
| 591 | + extracted_iterations = 0 # How many times we've extracted bracket-format tool calls |
| 592 | + MAX_EXTRACTED_ITERATIONS = 3 # Limit extracted tool call loops |
| 593 | + consecutive_errors = 0 # Track consecutive tool errors |
| 591 | 594 | |
| 592 | 595 | # Adaptive token budgeting based on task complexity |
| 593 | 596 | complexity = estimate_complexity(task) |
@@ -1014,11 +1017,28 @@ class Agent: |
| 1014 | 1017 | |
| 1015 | 1018 | # If we now have tool calls (from raw JSON extraction), execute them |
| 1016 | 1019 | if tool_calls: |
| 1020 | + extracted_iterations += 1 |
| 1021 | + |
| 1022 | + # Check if we've exceeded extraction limits |
| 1023 | + if extracted_iterations > MAX_EXTRACTED_ITERATIONS: |
| 1024 | + # Model keeps outputting bracket-format calls - stop and report |
| 1025 | + final_response = content |
| 1026 | + self.messages.append(Message(role=Role.ASSISTANT, content=response_content)) |
| 1027 | + await emit(AgentEvent( |
| 1028 | + type="response", |
| 1029 | + content=final_response + "\n\n(Stopping here - task appears complete.)" |
| 1030 | + )) |
| 1031 | + break |
| 1032 | + |
| 1017 | 1033 | try: |
| 1018 | 1034 | with open("/tmp/loader_debug.log", "a") as f: |
| 1019 | | - f.write(f"[loop] executing {len(tool_calls)} extracted tool calls\n") |
| 1035 | + f.write(f"[loop] executing {len(tool_calls)} extracted tool calls (iteration {extracted_iterations})\n") |
| 1020 | 1036 | except Exception: |
| 1021 | 1037 | pass |
| 1038 | + |
| 1039 | + # Track errors in this batch |
| 1040 | + batch_errors = 0 |
| 1041 | + |
| 1022 | 1042 | # This duplicates the tool execution logic above, but that's intentional |
| 1023 | 1043 | # to handle the case where raw JSON tool calls are extracted |
| 1024 | 1044 | for i, tc in enumerate(tool_calls): |
@@ -1079,6 +1099,13 @@ class Agent: |
| 1079 | 1099 | result_text = f"Error: {e}" |
| 1080 | 1100 | is_error = True |
| 1081 | 1101 | |
| 1102 | + # Track errors |
| 1103 | + if is_error: |
| 1104 | + batch_errors += 1 |
| 1105 | + consecutive_errors += 1 |
| 1106 | + else: |
| 1107 | + consecutive_errors = 0 # Reset on success |
| 1108 | + |
| 1082 | 1109 | await emit(AgentEvent( |
| 1083 | 1110 | type="tool_result", |
| 1084 | 1111 | content=result_text, |
@@ -1095,6 +1122,14 @@ class Agent: |
| 1095 | 1122 | content=result_text, |
| 1096 | 1123 | )) |
| 1097 | 1124 | |
| 1125 | + # After executing batch, check if we should stop |
| 1126 | + # Stop if: every tool call in this batch failed, or 3+ consecutive tool errors have accumulated |
| 1127 | + if batch_errors == len(tool_calls) or consecutive_errors >= 3: |
| 1128 | + # All failed or too many consecutive errors - stop trying |
| 1129 | + final_response = "I've completed what I can. Some operations encountered errors." |
| 1130 | + await emit(AgentEvent(type="response", content=final_response)) |
| 1131 | + break |
| 1132 | + |
| 1098 | 1133 | continue |
| 1099 | 1134 | |
| 1100 | 1135 | # No tool calls - check if model is describing instead of acting |