tenseleyflow/loader / 5453736

Browse files

feat: add runtime safeguards to improve agent behavior

Implements comprehensive runtime safeguards to help smaller models stay on track:

**Content Filtering:**
- Filter code blocks, bracket tool calls, preambles from stream
- Filter raw JSON tool calls from output
- Filter internal recovery/steering prompts from user display

**Loop Detection:**
- Action loop detection (glob → write → glob patterns)
- Text loop detection (repeated responses)
- Aggressive phrase-based repetition detection

**Pre-Action Validation:**
- Block dangerous commands (rm -rf /, fork bombs, etc.)
- Validate empty arguments, invalid paths
- Block interactive tools (vim, nano, less)
- Prevent writes to system directories

**Deduplication:**
- Track files created, commands run, edits made
- Skip duplicate tool calls automatically

**Improvements:**
- Hide "Task not complete yet" messages from users
- Fix completion check false positives for simple tasks
- Support both "parameters" and "arguments" in JSON extraction
- More conservative completion detection for web/design tasks
Authored by espadonne
SHA
5453736bf4ed6a822708fda7d8479a179ba9136c
Parents
2300268
Tree
0b59bed

4 changed files

Status | File | + | -
M src/loader/agent/loop.py 179 23
M src/loader/agent/prompts.py 35 22
M src/loader/agent/reasoning.py 9 0
A src/loader/agent/safeguards.py 1035 0
src/loader/agent/loop.py (modified)
@@ -45,6 +45,7 @@ from .reasoning import (
4545
     estimate_complexity,
4646
     get_token_budget,
4747
 )
48
+from .safeguards import RuntimeSafeguards, ValidationResult
4849
 
4950
 
5051
 @dataclass
@@ -153,6 +154,9 @@ class Agent:
153154
         # Track original task for multi-turn conversations
154155
         self._current_task: str | None = None
155156
 
157
+        # Runtime safeguards for filtering, steering, and deduplication
158
+        self.safeguards = RuntimeSafeguards()
159
+
156160
         # Load project context if enabled
157161
         self.project_context: ProjectContext | None = None
158162
         if self.config.auto_context:
@@ -616,6 +620,9 @@ class Agent:
616620
             # Get completion from LLM
617621
             await emit(AgentEvent(type="thinking"))
618622
 
623
+            # Reset code block filter state for this LLM call
624
+            self.safeguards.code_filter.reset()
625
+
619626
             # Pass tools only for native tool calling
620627
             tools = None if self.use_react else self.registry.get_schemas()
621628
 
@@ -623,6 +630,7 @@ class Agent:
623630
             pending_tool_calls_seen: set[str] = set()  # Track IDs of pending tool calls shown
624631
             if self.config.stream:
625632
                 full_content = ""
633
+                full_content_unfiltered = ""  # Keep original for history
626634
                 tool_calls: list[ToolCall] = []
627635
 
628636
                 async for chunk in self.backend.stream(
@@ -631,13 +639,27 @@ class Agent:
631639
                     temperature=self.config.temperature,
632640
                     max_tokens=effective_max_tokens,
633641
                 ):
634
-                    # Emit stream events for content OR for final chunk (to signal end)
635
-                    if chunk.content or chunk.is_done:
642
+                    # Filter content through safeguards (removes code blocks)
643
+                    filtered_content = ""
644
+                    if chunk.content:
645
+                        filtered_content = self.safeguards.filter_stream_chunk(chunk.content)
646
+                        full_content_unfiltered += chunk.content
647
+
648
+                    # Emit stream events for filtered content OR for final chunk (to signal end)
649
+                    if filtered_content or chunk.is_done:
636650
                         await emit(AgentEvent(
637651
                             type="stream",
638
-                            content=chunk.content,
652
+                            content=filtered_content,
639653
                             is_stream_end=chunk.is_done,
640654
                         ))
655
+
656
+                    # Check if we should inject steering (bad patterns detected)
657
+                    if self.safeguards.should_steer():
658
+                        steering_msg = self.safeguards.get_steering_message()
659
+                        if steering_msg:
660
+                            # Queue steering for next iteration
661
+                            self._steering_queue.put_nowait(steering_msg)
662
+
641663
                     # Show pending tool calls as they're detected (ReAct mode interleaving)
642664
                     if chunk.pending_tool_call and chunk.pending_tool_call.id not in pending_tool_calls_seen:
643665
                         pending_tool_calls_seen.add(chunk.pending_tool_call.id)
@@ -647,7 +669,7 @@ class Agent:
647669
                             tool_args=chunk.pending_tool_call.arguments,
648670
                         ))
649671
                     if chunk.is_done:
650
-                        full_content = chunk.full_content or full_content
672
+                        full_content = chunk.full_content or full_content_unfiltered
651673
                         tool_calls = chunk.tool_calls
652674
                         # Debug log
653675
                         try:
@@ -665,10 +687,17 @@ class Agent:
665687
                     temperature=self.config.temperature,
666688
                     max_tokens=effective_max_tokens,
667689
                 )
668
-                content = response.content
669
-                response_content = response.content
690
+                # Filter content through safeguards (removes code blocks)
691
+                response_content = response.content  # Keep original for history
692
+                content = self.safeguards.filter_complete_content(response.content)
670693
                 tool_calls = response.tool_calls if not self.use_react else []
671694
 
695
+                # Check if we should inject steering (bad patterns detected)
696
+                if self.safeguards.should_steer():
697
+                    steering_msg = self.safeguards.get_steering_message()
698
+                    if steering_msg:
699
+                        self._steering_queue.put_nowait(steering_msg)
700
+
672701
             # Handle empty responses (common with small models after clarifications)
673702
             if not content.strip():
674703
                 empty_retry_count += 1
@@ -796,6 +825,51 @@ class Agent:
796825
                     action_desc = f"{tool_call.name}: {str(tool_call.arguments)[:100]}"
797826
                     actions_taken.append(action_desc)
798827
 
828
+                    # Check for duplicate actions using safeguards
829
+                    is_dup, dup_reason = self.safeguards.check_duplicate(
830
+                        tool_call.name, tool_call.arguments
831
+                    )
832
+                    if is_dup:
833
+                        try:
834
+                            with open("/tmp/loader_debug.log", "a") as f:
835
+                                f.write(f"[loop] SKIPPING duplicate: {dup_reason}\n")
836
+                        except Exception:
837
+                            pass
838
+                        # Add a tool result indicating skip
839
+                        self.messages.append(Message(
840
+                            role=Role.TOOL,
841
+                            content=f"[Skipped - duplicate action: {dup_reason}]",
842
+                            tool_call_id=tool_call.id,
843
+                        ))
844
+                        continue  # Skip to next tool call
845
+
846
+                    # Pre-action validation
847
+                    validation = self.safeguards.validate_action(
848
+                        tool_call.name, tool_call.arguments
849
+                    )
850
+                    if not validation.valid:
851
+                        try:
852
+                            with open("/tmp/loader_debug.log", "a") as f:
853
+                                f.write(f"[loop] BLOCKED by validation: {validation.reason}\n")
854
+                        except Exception:
855
+                            pass
856
+                        # Add a tool result with the validation error
857
+                        error_msg = f"[Blocked - {validation.reason}]"
858
+                        if validation.suggestion:
859
+                            error_msg += f" Suggestion: {validation.suggestion}"
860
+                        self.messages.append(Message(
861
+                            role=Role.TOOL,
862
+                            content=error_msg,
863
+                            tool_call_id=tool_call.id,
864
+                        ))
865
+                        await emit(AgentEvent(
866
+                            type="tool_result",
867
+                            content=error_msg,
868
+                            tool_name=tool_call.name,
869
+                            is_error=True,
870
+                        ))
871
+                        continue  # Skip to next tool call
872
+
799873
                     # Rollback planning: create rollback action before destructive ops
800874
                     if rollback_plan and is_destructive_tool(tool_call.name, tool_call.arguments):
801875
                         async def read_file_for_backup(path: str) -> str:
@@ -943,6 +1017,23 @@ class Agent:
9431017
                         # Success or no auto-recover - clear recovery context
9441018
                         if not result.is_error:
9451019
                             self._recovery_context = None
1020
+                            # Record successful action to prevent duplicates
1021
+                            self.safeguards.record_action(tool_call.name, tool_call.arguments)
1022
+
1023
+                            # Check for repetitive loop pattern
1024
+                            is_loop, loop_desc = self.safeguards.detect_loop()
1025
+                            if is_loop:
1026
+                                await emit(AgentEvent(
1027
+                                    type="error",
1028
+                                    content=f"Loop detected: {loop_desc}. Stopping to prevent repetitive behavior.",
1029
+                                ))
1030
+                                final_response = "I noticed I was repeating the same actions. Let me know what you'd like me to do differently."
1031
+                                self.messages.append(Message(
1032
+                                    role=Role.ASSISTANT,
1033
+                                    content=final_response,
1034
+                                ))
1035
+                                await emit(AgentEvent(type="response", content=final_response))
1036
+                                return final_response
9461037
 
9471038
                     await emit(AgentEvent(
9481039
                         type="tool_result",
@@ -1039,16 +1130,9 @@ class Agent:
10391130
                 # Track errors in this batch
10401131
                 batch_errors = 0
10411132
 
1042
-                # Track executed commands to avoid repetition
1043
-                if not hasattr(self, '_executed_commands'):
1044
-                    self._executed_commands: set[str] = set()
1045
-
10461133
                 # This duplicates the tool execution logic above, but that's intentional
10471134
                 # to handle the case where raw JSON tool calls are extracted
10481135
                 for i, tc in enumerate(tool_calls):
1049
-                    # Create a signature for this command
1050
-                    cmd_sig = f"{tc.name}:{str(tc.arguments)}"
1051
-
10521136
                     # Skip browser/display commands that don't work in terminal
10531137
                     if tc.name == "bash":
10541138
                         cmd = tc.arguments.get("command", "")
@@ -1060,16 +1144,39 @@ class Agent:
10601144
                                 pass
10611145
                             continue
10621146
 
1063
-                    # Skip if we've already executed this exact command
1064
-                    if cmd_sig in self._executed_commands:
1147
+                    # Use safeguards for duplicate checking
1148
+                    is_dup, dup_reason = self.safeguards.check_duplicate(tc.name, tc.arguments)
1149
+                    if is_dup:
10651150
                         try:
10661151
                             with open("/tmp/loader_debug.log", "a") as f:
1067
-                                f.write(f"[loop] skipping duplicate command: {cmd_sig[:50]}\n")
1152
+                                f.write(f"[loop] skipping duplicate: {dup_reason}\n")
10681153
                         except Exception:
10691154
                             pass
10701155
                         continue
10711156
 
1072
-                    self._executed_commands.add(cmd_sig)
1157
+                    # Pre-action validation
1158
+                    validation = self.safeguards.validate_action(tc.name, tc.arguments)
1159
+                    if not validation.valid:
1160
+                        try:
1161
+                            with open("/tmp/loader_debug.log", "a") as f:
1162
+                                f.write(f"[loop] BLOCKED by validation: {validation.reason}\n")
1163
+                        except Exception:
1164
+                            pass
1165
+                        error_msg = f"[Blocked - {validation.reason}]"
1166
+                        if validation.suggestion:
1167
+                            error_msg += f" Suggestion: {validation.suggestion}"
1168
+                        await emit(AgentEvent(
1169
+                            type="tool_result",
1170
+                            content=error_msg,
1171
+                            tool_name=tc.name,
1172
+                            is_error=True,
1173
+                        ))
1174
+                        self.messages.append(Message(
1175
+                            role=Role.TOOL,
1176
+                            content=error_msg,
1177
+                        ))
1178
+                        batch_errors += 1
1179
+                        continue
10731180
 
10741181
                     # Small delay between tool executions for better UX
10751182
                     if i > 0:
@@ -1134,6 +1241,23 @@ class Agent:
11341241
                         consecutive_errors += 1
11351242
                     else:
11361243
                         consecutive_errors = 0  # Reset on success
1244
+                        # Record successful action to prevent duplicates
1245
+                        self.safeguards.record_action(tc.name, tc.arguments)
1246
+
1247
+                        # Check for repetitive loop pattern
1248
+                        is_loop, loop_desc = self.safeguards.detect_loop()
1249
+                        if is_loop:
1250
+                            await emit(AgentEvent(
1251
+                                type="error",
1252
+                                content=f"Loop detected: {loop_desc}. Stopping to prevent repetitive behavior.",
1253
+                            ))
1254
+                            final_response = "I noticed I was repeating the same actions. Let me know what you'd like me to do differently."
1255
+                            self.messages.append(Message(
1256
+                                role=Role.ASSISTANT,
1257
+                                content=final_response,
1258
+                            ))
1259
+                            await emit(AgentEvent(type="response", content=final_response))
1260
+                            return final_response
11371261
 
11381262
                     await emit(AgentEvent(
11391263
                         type="tool_result",
@@ -1231,6 +1355,24 @@ class Agent:
12311355
                         critique.revision_count += 1
12321356
                         continue  # Loop to get revised response
12331357
 
1358
+            # Check for text loop (agent repeating the same response)
1359
+            is_text_loop, text_loop_desc = self.safeguards.detect_text_loop(content)
1360
+            if is_text_loop:
1361
+                await emit(AgentEvent(
1362
+                    type="error",
1363
+                    content=f"Text loop detected: {text_loop_desc}. Stopping.",
1364
+                ))
1365
+                final_response = "I seem to be repeating myself. Let me know if you'd like me to try a different approach."
1366
+                self.messages.append(Message(
1367
+                    role=Role.ASSISTANT,
1368
+                    content=final_response,
1369
+                ))
1370
+                await emit(AgentEvent(type="response", content=final_response))
1371
+                return final_response
1372
+
1373
+            # Record response for future loop detection
1374
+            self.safeguards.record_response(content)
1375
+
12341376
             # Task completion check - don't give up too early!
12351377
             # Use original_task if available (for multi-turn conversations)
12361378
             effective_task = original_task or task
@@ -1396,7 +1538,7 @@ class Agent:
13961538
         # This happens when small models try to call tools but output JSON instead
13971539
         json_tool_patterns = [
13981540
             r'\{"name"\s*:\s*"(write|read|edit|bash|glob|grep)"',  # Tool call JSON
1399
-            r'"name"\s*:\s*"(write|read|edit|bash|glob|grep)".*"parameters"',
1541
+            r'"name"\s*:\s*"(write|read|edit|bash|glob|grep)".*"(?:parameters|arguments)"',
14001542
         ]
14011543
         for pattern in json_tool_patterns:
14021544
             if re.search(pattern, content):
@@ -1473,11 +1615,18 @@ class Agent:
14731615
         # or [USE bash tool: ...] or similar variations
14741616
         # Note: Using (.+?) with re.DOTALL to capture content that may span patterns
14751617
         # The ] at end acts as anchor, but we need to handle ] inside content
1618
+        # Also handle formats without colon: [calls bash tool with command="..."]
14761619
         bracket_patterns = [
1620
+            # With colon after "with"
14771621
             r'\[calls?\s+(\w+)\s+tool\s+with:\s*(.+?)\](?=\s*(?:\n|$|[A-Z]|Done|Created|Error))',
14781622
             r'\[USE\s+(\w+)\s+tool:\s*(.+?)\](?=\s*(?:\n|$|[A-Z]|Done|Created|Error))',
14791623
             r'\[calls?\s+(\w+)\s+tool\s+with:\s*([^\]]+)\]',
14801624
             r'\[USE\s+(\w+)\s+tool:\s*([^\]]+)\]',
1625
+            # Without colon - direct key=value format: [calls bash tool with command="..."]
1626
+            r'\[calls?\s+(\w+)\s+tool\s+with\s+(\w+\s*=.+?)\](?=\s*(?:\n|$|[A-Z]|Done|Created|Error|Directly))',
1627
+            r'\[calls?\s+(\w+)\s+tool\s+with\s+([^\]]+)\]',
1628
+            # Inline format: [calls write tool with file_path="..." and inline content "..."]
1629
+            r'\[calls?\s+(\w+)\s+tool\s+with\s+(.+?)\](?=\s*(?:\n|$|Directly|Done))',
14811630
         ]
14821631
 
14831632
         for pattern in bracket_patterns:
@@ -1520,11 +1669,16 @@ class Agent:
15201669
                     elif tool_name == "write":
15211670
                         # write tool: file_path=..., content="..."
15221671
                         # Handle quoted file paths
1523
-                        file_path_match = re.search(r'file_path[=:]\s*["\']?([^"\'`,]+)["\']?', args_str)
1672
+                        file_path_match = re.search(r'file_path[=:]\s*["\']?([^"\'`,\s]+)["\']?', args_str)
15241673
 
15251674
                         # For content, find the content= part and extract everything after it
15261675
                         # Handle both quoted and unquoted content
1527
-                        content_start = re.search(r'content[=:]\s*', args_str)
1676
+                        # Also handle "inline content" format: and inline content "..."
1677
+                        content_start = re.search(r'(?:inline\s+)?content[=:]\s*', args_str, re.IGNORECASE)
1678
+                        if not content_start:
1679
+                            # Also try: and inline content "..."
1680
+                            content_start = re.search(r'and\s+inline\s+content\s+', args_str, re.IGNORECASE)
1681
+
15281682
                         file_content = ""
15291683
                         if content_start:
15301684
                             rest = args_str[content_start.end():]
@@ -1605,7 +1759,7 @@ class Agent:
16051759
 
16061760
         for tool_name in tool_names:
16071761
             # Look for the start of a tool call JSON
1608
-            pattern = rf'\{{\s*"name"\s*:\s*"{tool_name}"\s*,\s*"parameters"\s*:\s*\{{'
1762
+            pattern = rf'\{{\s*"name"\s*:\s*"{tool_name}"\s*,\s*"(?:parameters|arguments)"\s*:\s*\{{'
16091763
             for match in re.finditer(pattern, content):
16101764
                 start = match.start()
16111765
 
@@ -1654,11 +1808,12 @@ class Agent:
16541808
                             except json.JSONDecodeError:
16551809
                                 continue
16561810
 
1657
-                        if "name" in data and "parameters" in data:
1811
+                        if "name" in data and ("parameters" in data or "arguments" in data):
1812
+                            args = data.get("arguments") or data.get("parameters", {})
16581813
                             tool_calls.append(ToolCall(
16591814
                                 id=f"raw_{data['name']}_{len(tool_calls)}",
16601815
                                 name=data["name"],
1661
-                                arguments=data["parameters"],
1816
+                                arguments=args,
16621817
                             ))
16631818
 
16641819
                 except Exception:
@@ -1672,3 +1827,4 @@ class Agent:
16721827
         self._recovery_context = None
16731828
         self._current_task = None
16741829
         self._executed_commands = set()  # Clear command dedup tracking
1830
+        self.safeguards.reset()  # Reset all runtime safeguards
src/loader/agent/prompts.py (modified)
@@ -182,15 +182,16 @@ Done. Created ~/Project/site/index.html.
182182
 
183183
 ## Rules
184184
 
185
-1. **EXECUTE, don't describe**: When asked to do something, USE TOOLS to do it immediately
186
-2. **No code blocks**: NEVER show code blocks. Don't say "the file will look like:" followed by code. Just USE THE TOOL.
187
-3. **Read before edit**: Always read a file before modifying it
188
-4. **Be concise**: Brief reasoning, then action, then short summary. No lengthy explanations.
189
-5. **NO PLACEHOLDERS**: NEVER use "..." or ellipsis as placeholder content. Always write COMPLETE, REAL content.
190
-6. **STOP WHEN DONE**: Once you've created the requested files, STOP. Don't keep verifying or re-reading.
191
-7. **No browser commands**: Don't use xdg-open, open, or browser commands - they don't work here.
192
-8. **No repetition**: Never execute the same command twice.
193
-9. **No previews**: Don't show "after editing, the file will contain:" - the tool shows results automatically.
185
+1. **EXECUTE, don't describe**: USE TOOLS immediately. No explanations first.
186
+2. **No code blocks EVER**: NEVER show ```. No bash blocks, no html blocks, no code blocks of any kind.
187
+3. **No narration**: Don't say "I will call the write tool" - JUST CALL IT. No announcing actions.
188
+4. **One action, then done**: Do one thing. Confirm it worked. Stop or continue. Don't repeat yourself.
189
+5. **Read before edit**: Always read a file before modifying it
190
+6. **NO PLACEHOLDERS**: Never use "..." as content. Write COMPLETE content.
191
+7. **STOP WHEN DONE**: File created? Stop. Don't verify, re-read, or do it again.
192
+8. **No browser commands**: xdg-open, open, browser commands don't work here.
193
+9. **Never repeat**: Created a file? Don't create it again. Ran a command? Don't run it again.
194
+10. **Stay focused**: Complete the user's request. Don't add extra steps or explanations.
194195
 
195196
 ## Examples of Correct Behavior
196197
 
@@ -213,10 +214,21 @@ Added the function to utils.py.
213214
 
214215
 ## What NOT To Do
215216
 
216
-- Do NOT say "you can run this command: ..."
217
-- Do NOT say "create a file with this content: ..."
218
-- Do NOT show code in markdown blocks for the user to copy
219
-- Do NOT explain how to do something - just DO IT
217
+- Do NOT say "I will use the write tool..." - JUST USE IT
218
+- Do NOT show code blocks (```) - EVER
219
+- Do NOT narrate: "Now I'll create..." "Next, I'll..." - JUST DO IT
220
+- Do NOT explain how to do something - DO IT
221
+- Do NOT show the same content twice (once as preview, once in tool)
222
+- Do NOT repeat actions you already completed
223
+
224
+## CRITICAL: No Redundancy
225
+
226
+Do NOT duplicate your work:
227
+- Show code block → then use tool (WRONG - just use the tool)
228
+- Describe action → narrate tool → use tool (WRONG - just use the tool)
229
+- Create file → create same file again (WRONG - do it once)
230
+
231
+Each action should happen ONCE. Use tools directly without preamble.
220232
 
221233
 You are an AGENT that EXECUTES tasks, not a chatbot that gives advice.
222234
 """
@@ -256,15 +268,16 @@ Wait for the result, then continue or finish.
256268
 
257269
 ## Rules
258270
 
259
-1. USE TOOLS to do things - never just describe
260
-2. Read files before editing them
261
-3. Be concise: brief intro, tool call, short summary. No lengthy explanations.
262
-4. If a tool fails, try a different approach
263
-5. **NO PLACEHOLDERS**: NEVER use "..." as content. Write COMPLETE, REAL content always.
264
-6. **STOP WHEN DONE**: Once files are created, STOP. Don't keep verifying or re-reading.
265
-7. **No browser commands**: Don't use xdg-open or open - they don't work here.
266
-8. **No repetition**: Never execute the same command twice.
267
-9. **No code blocks or previews**: Don't show "the file will look like:" - just use the tool.
271
+1. **USE TOOLS immediately** - No describing, no explaining, just do it
272
+2. **No code blocks EVER** - Never use ```. No bash blocks, html blocks, nothing
273
+3. **No narration** - Don't say "I'll call..." - JUST CALL IT
274
+4. **One action, then done** - Do one thing, confirm, stop or continue
275
+5. **Read before edit** - Always read files before modifying
276
+6. **NO PLACEHOLDERS** - Never use "..." as content. Write COMPLETE content.
277
+7. **STOP WHEN DONE** - File created? Stop. Don't verify or re-create.
278
+8. **No browser commands** - xdg-open doesn't work here
279
+9. **Never repeat** - Did something? Don't do it again.
280
+10. **Stay focused** - Complete the request, nothing more.
268281
 
269282
 ## Examples
270283
 
src/loader/agent/reasoning.py (modified)
@@ -758,9 +758,18 @@ def detect_premature_completion(task: str, response: str, actions_taken: list[st
758758
         "create a file", "write a file", "make a file",
759759
         "add a function", "edit the", "fix the", "update the",
760760
         "read the", "show me", "list",
761
+        # Web page / design tasks are also typically simple
762
+        "design a webpage", "create a webpage", "make a webpage",
763
+        "create a page", "design a page", "create an html",
764
+        "make an html", "write an html", "help me design",
765
+        "create a simple", "make a simple", "write a simple",
761766
     ]
762767
     is_simple = any(ind in task_lower for ind in simple_creation)
763768
 
769
+    # If we already created/wrote files, the task is probably done
770
+    if "write" in str(actions_taken).lower() and len(actions_taken) >= 1:
771
+        return False  # File was written, trust it's done
772
+
764773
     # If it's a simple task with at least one action, it's probably done
765774
     if is_simple and len(actions_taken) >= 1:
766775
         return False
src/loader/agent/safeguards.py (added)
1035 lines changed — click to load
@@ -0,0 +1,1035 @@
1
+"""Runtime safeguards to improve agent behavior.
2
+
3
+These safeguards help keep the agent on track when models don't follow
4
+instructions perfectly. They work at runtime to filter, detect, and correct
5
+problematic patterns.
6
+"""
7
+
8
+import re
9
+from dataclasses import dataclass, field
10
+from pathlib import Path
11
+
12
+
13
+@dataclass
14
+class FilterResult:
15
+    """Result of filtering content."""
16
+    content: str  # Filtered content
17
+    was_filtered: bool  # Whether any filtering occurred
18
+    removed_blocks: list[str] = field(default_factory=list)  # What was removed
19
+
20
+
21
+class CodeBlockFilter:
22
+    """Filters markdown code blocks and bracket tool calls from streamed content.
23
+
24
+    Handles both complete blocks (```...```) and partial blocks that span
25
+    multiple stream chunks. Also filters [calls X tool with ...] patterns.
26
+    """
27
+
28
+    def __init__(self):
29
+        self._buffer = ""
30
+        self._in_code_block = False
31
+        self._block_lang = ""
32
+        self._current_block = ""
33
+        self._in_bracket = False
34
+        self._bracket_content = ""
35
+        self._in_json_tool = False
36
+        self._json_brace_count = 0
37
+
38
+    def reset(self):
39
+        """Reset filter state."""
40
+        self._buffer = ""
41
+        self._in_code_block = False
42
+        self._block_lang = ""
43
+        self._current_block = ""
44
+        self._in_bracket = False
45
+        self._bracket_content = ""
46
+        self._in_json_tool = False
47
+        self._json_brace_count = 0
48
+
49
+    def _is_bracket_tool_start(self, text: str) -> bool:
50
+        """Check if text looks like start of a bracket tool call."""
51
+        # Patterns like: [calls, [call, [USE
52
+        return bool(re.match(r'\[(?:calls?|USE)\s', text, re.IGNORECASE))
53
+
54
+    def filter_chunk(self, chunk: str) -> FilterResult:
55
+        """Filter a streaming chunk, removing code blocks and bracket tool calls.
56
+
57
+        Returns filtered content. Handles partial blocks across chunks.
58
+        """
59
+        if not chunk:
60
+            return FilterResult(content="", was_filtered=False)
61
+
62
+        result_parts = []
63
+        removed = []
64
+        was_filtered = False
65
+
66
+        # Append the chunk to the pending buffer so markers split across chunks are still matched
67
+        self._buffer += chunk
68
+
69
+        while self._buffer:
70
+            # Handle bracket tool calls: [calls X tool with ...]
71
+            if self._in_bracket:
72
+                # Look for closing ]
73
+                end_idx = self._buffer.find(']')
74
+                if end_idx >= 0:
75
+                    self._bracket_content += self._buffer[:end_idx]
76
+                    removed.append(f"[{self._bracket_content}]")
77
+                    self._buffer = self._buffer[end_idx + 1:]
78
+                    self._in_bracket = False
79
+                    self._bracket_content = ""
80
+                    was_filtered = True
81
+                else:
82
+                    # Still in bracket, consume all
83
+                    self._bracket_content += self._buffer
84
+                    self._buffer = ""
85
+                    was_filtered = True
86
+                continue
87
+
88
+            # Check for bracket start: [calls, [USE, or [output (fake outputs)
89
+            bracket_match = re.search(r'\[(?=(?:calls?|USE|output)\s*[:\s])', self._buffer, re.IGNORECASE)
90
+            if bracket_match:
91
+                # Output everything before the bracket
92
+                result_parts.append(self._buffer[:bracket_match.start()])
93
+                self._buffer = self._buffer[bracket_match.start() + 1:]  # Skip the [
94
+                self._in_bracket = True
95
+                was_filtered = True
96
+                continue
97
+
98
+            # Handle JSON tool calls: {"name": "write", "arguments": {...}}
99
+            if self._in_json_tool:
100
+                # Track braces to find the end
101
+                for i, char in enumerate(self._buffer):
102
+                    if char == '{':
103
+                        self._json_brace_count += 1
104
+                    elif char == '}':
105
+                        self._json_brace_count -= 1
106
+                        if self._json_brace_count == 0:
107
+                            # Found end of JSON
108
+                            removed.append(self._buffer[:i + 1])
109
+                            self._buffer = self._buffer[i + 1:]
110
+                            self._in_json_tool = False
111
+                            was_filtered = True
112
+                            break
113
+                else:
114
+                    # Still in JSON, consume all
115
+                    self._buffer = ""
116
+                    was_filtered = True
117
+                continue
118
+
119
+            # Check for JSON tool call start: {"name": "write" etc
120
+            json_tool_match = re.search(
121
+                r'\{\s*"name"\s*:\s*"(?:write|read|edit|bash|glob|grep)"',
122
+                self._buffer
123
+            )
124
+            if json_tool_match:
125
+                # Output everything before the JSON
126
+                result_parts.append(self._buffer[:json_tool_match.start()])
127
+                self._buffer = self._buffer[json_tool_match.start():]
128
+                self._in_json_tool = True
129
+                self._json_brace_count = 0  # Will count starting from {
130
+                was_filtered = True
131
+                continue
132
+
133
+            # Check for preamble patterns and filter the line
134
+            preamble_match = re.search(
135
+                r'(Here is a JSON response|Here are the function calls|'
136
+                r'Here is the response with|I will respond with|'
137
+                r'The following JSON|Below is the)',
138
+                self._buffer, re.IGNORECASE
139
+            )
140
+            if preamble_match:
141
+                # Find end of line and remove whole line
142
+                line_start = self._buffer.rfind('\n', 0, preamble_match.start()) + 1
143
+                line_end = self._buffer.find('\n', preamble_match.end())
144
+                if line_end == -1:
145
+                    # Line continues to end of buffer - wait for more
146
+                    if line_start > 0:
147
+                        result_parts.append(self._buffer[:line_start])
148
+                    self._buffer = self._buffer[line_start:]
149
+                    break
150
+                else:
151
+                    # Remove the whole line
152
+                    result_parts.append(self._buffer[:line_start])
153
+                    removed.append(self._buffer[line_start:line_end])
154
+                    self._buffer = self._buffer[line_end:]
155
+                    was_filtered = True
156
+                    continue
157
+            if self._in_code_block:
158
+                # Look for closing ```
159
+                end_match = re.search(r'```', self._buffer)
160
+                if end_match:
161
+                    # Found end of code block
162
+                    block_content = self._buffer[:end_match.start()]
163
+                    self._current_block += block_content
164
+                    removed.append(f"```{self._block_lang}\n{self._current_block}```")
165
+                    self._buffer = self._buffer[end_match.end():]
166
+                    self._in_code_block = False
167
+                    self._current_block = ""
168
+                    self._block_lang = ""
169
+                    was_filtered = True
170
+                else:
171
+                    # Still in code block, consume all
172
+                    self._current_block += self._buffer
173
+                    self._buffer = ""
174
+                    was_filtered = True
175
+            else:
176
+                # Look for opening ```
177
+                start_match = re.search(r'```(\w*)\n?', self._buffer)
178
+                if start_match:
179
+                    # Found start of code block
180
+                    # Output everything before the block
181
+                    result_parts.append(self._buffer[:start_match.start()])
182
+                    self._block_lang = start_match.group(1)
183
+                    self._buffer = self._buffer[start_match.end():]
184
+                    self._in_code_block = True
185
+                    was_filtered = True
186
+                else:
187
+                    # Check if buffer ends with partial ``` marker
188
+                    if self._buffer.endswith('`') or self._buffer.endswith('``'):
189
+                        # Hold back potential partial marker
190
+                        split_point = len(self._buffer) - self._buffer[::-1].index('`') - 1
191
+                        if split_point > 0:
192
+                            # Find where backticks start
193
+                            for i in range(len(self._buffer) - 1, -1, -1):
194
+                                if self._buffer[i] != '`':
195
+                                    result_parts.append(self._buffer[:i+1])
196
+                                    self._buffer = self._buffer[i+1:]
197
+                                    break
198
+                        break
199
+                    else:
200
+                        # No code block markers, output all
201
+                        result_parts.append(self._buffer)
202
+                        self._buffer = ""
203
+
204
+        return FilterResult(
205
+            content="".join(result_parts),
206
+            was_filtered=was_filtered,
207
+            removed_blocks=removed,
208
+        )
209
+
210
+    def filter_complete(self, content: str) -> FilterResult:
211
+        """Filter complete content (non-streaming), removing code blocks, bracket tool calls, and preambles."""
212
+        removed = []
213
+
214
+        # Pattern to match code blocks
215
+        code_pattern = r'```\w*\n?[\s\S]*?```'
216
+        removed.extend(re.findall(code_pattern, content))
217
+        filtered = re.sub(code_pattern, '', content)
218
+
219
+        # Pattern to match bracket-format tool calls: [calls X tool with ...] and fake outputs
220
+        bracket_patterns = [
221
+            r'\[calls?\s+\w+\s+tool\s+with[:\s][^\]]+\]',
222
+            r'\[USE\s+\w+\s+tool[:\s][^\]]+\]',
223
+            r'\[output[:\s][^\]]+\]',  # Fake outputs from model
224
+        ]
225
+        for pattern in bracket_patterns:
226
+            matches = re.findall(pattern, filtered, re.IGNORECASE)
227
+            removed.extend(matches)
228
+            filtered = re.sub(pattern, '', filtered, flags=re.IGNORECASE)
229
+
230
+        # Pattern to match JSON tool calls: {"name": "write", "arguments": {...}}
231
+        # Use a function to handle nested braces properly
232
+        def remove_json_tool_calls(text: str) -> tuple[str, list[str]]:
233
+            json_removed = []
234
+            tool_pattern = r'\{\s*"name"\s*:\s*"(?:write|read|edit|bash|glob|grep)"'
235
+            result = text
236
+            while True:
237
+                match = re.search(tool_pattern, result)
238
+                if not match:
239
+                    break
240
+                # Find matching closing brace
241
+                start = match.start()
242
+                brace_count = 0
243
+                end = start
244
+                for i, char in enumerate(result[start:], start):
245
+                    if char == '{':
246
+                        brace_count += 1
247
+                    elif char == '}':
248
+                        brace_count -= 1
249
+                        if brace_count == 0:
250
+                            end = i + 1
251
+                            break
252
+                if end > start:
253
+                    json_removed.append(result[start:end])
254
+                    result = result[:start] + result[end:]
255
+                else:
256
+                    break  # Couldn't find matching brace
257
+            return result, json_removed
258
+
259
+        filtered, json_matches = remove_json_tool_calls(filtered)
260
+        removed.extend(json_matches)
261
+
262
+        # Pattern to match preamble lines (remove entire line)
263
+        preamble_patterns = [
264
+            r'^.*Here is a JSON response.*$',
265
+            r'^.*Here are the function calls.*$',
266
+            r'^.*Here is the response with.*$',
267
+            r'^.*I will respond with.*$',
268
+            r'^.*The following (JSON|function calls|tool calls).*$',
269
+            r'^.*Below (is|are) the (JSON|function|tool).*$',
270
+        ]
271
+        for pattern in preamble_patterns:
272
+            matches = re.findall(pattern, filtered, re.IGNORECASE | re.MULTILINE)
273
+            removed.extend(matches)
274
+            filtered = re.sub(pattern, '', filtered, flags=re.IGNORECASE | re.MULTILINE)
275
+
276
+        # Filter internal recovery/system prompts (multiline blocks)
277
+        internal_prompt_patterns = [
278
+            # Recovery prompts
279
+            r'## TOOL FAILURE - INVESTIGATE AND ADAPT[\s\S]*?What will you do\?',
280
+            r'## REQUIRED: Choose ONE[\s\S]*?(?=\n\n|\Z)',
281
+            r'## CRITICAL RULES:[\s\S]*?(?=\n\n|\Z)',
282
+            r'## Current attempt:.*$',
283
+            r'\*\*Your next action should gather information[\s\S]*?What will you do\?',
284
+            # Observation prefixes
285
+            r'^Observation \[[\w]+\]:.*$',
286
+        ]
287
+        for pattern in internal_prompt_patterns:
288
+            matches = re.findall(pattern, filtered, re.MULTILINE)
289
+            removed.extend(matches)
290
+            filtered = re.sub(pattern, '', filtered, flags=re.MULTILINE)
291
+
292
+        # Clean up multiple blank lines left behind
293
+        filtered = re.sub(r'\n{3,}', '\n\n', filtered)
294
+
295
+        return FilterResult(
296
+            content=filtered.strip(),
297
+            was_filtered=bool(removed),
298
+            removed_blocks=removed,
299
+        )
300
+
301
+
302
+@dataclass
303
+class PatternMatch:
304
+    """A detected problematic pattern."""
305
+    pattern_type: str  # 'code_block', 'narration', 'preview', 'preamble', 'repetition'
306
+    match_text: str
307
+    severity: str  # 'low', 'medium', 'high'
308
+
309
+
310
+class PatternDetector:
311
+    """Detects problematic patterns in agent output.
312
+
313
+    Patterns include:
314
+    - Code blocks (which should be tool calls instead)
315
+    - Narration ("I will call...", "Now I'll...")
316
+    - Previews ("The file will look like:", "After editing:")
317
+    - Repetitive commands
318
+    """
319
+
320
+    # Narration patterns - model announcing what it will do instead of doing it
321
+    NARRATION_PATTERNS = [
322
+        (r"I('ll| will) (use|call|execute|run) the (\w+) tool", "narration", "high"),
323
+        (r"Let me (use|call|execute|run) the (\w+) tool", "narration", "high"),
324
+        (r"Now I('ll| will) (create|write|edit|run|execute)", "narration", "medium"),
325
+        (r"I('m going to| am going to) (use|call|create|write)", "narration", "medium"),
326
+        (r"First,? I('ll| will) (use|call|create)", "narration", "medium"),
327
+        (r"Next,? I('ll| will) (use|call|create)", "narration", "medium"),
328
+    ]
329
+
330
+    # Preview patterns - model showing content instead of using tools
331
+    PREVIEW_PATTERNS = [
332
+        (r"(The|This) file will (look like|contain|have):", "preview", "high"),
333
+        (r"After editing,? (the file|it) will (look like|contain):", "preview", "high"),
334
+        (r"Here('s| is) (the|what) (content|code|file):", "preview", "high"),
335
+        (r"Save this (to|as|in) [\w./]+:", "preview", "high"),
336
+        (r"Create a file (with|containing):", "preview", "medium"),
337
+        (r"(The|Your) [\w./]+ (should|will) (look like|contain):", "preview", "medium"),
338
+    ]
339
+
340
+    # Preamble patterns - model describing JSON/function calls instead of using them
341
+    PREAMBLE_PATTERNS = [
342
+        (r"Here is a JSON response", "preamble", "high"),
343
+        (r"Here are the function calls", "preamble", "high"),
344
+        (r"Here is the response with", "preamble", "high"),
345
+        (r"I will respond with", "preamble", "high"),
346
+        (r"The following (JSON|function calls|tool calls)", "preamble", "high"),
347
+        (r"Below (is|are) the (JSON|function|tool)", "preamble", "high"),
348
+    ]
349
+
350
+    # Code block patterns
351
+    CODE_BLOCK_PATTERNS = [
352
+        (r'```\w+\n', "code_block", "high"),
353
+        (r'```\n', "code_block", "medium"),
354
+    ]
355
+
356
+    def __init__(self):
357
+        self._all_patterns = (
358
+            self.NARRATION_PATTERNS +
359
+            self.PREVIEW_PATTERNS +
360
+            self.PREAMBLE_PATTERNS +
361
+            self.CODE_BLOCK_PATTERNS
362
+        )
363
+        self._recent_detections: list[PatternMatch] = []
364
+
365
+    def reset(self):
366
+        """Reset detection state."""
367
+        self._recent_detections = []
368
+
369
+    def detect(self, content: str) -> list[PatternMatch]:
370
+        """Detect problematic patterns in content."""
371
+        matches = []
372
+
373
+        for pattern, ptype, severity in self._all_patterns:
374
+            for match in re.finditer(pattern, content, re.IGNORECASE):
375
+                matches.append(PatternMatch(
376
+                    pattern_type=ptype,
377
+                    match_text=match.group(0),
378
+                    severity=severity,
379
+                ))
380
+
381
+        self._recent_detections.extend(matches)
382
+        return matches
383
+
384
+    def has_high_severity(self, content: str) -> bool:
385
+        """Check if content has high-severity patterns."""
386
+        matches = self.detect(content)
387
+        return any(m.severity == "high" for m in matches)
388
+
389
+    def get_steering_message(self, matches: list[PatternMatch]) -> str | None:
390
+        """Generate a steering message based on detected patterns.
391
+
392
+        Returns None if no steering needed.
393
+        """
394
+        if not matches:
395
+            return None
396
+
397
+        # Prioritize high severity
398
+        high_severity = [m for m in matches if m.severity == "high"]
399
+        if not high_severity:
400
+            return None
401
+
402
+        # Generate appropriate steering message
403
+        pattern_types = set(m.pattern_type for m in high_severity)
404
+
405
+        if "preamble" in pattern_types:
406
+            return (
407
+                "[STOP] Do not describe JSON or function calls. "
408
+                "Just USE the tools directly. No preambles."
409
+            )
410
+        elif "code_block" in pattern_types or "preview" in pattern_types:
411
+            return (
412
+                "[REMINDER] Do not show code blocks or previews. "
413
+                "Use tools directly to create/edit files. "
414
+                "No ```code```, just call the tool."
415
+            )
416
+        elif "narration" in pattern_types:
417
+            return (
418
+                "[REMINDER] Don't announce tool calls. "
419
+                "Just use the tool directly without narration."
420
+            )
421
+
422
+        return None
423
+
424
+
425
+class ActionTracker:
426
+    """Tracks completed actions to prevent duplicates and detect loops.
427
+
428
+    Tracks:
429
+    - Files created (by path)
430
+    - Files edited (by path + edit signature)
431
+    - Commands executed (by command string)
432
+    - Directories created (by path)
433
+    - Action sequence for loop detection
434
+    - Normalized response signatures for text loop detection
435
+    """
436
+
437
+    MAX_SEQUENCE_LENGTH = 20  # Track last N actions
438
+    LOOP_PATTERN_MIN = 2  # Minimum pattern length to detect
439
+    LOOP_REPEAT_THRESHOLD = 2  # How many times pattern must repeat
440
+    MAX_RESPONSE_HISTORY = 5  # Track last N responses for text loops
441
+
442
+    def __init__(self):
443
+        self._files_created: set[str] = set()
444
+        self._files_edited: dict[str, list[str]] = {}  # path -> list of edit sigs
445
+        self._commands_run: set[str] = set()
446
+        self._dirs_created: set[str] = set()
447
+        self._action_sequence: list[str] = []  # For loop detection
448
+        self._response_history: list[str] = []  # For text loop detection
449
+
450
+    def reset(self):
451
+        """Reset all tracking."""
452
+        self._files_created.clear()
453
+        self._files_edited.clear()
454
+        self._commands_run.clear()
455
+        self._dirs_created.clear()
456
+        self._action_sequence.clear()
457
+        self._response_history.clear()
458
+
459
+    def _normalize_path(self, path: str) -> str:
460
+        """Normalize a file path for comparison."""
461
+        # Expand ~ and resolve to absolute
462
+        expanded = Path(path).expanduser()
463
+        try:
464
+            return str(expanded.resolve())
465
+        except Exception:
466
+            return str(expanded)
467
+
468
+    def _make_edit_signature(self, old_string: str, new_string: str) -> str:
469
+        """Create a signature for an edit operation."""
470
+        # Use hash of old+new to detect same edit
471
+        return f"{hash(old_string)}:{hash(new_string)}"
472
+
473
+    def would_duplicate_file_create(self, file_path: str) -> bool:
474
+        """Check if creating this file would be a duplicate."""
475
+        norm_path = self._normalize_path(file_path)
476
+        return norm_path in self._files_created
477
+
478
+    def would_duplicate_edit(self, file_path: str, old_string: str, new_string: str) -> bool:
479
+        """Check if this edit would be a duplicate."""
480
+        norm_path = self._normalize_path(file_path)
481
+        sig = self._make_edit_signature(old_string, new_string)
482
+        return sig in self._files_edited.get(norm_path, [])
483
+
484
+    def would_duplicate_command(self, command: str) -> bool:
485
+        """Check if this command would be a duplicate."""
486
+        # Normalize whitespace
487
+        norm_cmd = " ".join(command.split())
488
+        return norm_cmd in self._commands_run
489
+
490
+    def would_duplicate_mkdir(self, dir_path: str) -> bool:
491
+        """Check if creating this directory would be a duplicate."""
492
+        norm_path = self._normalize_path(dir_path)
493
+        return norm_path in self._dirs_created
494
+
495
+    def record_file_create(self, file_path: str) -> None:
496
+        """Record that a file was created."""
497
+        norm_path = self._normalize_path(file_path)
498
+        self._files_created.add(norm_path)
499
+
500
+    def record_edit(self, file_path: str, old_string: str, new_string: str) -> None:
501
+        """Record that an edit was made."""
502
+        norm_path = self._normalize_path(file_path)
503
+        sig = self._make_edit_signature(old_string, new_string)
504
+        if norm_path not in self._files_edited:
505
+            self._files_edited[norm_path] = []
506
+        self._files_edited[norm_path].append(sig)
507
+
508
+    def record_command(self, command: str) -> None:
509
+        """Record that a command was run."""
510
+        norm_cmd = " ".join(command.split())
511
+        self._commands_run.add(norm_cmd)
512
+
513
+        # Also track mkdir commands specially
514
+        mkdir_match = re.match(r'mkdir\s+(-p\s+)?(.+)', norm_cmd)
515
+        if mkdir_match:
516
+            dir_path = mkdir_match.group(2).strip().strip('"\'')
517
+            self._dirs_created.add(self._normalize_path(dir_path))
518
+
519
+    def record_mkdir(self, dir_path: str) -> None:
520
+        """Record that a directory was created."""
521
+        norm_path = self._normalize_path(dir_path)
522
+        self._dirs_created.add(norm_path)
523
+
524
+    def check_tool_call(self, tool_name: str, arguments: dict) -> tuple[bool, str]:
525
+        """Check if a tool call would be a duplicate.
526
+
527
+        Returns (is_duplicate, reason).
528
+        """
529
+        if tool_name == "write":
530
+            file_path = arguments.get("file_path", "")
531
+            if self.would_duplicate_file_create(file_path):
532
+                return True, f"File already created: {file_path}"
533
+
534
+        elif tool_name == "edit":
535
+            file_path = arguments.get("file_path", "")
536
+            old_string = arguments.get("old_string", "")
537
+            new_string = arguments.get("new_string", "")
538
+            if self.would_duplicate_edit(file_path, old_string, new_string):
539
+                return True, f"Same edit already applied to: {file_path}"
540
+
541
+        elif tool_name == "bash":
542
+            command = arguments.get("command", "")
543
+            if self.would_duplicate_command(command):
544
+                return True, f"Command already executed: {command[:50]}..."
545
+
546
+        return False, ""
547
+
548
+    def record_tool_call(self, tool_name: str, arguments: dict) -> None:
549
+        """Record a tool call as completed."""
550
+        # Track in action sequence for loop detection
551
+        self._action_sequence.append(tool_name)
552
+        if len(self._action_sequence) > self.MAX_SEQUENCE_LENGTH:
553
+            self._action_sequence.pop(0)
554
+
555
+        if tool_name == "write":
556
+            file_path = arguments.get("file_path", "")
557
+            if file_path:
558
+                self.record_file_create(file_path)
559
+
560
+        elif tool_name == "edit":
561
+            file_path = arguments.get("file_path", "")
562
+            old_string = arguments.get("old_string", "")
563
+            new_string = arguments.get("new_string", "")
564
+            if file_path:
565
+                self.record_edit(file_path, old_string, new_string)
566
+
567
+        elif tool_name == "bash":
568
+            command = arguments.get("command", "")
569
+            if command:
570
+                self.record_command(command)
571
+
572
+    def detect_loop(self) -> tuple[bool, str]:
573
+        """Detect if the agent is in a repetitive loop.
574
+
575
+        Returns (is_loop, pattern_description).
576
+        """
577
+        seq = self._action_sequence
578
+        if len(seq) < self.LOOP_PATTERN_MIN * self.LOOP_REPEAT_THRESHOLD:
579
+            return False, ""
580
+
581
+        # Check for repeating patterns of length 2 up to 5 (bounded by half the sequence length)
582
+        for pattern_len in range(self.LOOP_PATTERN_MIN, min(6, len(seq) // 2 + 1)):
583
+            # Get the most recent pattern
584
+            pattern = seq[-pattern_len:]
585
+
586
+            # Count how many times this pattern appears consecutively
587
+            repeats = 1
588
+            for i in range(len(seq) - pattern_len * 2, -1, -pattern_len):
589
+                if seq[i:i + pattern_len] == pattern:
590
+                    repeats += 1
591
+                else:
592
+                    break
593
+
594
+            if repeats >= self.LOOP_REPEAT_THRESHOLD:
595
+                pattern_str = " → ".join(pattern)
596
+                return True, f"Repeating pattern detected ({repeats}x): {pattern_str}"
597
+
598
+        return False, ""
599
+
600
+    def _normalize_response(self, response: str) -> str:
601
+        """Normalize a response for comparison.
602
+
603
+        Strips whitespace, lowercases, and takes first ~200 chars
604
+        to create a signature for detecting similar responses.
605
+        """
606
+        # Take first part of response for comparison
607
+        normalized = response.strip().lower()[:200]
608
+        # Remove common variable parts like paths, numbers
609
+        normalized = re.sub(r'/[\w/.-]+', '<PATH>', normalized)
610
+        normalized = re.sub(r'\d+', '<NUM>', normalized)
611
+        return normalized
612
+
613
+    def record_response(self, response: str) -> None:
614
+        """Record a response for text loop detection."""
615
+        normalized = self._normalize_response(response)
616
+        self._response_history.append(normalized)
617
+        if len(self._response_history) > self.MAX_RESPONSE_HISTORY:
618
+            self._response_history.pop(0)
619
+
620
+    def detect_text_loop(self, response: str) -> tuple[bool, str]:
621
+        """Detect if the agent is repeating the same response.
622
+
623
+        Returns (is_loop, description).
624
+        """
625
+        if len(self._response_history) < 1:
626
+            return False, ""
627
+
628
+        normalized = self._normalize_response(response)
629
+
630
+        # Check if this response matches recent ones (exact match)
631
+        exact_matches = sum(1 for r in self._response_history if r == normalized)
632
+        if exact_matches >= 1:
633
+            return True, f"Agent repeated the same response {exact_matches + 1} times"
634
+
635
+        # Check for common repetitive phrases that indicate looping
636
+        repetitive_phrases = [
637
+            "apologies for any confusion",
638
+            "let me proceed",
639
+            "i will now use the",
640
+            "let's proceed with creating",
641
+            "i'll create the",
642
+        ]
643
+        response_lower = response.lower()
644
+        for phrase in repetitive_phrases:
645
+            if phrase in response_lower:
646
+                # Check if this phrase appeared in recent responses
647
+                phrase_count = sum(1 for r in self._response_history if phrase in r)
648
+                if phrase_count >= 1:
649
+                    return True, f"Agent is stuck repeating '{phrase}'"
650
+
651
+        # Check for high similarity (not exact match)
652
+        current_words = set(normalized.split())
653
+        similarity_matches = 0
654
+        for prev in self._response_history[-3:]:
655
+            prev_words = set(prev.split())
656
+            if len(current_words) > 5 and len(prev_words) > 5:
657
+                overlap = len(current_words & prev_words)
658
+                similarity = overlap / max(len(current_words), len(prev_words))
659
+                if similarity > 0.7:  # Over 70% word overlap counts as a near-duplicate
660
+                    similarity_matches += 1
661
+
662
+        if similarity_matches >= 1:
663
+            return True, "Agent responses are highly repetitive"
664
+
665
+        return False, ""
666
+
667
+
668
+@dataclass
669
+class ValidationResult:
670
+    """Result of pre-action validation."""
671
+    valid: bool
672
+    reason: str = ""
673
+    suggestion: str = ""
674
+    severity: str = "warning"  # 'warning', 'error', 'block'
675
+
676
+
677
+class PreActionValidator:
678
+    """Validates tool calls before execution to catch problematic actions.
679
+
680
+    Catches:
681
+    - Empty/missing required arguments
682
+    - Invalid file paths
683
+    - Dangerous bash commands
684
+    - Writing empty content
685
+    - Nonsensical operations
686
+    """
687
+
688
+    # Dangerous bash patterns that should be blocked
689
+    DANGEROUS_PATTERNS = [
690
+        (r'rm\s+(-[rf]+\s+)?/', "Dangerous: removing from root directory"),
691
+        (r'rm\s+-rf\s+~', "Dangerous: removing home directory"),
692
+        (r'>\s*/dev/sd[a-z]', "Dangerous: writing directly to disk device"),
693
+        (r'mkfs\.', "Dangerous: formatting filesystem"),
694
+        (r'dd\s+.*of=/dev/', "Dangerous: dd to device"),
695
+        (r'chmod\s+-R\s+777\s+/', "Dangerous: making everything world-writable"),
696
+        (r':\(\)\s*\{\s*:\|:\s*&\s*\}\s*;', "Dangerous: fork bomb"),
697
+    ]
698
+
699
+    # Suspicious patterns that warrant a warning
700
+    SUSPICIOUS_PATTERNS = [
701
+        (r'rm\s+-rf\s+', "Warning: recursive force delete"),
702
+        (r'>\s*/etc/', "Warning: overwriting system config"),
703
+        (r'curl\s+.*\|\s*sh', "Warning: piping curl to shell"),
704
+        (r'wget\s+.*\|\s*sh', "Warning: piping wget to shell"),
705
+        (r'eval\s+', "Warning: using eval"),
706
+        (r'sudo\s+', "Warning: using sudo"),
707
+    ]
708
+
709
+    def validate(self, tool_name: str, arguments: dict) -> ValidationResult:
710
+        """Validate a tool call before execution.
711
+
712
+        Returns ValidationResult indicating if the action is valid.
713
+        """
714
+        if tool_name == "bash":
715
+            return self._validate_bash(arguments)
716
+        elif tool_name == "write":
717
+            return self._validate_write(arguments)
718
+        elif tool_name == "edit":
719
+            return self._validate_edit(arguments)
720
+        elif tool_name == "read":
721
+            return self._validate_read(arguments)
722
+        elif tool_name in ("glob", "grep"):
723
+            return self._validate_search(tool_name, arguments)
724
+
725
+        return ValidationResult(valid=True)
726
+
727
+    def _validate_bash(self, arguments: dict) -> ValidationResult:
728
+        """Validate bash command."""
729
+        command = arguments.get("command", "")
730
+
731
+        if not command or not command.strip():
732
+            return ValidationResult(
733
+                valid=False,
734
+                reason="Empty command",
735
+                suggestion="Provide a valid command to execute",
736
+                severity="error",
737
+            )
738
+
739
+        # Check for dangerous patterns
740
+        for pattern, reason in self.DANGEROUS_PATTERNS:
741
+            if re.search(pattern, command):
742
+                return ValidationResult(
743
+                    valid=False,
744
+                    reason=reason,
745
+                    suggestion="This command is too dangerous to execute",
746
+                    severity="block",
747
+                )
748
+
749
+        # Check for suspicious patterns (allow but warn)
750
+        for pattern, reason in self.SUSPICIOUS_PATTERNS:
751
+            if re.search(pattern, command):
752
+                return ValidationResult(
753
+                    valid=True,  # Allow but flag
754
+                    reason=reason,
755
+                    severity="warning",
756
+                )
757
+
758
+        # Check for commands that won't work in non-interactive mode
759
+        interactive_patterns = [
760
+            (r'\bnano\b', "nano requires interactive terminal"),
761
+            (r'\bvim?\b', "vim requires interactive terminal"),
762
+            (r'\bemacs\b', "emacs requires interactive terminal"),
763
+            (r'\bless\b', "less requires interactive terminal"),
764
+            (r'\bmore\b', "more requires interactive terminal"),
765
+            (r'\btop\b', "top requires interactive terminal"),
766
+            (r'\bhtop\b', "htop requires interactive terminal"),
767
+        ]
768
+        for pattern, reason in interactive_patterns:
769
+            if re.search(pattern, command):
770
+                return ValidationResult(
771
+                    valid=False,
772
+                    reason=reason,
773
+                    suggestion="Use non-interactive alternatives (cat, head, tail for viewing; sed for editing)",
774
+                    severity="error",
775
+                )
776
+
777
+        return ValidationResult(valid=True)
778
+
779
+    def _validate_write(self, arguments: dict) -> ValidationResult:
780
+        """Validate write operation."""
781
+        file_path = arguments.get("file_path", "")
782
+        content = arguments.get("content", "")
783
+
784
+        if not file_path or not file_path.strip():
785
+            return ValidationResult(
786
+                valid=False,
787
+                reason="Empty file path",
788
+                suggestion="Provide a valid file path",
789
+                severity="error",
790
+            )
791
+
792
+        # Check for path issues
793
+        path_result = self._validate_path(file_path)
794
+        if not path_result.valid:
795
+            return path_result
796
+
797
+        # Warn about empty content (might be intentional)
798
+        if content is None or (isinstance(content, str) and not content.strip()):
799
+            return ValidationResult(
800
+                valid=True,  # Allow but warn
801
+                reason="Writing empty content to file",
802
+                severity="warning",
803
+            )
804
+
805
+        # Check for writing to sensitive locations
806
+        sensitive_paths = ['/etc/', '/usr/', '/bin/', '/sbin/', '/boot/', '/sys/', '/proc/']
807
+        for sensitive in sensitive_paths:
808
+            if file_path.startswith(sensitive):
809
+                return ValidationResult(
810
+                    valid=False,
811
+                    reason=f"Cannot write to system directory: {sensitive}",
812
+                    suggestion="Write to a user directory instead",
813
+                    severity="block",
814
+                )
815
+
816
+        return ValidationResult(valid=True)
817
+
818
+    def _validate_edit(self, arguments: dict) -> ValidationResult:
819
+        """Validate edit operation."""
820
+        file_path = arguments.get("file_path", "")
821
+        old_string = arguments.get("old_string", "")
822
+        new_string = arguments.get("new_string", "")
823
+
824
+        if not file_path or not file_path.strip():
825
+            return ValidationResult(
826
+                valid=False,
827
+                reason="Empty file path",
828
+                suggestion="Provide a valid file path",
829
+                severity="error",
830
+            )
831
+
832
+        # Check for path issues
833
+        path_result = self._validate_path(file_path)
834
+        if not path_result.valid:
835
+            return path_result
836
+
837
+        # old_string can be empty (for prepending), but warn
838
+        if old_string is None:
839
+            return ValidationResult(
840
+                valid=False,
841
+                reason="old_string is None",
842
+                suggestion="Provide the text to replace (can be empty string for prepend)",
843
+                severity="error",
844
+            )
845
+
846
+        # new_string can legitimately be empty (for deletion)
847
+        if new_string is None:
848
+            return ValidationResult(
849
+                valid=False,
850
+                reason="new_string is None",
851
+                suggestion="Provide the replacement text (can be empty string for deletion)",
852
+                severity="error",
853
+            )
854
+
855
+        # Check if old and new are identical
856
+        if old_string == new_string:
857
+            return ValidationResult(
858
+                valid=False,
859
+                reason="old_string and new_string are identical - no change would occur",
860
+                suggestion="Provide different old and new strings",
861
+                severity="error",
862
+            )
863
+
864
+        return ValidationResult(valid=True)
865
+
866
+    def _validate_read(self, arguments: dict) -> ValidationResult:
867
+        """Validate read operation."""
868
+        file_path = arguments.get("file_path", "")
869
+
870
+        if not file_path or not file_path.strip():
871
+            return ValidationResult(
872
+                valid=False,
873
+                reason="Empty file path",
874
+                suggestion="Provide a valid file path",
875
+                severity="error",
876
+            )
877
+
878
+        return self._validate_path(file_path)
879
+
880
+    def _validate_search(self, tool_name: str, arguments: dict) -> ValidationResult:
881
+        """Validate glob/grep operations."""
882
+        pattern = arguments.get("pattern", "")
883
+
884
+        if not pattern or not pattern.strip():
885
+            return ValidationResult(
886
+                valid=False,
887
+                reason=f"Empty {tool_name} pattern",
888
+                suggestion="Provide a valid search pattern",
889
+                severity="error",
890
+            )
891
+
892
+        return ValidationResult(valid=True)
893
+
894
+    def _validate_path(self, file_path: str) -> ValidationResult:
895
+        """Validate a file path for common issues."""
896
+        # Check for null bytes (security issue)
897
+        if '\x00' in file_path:
898
+            return ValidationResult(
899
+                valid=False,
900
+                reason="Path contains null byte",
901
+                suggestion="Remove null bytes from path",
902
+                severity="block",
903
+            )
904
+
905
+        # Check for path traversal attempts outside reasonable bounds
906
+        # (Some traversal is fine for relative paths)
907
+        if '/../../../' in file_path or file_path.count('..') > 5:
908
+            return ValidationResult(
909
+                valid=False,
910
+                reason="Excessive path traversal",
911
+                suggestion="Use a direct path instead",
912
+                severity="warning",
913
+            )
914
+
915
+        return ValidationResult(valid=True)
916
+
917
+
918
class RuntimeSafeguards:
    """Single entry point bundling the agent's runtime safeguards.

    Delegates to a code-block filter, a pattern detector, an action tracker
    and a pre-action validator, and holds at most one pending steering
    message produced while filtering output.

    Usage:
        safeguards = RuntimeSafeguards()

        # For streaming:
        filtered = safeguards.filter_stream_chunk(chunk)
        if safeguards.should_steer():
            steering_msg = safeguards.get_steering_message()

        # Before tool execution:
        is_dup, reason = safeguards.check_duplicate(tool_name, args)
        if is_dup:
            skip this tool call

        # Pre-action validation:
        validation = safeguards.validate_action(tool_name, args)
        if not validation.valid:
            skip or warn

        # After tool execution:
        safeguards.record_action(tool_name, args)
    """

    def __init__(self):
        self.code_filter = CodeBlockFilter()
        self.pattern_detector = PatternDetector()
        self.action_tracker = ActionTracker()
        self.validator = PreActionValidator()
        # Steering text waiting to be consumed; None when nothing is queued.
        self._pending_steering: str | None = None
        # Recent raw stream text kept for pattern detection.
        self._accumulated_content = ""

    def reset(self):
        """Return every stateful component to its initial state."""
        self.code_filter.reset()
        self.pattern_detector.reset()
        self.action_tracker.reset()
        self._accumulated_content = ""
        self._pending_steering = None

    def _scan_for_steering(self, text: str) -> None:
        """Run pattern detection on ``text`` and queue any steering message."""
        if not (hits := self.pattern_detector.detect(text)):
            return
        if message := self.pattern_detector.get_steering_message(hits):
            self._pending_steering = message

    def filter_stream_chunk(self, chunk: str) -> str:
        """Filter one streamed chunk and return the text to display.

        The raw chunk is also appended to an internal buffer; once that buffer
        exceeds 200 characters it is scanned for steering patterns and then
        trimmed to its last 100 characters so context carries over.
        """
        filtered = self.code_filter.filter_chunk(chunk)

        buffered = self._accumulated_content + chunk
        self._accumulated_content = buffered

        if len(buffered) > 200:
            self._scan_for_steering(buffered)
            self._accumulated_content = buffered[-100:]

        return filtered.content

    def filter_complete_content(self, content: str) -> str:
        """Filter a whole (non-streamed) response and scan it for patterns."""
        filtered = self.code_filter.filter_complete(content)
        self._scan_for_steering(content)
        return filtered.content

    def should_steer(self) -> bool:
        """Tell whether a steering message is waiting to be injected."""
        return self._pending_steering is not None

    def get_steering_message(self) -> str | None:
        """Hand back the queued steering message (if any) and clear it."""
        message, self._pending_steering = self._pending_steering, None
        return message

    def check_duplicate(self, tool_name: str, arguments: dict) -> tuple[bool, str]:
        """Ask the action tracker whether this tool call is a duplicate."""
        return self.action_tracker.check_tool_call(tool_name, arguments)

    def record_action(self, tool_name: str, arguments: dict) -> None:
        """Register a finished tool call with the action tracker."""
        self.action_tracker.record_tool_call(tool_name, arguments)

    def detect_loop(self) -> tuple[bool, str]:
        """Ask the action tracker whether the agent is looping.

        Returns (is_loop, pattern_description).
        """
        return self.action_tracker.detect_loop()

    def validate_action(self, tool_name: str, arguments: dict) -> ValidationResult:
        """Run pre-action validation for a tool call.

        Returns the validator's ValidationResult, carrying validity and any
        warnings/errors.
        """
        return self.validator.validate(tool_name, arguments)

    def record_response(self, response: str) -> None:
        """Store a response so later text-loop checks can compare against it."""
        self.action_tracker.record_response(response)

    def detect_text_loop(self, response: str) -> tuple[bool, str]:
        """Ask the action tracker whether ``response`` repeats earlier ones.

        Returns (is_loop, description).
        """
        return self.action_tracker.detect_text_loop(response)