tenseleyflow/loader / 59ebe9c

Browse files

Move runtime safeguards into runtime package

Authored by espadonne
SHA
59ebe9c31ca7aa37bc086061f34ca0bd1b228f97
Parents
5c8a424
Tree
88e3bc3

2 changed files

Status | File | + | -
M src/loader/agent/safeguards.py 19 593
C src/loader/runtime/safeguards.py 0 0
src/loader/agent/safeguards.py — modified
@@ -1,595 +1,21 @@
1
-"""Runtime safeguards to improve agent behavior.
2
-
3
-These safeguards help keep the agent on track when models don't follow
4
-instructions perfectly. They work at runtime to filter, detect, and correct
5
-problematic patterns.
6
-"""
7
-
8
-import re
9
-from dataclasses import dataclass, field
10
-from pathlib import Path
11
-
12
-from ..runtime.safeguard_services import (
13
-    ActionTracker,
14
-    PreActionValidator,
15
-    ValidationResult,
1
+"""Compatibility exports for runtime-owned safeguards."""
2
+
3
+from ..runtime.safeguard_services import ActionTracker, PreActionValidator, ValidationResult
4
+from ..runtime.safeguards import (
5
+    CodeBlockFilter,
6
+    FilterResult,
7
+    PatternDetector,
8
+    PatternMatch,
9
+    RuntimeSafeguards,
16 10
 )
17 11
 
18
-
19
-@dataclass
20
-class FilterResult:
21
-    """Result of filtering content."""
22
-    content: str  # Filtered content
23
-    was_filtered: bool  # Whether any filtering occurred
24
-    removed_blocks: list[str] = field(default_factory=list)  # What was removed
25
-
26
-
27
-class CodeBlockFilter:
28
-    """Filters markdown code blocks and bracket tool calls from streamed content.
29
-
30
-    Handles both complete blocks (```...```) and partial blocks that span
31
-    multiple stream chunks. Also filters [calls X tool with ...] patterns.
32
-
33
-    Candidate for removal once the typed runtime makes tool-call leakage
34
-    structurally impossible.
35
-    """
36
-
37
-    def __init__(self):
38
-        self._buffer = ""
39
-        self._in_code_block = False
40
-        self._block_lang = ""
41
-        self._current_block = ""
42
-        self._in_bracket = False
43
-        self._bracket_content = ""
44
-        self._in_json_tool = False
45
-        self._json_brace_count = 0
46
-
47
-    def reset(self):
48
-        """Reset filter state."""
49
-        self._buffer = ""
50
-        self._in_code_block = False
51
-        self._block_lang = ""
52
-        self._current_block = ""
53
-        self._in_bracket = False
54
-        self._bracket_content = ""
55
-        self._in_json_tool = False
56
-        self._json_brace_count = 0
57
-
58
-    def _is_bracket_tool_start(self, text: str) -> bool:
59
-        """Check if text looks like start of a bracket tool call."""
60
-        # Patterns like: [calls, [call, [USE
61
-        return bool(re.match(r'\[(?:calls?|USE)\s', text, re.IGNORECASE))
62
-
63
-    def filter_chunk(self, chunk: str) -> FilterResult:
64
-        """Filter a streaming chunk, removing code blocks and bracket tool calls.
65
-
66
-        Returns filtered content. Handles partial blocks across chunks.
67
-        """
68
-        if not chunk:
69
-            return FilterResult(content="", was_filtered=False)
70
-
71
-        result_parts = []
72
-        removed = []
73
-        was_filtered = False
74
-
75
-        # Process character by character to handle streaming
76
-        self._buffer += chunk
77
-
78
-        while self._buffer:
79
-            # Handle bracket tool calls: [calls X tool with ...]
80
-            if self._in_bracket:
81
-                # Look for closing ]
82
-                end_idx = self._buffer.find(']')
83
-                if end_idx >= 0:
84
-                    self._bracket_content += self._buffer[:end_idx]
85
-                    removed.append(f"[{self._bracket_content}]")
86
-                    self._buffer = self._buffer[end_idx + 1:]
87
-                    self._in_bracket = False
88
-                    self._bracket_content = ""
89
-                    was_filtered = True
90
-                else:
91
-                    # Still in bracket, consume all
92
-                    self._bracket_content += self._buffer
93
-                    self._buffer = ""
94
-                    was_filtered = True
95
-                continue
96
-
97
-            # Check for bracket start: [calls, [USE, or [output (fake outputs)
98
-            bracket_match = re.search(r'\[(?=(?:calls?|USE|output)\s*[:\s])', self._buffer, re.IGNORECASE)
99
-            if bracket_match:
100
-                # Output everything before the bracket
101
-                result_parts.append(self._buffer[:bracket_match.start()])
102
-                self._buffer = self._buffer[bracket_match.start() + 1:]  # Skip the [
103
-                self._in_bracket = True
104
-                was_filtered = True
105
-                continue
106
-
107
-            # Handle JSON tool calls: {"name": "write", "arguments": {...}}
108
-            if self._in_json_tool:
109
-                # Track braces to find the end
110
-                for i, char in enumerate(self._buffer):
111
-                    if char == '{':
112
-                        self._json_brace_count += 1
113
-                    elif char == '}':
114
-                        self._json_brace_count -= 1
115
-                        if self._json_brace_count == 0:
116
-                            # Found end of JSON
117
-                            removed.append(self._buffer[:i + 1])
118
-                            self._buffer = self._buffer[i + 1:]
119
-                            self._in_json_tool = False
120
-                            was_filtered = True
121
-                            break
122
-                else:
123
-                    # Still in JSON, consume all
124
-                    self._buffer = ""
125
-                    was_filtered = True
126
-                continue
127
-
128
-            # Check for JSON tool call start: {"name": "write" etc
129
-            json_tool_match = re.search(
130
-                r'\{\s*"name"\s*:\s*"(?:write|read|edit|bash|glob|grep)"',
131
-                self._buffer
132
-            )
133
-            if json_tool_match:
134
-                # Output everything before the JSON
135
-                result_parts.append(self._buffer[:json_tool_match.start()])
136
-                self._buffer = self._buffer[json_tool_match.start():]
137
-                self._in_json_tool = True
138
-                self._json_brace_count = 0  # Will count starting from {
139
-                was_filtered = True
140
-                continue
141
-
142
-            # Check for hallucinated tool narration and filter the line
143
-            hallucination_match = re.search(
144
-                r'([Uu]sed\s+`?(?:bash|write|read|edit|glob|grep)`?\s+tool|'
145
-                r'[Uu]sing\s+the\s+`?(?:bash|write|read|edit|glob|grep)`?\s+tool|'
146
-                r'with\s+file_path\s*=\s*[`\'"]|'
147
-                r'with\s+command\s*[`\'"]|'
148
-                r'[Hh]ere\s+is\s+what\s+[Ii]\s+did:)',
149
-                self._buffer
150
-            )
151
-            if hallucination_match:
152
-                # Find end of line and remove whole line
153
-                line_start = self._buffer.rfind('\n', 0, hallucination_match.start()) + 1
154
-                line_end = self._buffer.find('\n', hallucination_match.end())
155
-                if line_end == -1:
156
-                    # Line continues to end of buffer - wait for more
157
-                    if line_start > 0:
158
-                        result_parts.append(self._buffer[:line_start])
159
-                    self._buffer = self._buffer[line_start:]
160
-                    break
161
-                else:
162
-                    # Remove the whole line
163
-                    result_parts.append(self._buffer[:line_start])
164
-                    removed.append(self._buffer[line_start:line_end])
165
-                    self._buffer = self._buffer[line_end:]
166
-                    was_filtered = True
167
-                    continue
168
-
169
-            # Check for preamble patterns and filter the line
170
-            preamble_match = re.search(
171
-                r'(Here is a JSON response|Here are the function calls|'
172
-                r'Here is the response with|I will respond with|'
173
-                r'The following JSON|Below is the)',
174
-                self._buffer, re.IGNORECASE
175
-            )
176
-            if preamble_match:
177
-                # Find end of line and remove whole line
178
-                line_start = self._buffer.rfind('\n', 0, preamble_match.start()) + 1
179
-                line_end = self._buffer.find('\n', preamble_match.end())
180
-                if line_end == -1:
181
-                    # Line continues to end of buffer - wait for more
182
-                    if line_start > 0:
183
-                        result_parts.append(self._buffer[:line_start])
184
-                    self._buffer = self._buffer[line_start:]
185
-                    break
186
-                else:
187
-                    # Remove the whole line
188
-                    result_parts.append(self._buffer[:line_start])
189
-                    removed.append(self._buffer[line_start:line_end])
190
-                    self._buffer = self._buffer[line_end:]
191
-                    was_filtered = True
192
-                    continue
193
-            if self._in_code_block:
194
-                # Look for closing ```
195
-                end_match = re.search(r'```', self._buffer)
196
-                if end_match:
197
-                    # Found end of code block
198
-                    block_content = self._buffer[:end_match.start()]
199
-                    self._current_block += block_content
200
-                    removed.append(f"```{self._block_lang}\n{self._current_block}```")
201
-                    self._buffer = self._buffer[end_match.end():]
202
-                    self._in_code_block = False
203
-                    self._current_block = ""
204
-                    self._block_lang = ""
205
-                    was_filtered = True
206
-                else:
207
-                    # Still in code block, consume all
208
-                    self._current_block += self._buffer
209
-                    self._buffer = ""
210
-                    was_filtered = True
211
-            else:
212
-                # Look for opening ```
213
-                start_match = re.search(r'```(\w*)\n?', self._buffer)
214
-                if start_match:
215
-                    # Found start of code block
216
-                    # Output everything before the block
217
-                    result_parts.append(self._buffer[:start_match.start()])
218
-                    self._block_lang = start_match.group(1)
219
-                    self._buffer = self._buffer[start_match.end():]
220
-                    self._in_code_block = True
221
-                    was_filtered = True
222
-                else:
223
-                    # Check if buffer ends with partial ``` marker
224
-                    if self._buffer.endswith('`') or self._buffer.endswith('``'):
225
-                        # Hold back potential partial marker
226
-                        split_point = len(self._buffer) - self._buffer[::-1].index('`') - 1
227
-                        if split_point > 0:
228
-                            # Find where backticks start
229
-                            for i in range(len(self._buffer) - 1, -1, -1):
230
-                                if self._buffer[i] != '`':
231
-                                    result_parts.append(self._buffer[:i+1])
232
-                                    self._buffer = self._buffer[i+1:]
233
-                                    break
234
-                        break
235
-                    else:
236
-                        # No code block markers, output all
237
-                        result_parts.append(self._buffer)
238
-                        self._buffer = ""
239
-
240
-        return FilterResult(
241
-            content="".join(result_parts),
242
-            was_filtered=was_filtered,
243
-            removed_blocks=removed,
244
-        )
245
-
246
-    def filter_complete(self, content: str) -> FilterResult:
247
-        """Filter complete content (non-streaming), removing code blocks, bracket tool calls, and preambles."""
248
-        removed = []
249
-
250
-        # Pattern to match code blocks
251
-        code_pattern = r'```\w*\n?[\s\S]*?```'
252
-        removed.extend(re.findall(code_pattern, content))
253
-        filtered = re.sub(code_pattern, '', content)
254
-
255
-        # Pattern to match bracket-format tool calls: [calls X tool with ...] and fake outputs
256
-        bracket_patterns = [
257
-            r'\[calls?\s+\w+\s+tool\s+with[:\s][^\]]+\]',
258
-            r'\[USE\s+\w+\s+tool[:\s][^\]]+\]',
259
-            r'\[output[:\s][^\]]+\]',  # Fake outputs from model
260
-        ]
261
-        for pattern in bracket_patterns:
262
-            matches = re.findall(pattern, filtered, re.IGNORECASE)
263
-            removed.extend(matches)
264
-            filtered = re.sub(pattern, '', filtered, flags=re.IGNORECASE)
265
-
266
-        # Pattern to match JSON tool calls: {"name": "write", "arguments": {...}}
267
-        # Use a function to handle nested braces properly
268
-        def remove_json_tool_calls(text: str) -> tuple[str, list[str]]:
269
-            json_removed = []
270
-            tool_pattern = r'\{\s*"name"\s*:\s*"(?:write|read|edit|bash|glob|grep)"'
271
-            result = text
272
-            while True:
273
-                match = re.search(tool_pattern, result)
274
-                if not match:
275
-                    break
276
-                # Find matching closing brace
277
-                start = match.start()
278
-                brace_count = 0
279
-                end = start
280
-                for i, char in enumerate(result[start:], start):
281
-                    if char == '{':
282
-                        brace_count += 1
283
-                    elif char == '}':
284
-                        brace_count -= 1
285
-                        if brace_count == 0:
286
-                            end = i + 1
287
-                            break
288
-                if end > start:
289
-                    json_removed.append(result[start:end])
290
-                    result = result[:start] + result[end:]
291
-                else:
292
-                    break  # Couldn't find matching brace
293
-            return result, json_removed
294
-
295
-        filtered, json_matches = remove_json_tool_calls(filtered)
296
-        removed.extend(json_matches)
297
-
298
-        # Pattern to match preamble lines (remove entire line)
299
-        preamble_patterns = [
300
-            r'^.*Here is a JSON response.*$',
301
-            r'^.*Here are the function calls.*$',
302
-            r'^.*Here is the response with.*$',
303
-            r'^.*I will respond with.*$',
304
-            r'^.*The following (JSON|function calls|tool calls).*$',
305
-            r'^.*Below (is|are) the (JSON|function|tool).*$',
306
-        ]
307
-        for pattern in preamble_patterns:
308
-            matches = re.findall(pattern, filtered, re.IGNORECASE | re.MULTILINE)
309
-            removed.extend(matches)
310
-            filtered = re.sub(pattern, '', filtered, flags=re.IGNORECASE | re.MULTILINE)
311
-
312
-        # Pattern to match hallucinated/narrated tool uses (remove entire line)
313
-        # These are lines where model describes using tools instead of actually calling them
314
-        hallucination_patterns = [
315
-            r'^.*[Uu]sed\s+`?(?:bash|write|read|edit|glob|grep)`?\s+tool.*$',  # "Used bash tool..."
316
-            r'^.*[Uu]sing\s+the\s+`?(?:bash|write|read|edit|glob|grep)`?\s+tool.*$',  # "...using the write tool"
317
-            r'^.*with\s+file_path\s*=\s*[`\'"][^`\'"]+[`\'"].*$',  # Narrated file_path parameter
318
-            r'^.*with\s+command\s*[`\'"][^`\'"]+[`\'"].*$',  # Narrated bash command
319
-            r'^\s*\*\s*[Uu]sed\s+`.*$',  # "* Used `bash`..." (bullet point narration)
320
-            r'^.*[Hh]ere\s+is\s+what\s+[Ii]\s+did:.*$',  # "Here is what I did:"
321
-            r'^\s*\d+\.\s+[Uu]sed\s+.*tool.*$',  # "1. Used bash tool..."
322
-            r'^\s*\d+\.\s+[Cc]reated\s+.*using\s+the\s+.*tool.*$',  # "1. Created... using the write tool"
323
-        ]
324
-        for pattern in hallucination_patterns:
325
-            matches = re.findall(pattern, filtered, re.MULTILINE)
326
-            removed.extend(matches)
327
-            filtered = re.sub(pattern, '', filtered, flags=re.MULTILINE)
328
-
329
-        # Filter internal recovery/system prompts (multiline blocks)
330
-        internal_prompt_patterns = [
331
-            # Recovery prompts
332
-            r'## TOOL FAILURE - INVESTIGATE AND ADAPT[\s\S]*?What will you do\?',
333
-            r'## REQUIRED: Choose ONE[\s\S]*?(?=\n\n|\Z)',
334
-            r'## CRITICAL RULES:[\s\S]*?(?=\n\n|\Z)',
335
-            r'## Current attempt:.*$',
336
-            r'\*\*Your next action should gather information[\s\S]*?What will you do\?',
337
-            # Observation prefixes
338
-            r'^Observation \[[\w]+\]:.*$',
339
-        ]
340
-        for pattern in internal_prompt_patterns:
341
-            matches = re.findall(pattern, filtered, re.MULTILINE)
342
-            removed.extend(matches)
343
-            filtered = re.sub(pattern, '', filtered, flags=re.MULTILINE)
344
-
345
-        # Clean up multiple blank lines left behind
346
-        filtered = re.sub(r'\n{3,}', '\n\n', filtered)
347
-
348
-        return FilterResult(
349
-            content=filtered.strip(),
350
-            was_filtered=bool(removed),
351
-            removed_blocks=removed,
352
-        )
353
-
354
-
355
-@dataclass
356
-class PatternMatch:
357
-    """A detected problematic pattern."""
358
-    pattern_type: str  # 'code_block', 'narration', 'preview', 'repetition'
359
-    match_text: str
360
-    severity: str  # 'low', 'medium', 'high'
361
-
362
-
363
-class PatternDetector:
364
-    """Detects problematic patterns in agent output.
365
-
366
-    Patterns include:
367
-    - Code blocks (which should be tool calls instead)
368
-    - Narration ("I will call...", "Now I'll...")
369
-    - Previews ("The file will look like:", "After editing:")
370
-    - Repetitive commands
371
-    """
372
-
373
-    # Narration patterns - model announcing what it will do instead of doing it
374
-    NARRATION_PATTERNS = [
375
-        (r"I('ll| will) (use|call|execute|run) the (\w+) tool", "narration", "high"),
376
-        (r"Let me (use|call|execute|run) the (\w+) tool", "narration", "high"),
377
-        (r"Now I('ll| will) (create|write|edit|run|execute)", "narration", "medium"),
378
-        (r"I('m going to| am going to) (use|call|create|write)", "narration", "medium"),
379
-        (r"First,? I('ll| will) (use|call|create)", "narration", "medium"),
380
-        (r"Next,? I('ll| will) (use|call|create)", "narration", "medium"),
381
-    ]
382
-
383
-    # Preview patterns - model showing content instead of using tools
384
-    PREVIEW_PATTERNS = [
385
-        (r"(The|This) file will (look like|contain|have):", "preview", "high"),
386
-        (r"After editing,? (the file|it) will (look like|contain):", "preview", "high"),
387
-        (r"Here('s| is) (the|what) (content|code|file):", "preview", "high"),
388
-        (r"Save this (to|as|in) [\w./]+:", "preview", "high"),
389
-        (r"Create a file (with|containing):", "preview", "medium"),
390
-        (r"(The|Your) [\w./]+ (should|will) (look like|contain):", "preview", "medium"),
391
-    ]
392
-
393
-    # Preamble patterns - model describing JSON/function calls instead of using them
394
-    PREAMBLE_PATTERNS = [
395
-        (r"Here is a JSON response", "preamble", "high"),
396
-        (r"Here are the function calls", "preamble", "high"),
397
-        (r"Here is the response with", "preamble", "high"),
398
-        (r"I will respond with", "preamble", "high"),
399
-        (r"The following (JSON|function calls|tool calls)", "preamble", "high"),
400
-        (r"Below (is|are) the (JSON|function|tool)", "preamble", "high"),
401
-    ]
402
-
403
-    # Code block patterns
404
-    CODE_BLOCK_PATTERNS = [
405
-        (r'```\w+\n', "code_block", "high"),
406
-        (r'```\n', "code_block", "medium"),
407
-    ]
408
-
409
-    def __init__(self):
410
-        self._all_patterns = (
411
-            self.NARRATION_PATTERNS +
412
-            self.PREVIEW_PATTERNS +
413
-            self.PREAMBLE_PATTERNS +
414
-            self.CODE_BLOCK_PATTERNS
415
-        )
416
-        self._recent_detections: list[PatternMatch] = []
417
-
418
-    def reset(self):
419
-        """Reset detection state."""
420
-        self._recent_detections = []
421
-
422
-    def detect(self, content: str) -> list[PatternMatch]:
423
-        """Detect problematic patterns in content."""
424
-        matches = []
425
-
426
-        for pattern, ptype, severity in self._all_patterns:
427
-            for match in re.finditer(pattern, content, re.IGNORECASE):
428
-                matches.append(PatternMatch(
429
-                    pattern_type=ptype,
430
-                    match_text=match.group(0),
431
-                    severity=severity,
432
-                ))
433
-
434
-        self._recent_detections.extend(matches)
435
-        return matches
436
-
437
-    def has_high_severity(self, content: str) -> bool:
438
-        """Check if content has high-severity patterns."""
439
-        matches = self.detect(content)
440
-        return any(m.severity == "high" for m in matches)
441
-
442
-    def get_steering_message(self, matches: list[PatternMatch]) -> str | None:
443
-        """Generate a steering message based on detected patterns.
444
-
445
-        Returns None if no steering needed.
446
-        """
447
-        if not matches:
448
-            return None
449
-
450
-        # Prioritize high severity
451
-        high_severity = [m for m in matches if m.severity == "high"]
452
-        if not high_severity:
453
-            return None
454
-
455
-        # Generate appropriate steering message
456
-        pattern_types = set(m.pattern_type for m in high_severity)
457
-
458
-        if "preamble" in pattern_types:
459
-            return (
460
-                "[STOP] Do not describe JSON or function calls. "
461
-                "Just USE the tools directly. No preambles."
462
-            )
463
-        elif "code_block" in pattern_types or "preview" in pattern_types:
464
-            return (
465
-                "[REMINDER] Do not show code blocks or previews. "
466
-                "Use tools directly to create/edit files. "
467
-                "No ```code```, just call the tool."
468
-            )
469
-        elif "narration" in pattern_types:
470
-            return (
471
-                "[REMINDER] Don't announce tool calls. "
472
-                "Just use the tool directly without narration."
473
-            )
474
-
475
-        return None
476
-
477
-
478
-class RuntimeSafeguards:
479
-    """Combined runtime safeguards for the agent.
480
-
481
-    Usage:
482
-        safeguards = RuntimeSafeguards()
483
-
484
-        # For streaming:
485
-        filtered = safeguards.filter_stream_chunk(chunk)
486
-        if safeguards.should_steer():
487
-            steering_msg = safeguards.get_steering_message()
488
-
489
-        # Before tool execution:
490
-        is_dup, reason = safeguards.check_duplicate(tool_name, args)
491
-        if is_dup:
492
-            skip this tool call
493
-
494
-        # Pre-action validation:
495
-        validation = safeguards.validate_action(tool_name, args)
496
-        if not validation.valid:
497
-            skip or warn
498
-
499
-        # After tool execution:
500
-        safeguards.record_action(tool_name, args)
501
-    """
502
-
503
-    def __init__(self):
504
-        self.code_filter = CodeBlockFilter()
505
-        self.pattern_detector = PatternDetector()
506
-        self.action_tracker = ActionTracker()
507
-        self.validator = PreActionValidator()
508
-        self._pending_steering: str | None = None
509
-        self._accumulated_content = ""
510
-
511
-    def reset(self):
512
-        """Reset all safeguards for a new conversation."""
513
-        self.code_filter.reset()
514
-        self.pattern_detector.reset()
515
-        self.action_tracker.reset()
516
-        self._pending_steering = None
517
-        self._accumulated_content = ""
518
-
519
-    def filter_stream_chunk(self, chunk: str) -> str:
520
-        """Filter a streaming chunk, removing code blocks.
521
-
522
-        Also detects patterns for potential steering.
523
-        """
524
-        # Filter code blocks
525
-        result = self.code_filter.filter_chunk(chunk)
526
-
527
-        # Accumulate for pattern detection
528
-        self._accumulated_content += chunk
529
-
530
-        # Check for patterns periodically (every 200 chars)
531
-        if len(self._accumulated_content) > 200:
532
-            matches = self.pattern_detector.detect(self._accumulated_content)
533
-            if matches:
534
-                steering = self.pattern_detector.get_steering_message(matches)
535
-                if steering:
536
-                    self._pending_steering = steering
537
-            self._accumulated_content = self._accumulated_content[-100:]  # Keep last 100 chars for context
538
-
539
-        return result.content
540
-
541
-    def filter_complete_content(self, content: str) -> str:
542
-        """Filter complete content (non-streaming)."""
543
-        result = self.code_filter.filter_complete(content)
544
-
545
-        # Also detect patterns
546
-        matches = self.pattern_detector.detect(content)
547
-        if matches:
548
-            steering = self.pattern_detector.get_steering_message(matches)
549
-            if steering:
550
-                self._pending_steering = steering
551
-
552
-        return result.content
553
-
554
-    def should_steer(self) -> bool:
555
-        """Check if we should inject a steering message."""
556
-        return self._pending_steering is not None
557
-
558
-    def get_steering_message(self) -> str | None:
559
-        """Get pending steering message and clear it."""
560
-        msg = self._pending_steering
561
-        self._pending_steering = None
562
-        return msg
563
-
564
-    def check_duplicate(self, tool_name: str, arguments: dict) -> tuple[bool, str]:
565
-        """Check if a tool call would be a duplicate."""
566
-        return self.action_tracker.check_tool_call(tool_name, arguments)
567
-
568
-    def record_action(self, tool_name: str, arguments: dict) -> None:
569
-        """Record a completed tool action."""
570
-        self.action_tracker.record_tool_call(tool_name, arguments)
571
-
572
-    def detect_loop(self) -> tuple[bool, str]:
573
-        """Detect if the agent is in a repetitive loop.
574
-
575
-        Returns (is_loop, pattern_description).
576
-        """
577
-        return self.action_tracker.detect_loop()
578
-
579
-    def validate_action(self, tool_name: str, arguments: dict) -> ValidationResult:
580
-        """Validate a tool action before execution.
581
-
582
-        Returns ValidationResult with validity and any warnings/errors.
583
-        """
584
-        return self.validator.validate(tool_name, arguments)
585
-
586
-    def record_response(self, response: str) -> None:
587
-        """Record a response for text loop detection."""
588
-        self.action_tracker.record_response(response)
589
-
590
-    def detect_text_loop(self, response: str) -> tuple[bool, str]:
591
-        """Detect if the agent is repeating the same response.
592
-
593
-        Returns (is_loop, description).
594
-        """
595
-        return self.action_tracker.detect_text_loop(response)
12
+__all__ = [
13
+    "ActionTracker",
14
+    "CodeBlockFilter",
15
+    "FilterResult",
16
+    "PatternDetector",
17
+    "PatternMatch",
18
+    "PreActionValidator",
19
+    "RuntimeSafeguards",
20
+    "ValidationResult",
21
+]
src/loader/agent/safeguards.py → src/loader/runtime/safeguards.py — copied (98% similarity)
@@ -1,15 +1,9 @@
1
-"""Runtime safeguards to improve agent behavior.
2
-
3
-These safeguards help keep the agent on track when models don't follow
4
-instructions perfectly. They work at runtime to filter, detect, and correct
5
-problematic patterns.
6
-"""
1
+"""Runtime-owned safeguard helpers and combined safeguard policy."""
7 2
 
8 3
 import re
9 4
 from dataclasses import dataclass, field
10
-from pathlib import Path
11 5
 
12
-from ..runtime.safeguard_services import (
6
+from .safeguard_services import (
13 7
     ActionTracker,
14 8
     PreActionValidator,
15 9
     ValidationResult,