@@ -130,6 +130,33 @@ class CodeBlockFilter: |
| 130 | 130 | was_filtered = True |
| 131 | 131 | continue |
| 132 | 132 | |
| 133 | + # Check for hallucinated tool narration and filter the line |
| 134 | + hallucination_match = re.search( |
| 135 | + r'([Uu]sed\s+`?(?:bash|write|read|edit|glob|grep)`?\s+tool|' |
| 136 | + r'[Uu]sing\s+the\s+`?(?:bash|write|read|edit|glob|grep)`?\s+tool|' |
| 137 | + r'with\s+file_path\s*=\s*[`\'"]|' |
| 138 | + r'with\s+command\s*[`\'"]|' |
| 139 | + r'[Hh]ere\s+is\s+what\s+[Ii]\s+did:)', |
| 140 | + self._buffer |
| 141 | + ) |
| 142 | + if hallucination_match: |
| 143 | + # Find end of line and remove whole line |
| 144 | + line_start = self._buffer.rfind('\n', 0, hallucination_match.start()) + 1 |
| 145 | + line_end = self._buffer.find('\n', hallucination_match.end()) |
| 146 | + if line_end == -1: |
| 147 | + # Line continues to end of buffer - wait for more |
| 148 | + if line_start > 0: |
| 149 | + result_parts.append(self._buffer[:line_start]) |
| 150 | + self._buffer = self._buffer[line_start:] |
| 151 | + break |
| 152 | + else: |
| 153 | + # Remove the whole line |
| 154 | + result_parts.append(self._buffer[:line_start]) |
| 155 | + removed.append(self._buffer[line_start:line_end]) |
| 156 | + self._buffer = self._buffer[line_end:] |
| 157 | + was_filtered = True |
| 158 | + continue |
| 159 | + |
| 133 | 160 | # Check for preamble patterns and filter the line |
| 134 | 161 | preamble_match = re.search( |
| 135 | 162 | r'(Here is a JSON response|Here are the function calls|' |
@@ -273,6 +300,23 @@ class CodeBlockFilter: |
| 273 | 300 | removed.extend(matches) |
| 274 | 301 | filtered = re.sub(pattern, '', filtered, flags=re.IGNORECASE | re.MULTILINE) |
| 275 | 302 | |
| 303 | + # Pattern to match hallucinated/narrated tool uses (remove entire line) |
| 304 | + # These are lines where model describes using tools instead of actually calling them |
| 305 | + hallucination_patterns = [ |
| 306 | + r'^.*[Uu]sed\s+`?(?:bash|write|read|edit|glob|grep)`?\s+tool.*$', # "Used bash tool..." |
| 307 | + r'^.*[Uu]sing\s+the\s+`?(?:bash|write|read|edit|glob|grep)`?\s+tool.*$', # "...using the write tool" |
| 308 | + r'^.*with\s+file_path\s*=\s*[`\'"][^`\'"]+[`\'"].*$', # Narrated file_path parameter |
| 309 | + r'^.*with\s+command\s*[`\'"][^`\'"]+[`\'"].*$', # Narrated bash command |
| 310 | + r'^\s*\*\s*[Uu]sed\s+`.*$', # "* Used `bash`..." (bullet point narration) |
| 311 | + r'^.*[Hh]ere\s+is\s+what\s+[Ii]\s+did:.*$', # "Here is what I did:" |
| 312 | + r'^\s*\d+\.\s+[Uu]sed\s+.*tool.*$', # "1. Used bash tool..." |
| 313 | + r'^\s*\d+\.\s+[Cc]reated\s+.*using\s+the\s+.*tool.*$', # "1. Created... using the write tool" |
| 314 | + ] |
| 315 | + for pattern in hallucination_patterns: |
| 316 | + matches = re.findall(pattern, filtered, re.MULTILINE) |
| 317 | + removed.extend(matches) |
| 318 | + filtered = re.sub(pattern, '', filtered, flags=re.MULTILINE) |
| 319 | + |
| 276 | 320 | # Filter internal recovery/system prompts (multiline blocks) |
| 277 | 321 | internal_prompt_patterns = [ |
| 278 | 322 | # Recovery prompts |