@@ -181,43 +181,45 @@ class ActionTracker: |
| 181 | self._response_history.pop(0) | 181 | self._response_history.pop(0) |
| 182 | | 182 | |
| 183 | def detect_text_loop(self, response: str) -> tuple[bool, str]: | 183 | def detect_text_loop(self, response: str) -> tuple[bool, str]: |
| 184 | - if len(self._response_history) < 1: | 184 | + if len(self._response_history) < 2: |
| 185 | return False, "" | 185 | return False, "" |
| 186 | | 186 | |
| 187 | normalized = self._normalize_response(response) | 187 | normalized = self._normalize_response(response) |
| 188 | exact_matches = sum(1 for r in self._response_history if r == normalized) | 188 | exact_matches = sum(1 for r in self._response_history if r == normalized) |
| 189 | - if exact_matches >= 1: | 189 | + if exact_matches >= 2: |
| 190 | return True, f"Agent repeated the same response {exact_matches + 1} times" | 190 | return True, f"Agent repeated the same response {exact_matches + 1} times" |
| 191 | | 191 | |
| 192 | repetitive_phrases = [ | 192 | repetitive_phrases = [ |
| 193 | "apologies for any confusion", | 193 | "apologies for any confusion", |
| 194 | "let me proceed", | 194 | "let me proceed", |
| 195 | "i will now use the", | 195 | "i will now use the", |
| 196 | - "let's proceed with creating", | | |
| 197 | - "i'll create the", | | |
| 198 | ] | 196 | ] |
| 199 | response_lower = response.lower() | 197 | response_lower = response.lower() |
| 200 | for phrase in repetitive_phrases: | 198 | for phrase in repetitive_phrases: |
| 201 | if phrase in response_lower: | 199 | if phrase in response_lower: |
| 202 | phrase_count = sum(1 for r in self._response_history if phrase in r) | 200 | phrase_count = sum(1 for r in self._response_history if phrase in r) |
| 203 | - if phrase_count >= 1: | 201 | + if phrase_count >= 2: |
| 204 | return True, f"Agent is stuck repeating '{phrase}'" | 202 | return True, f"Agent is stuck repeating '{phrase}'" |
| 205 | | 203 | |
| 206 | current_words = set(normalized.split()) | 204 | current_words = set(normalized.split()) |
| 207 | similarity_matches = 0 | 205 | similarity_matches = 0 |
| 208 | for prev in self._response_history[-3:]: | 206 | for prev in self._response_history[-3:]: |
| 209 | prev_words = set(prev.split()) | 207 | prev_words = set(prev.split()) |
| 210 | - if len(current_words) > 5 and len(prev_words) > 5: | 208 | + if len(current_words) > 10 and len(prev_words) > 10: |
| 211 | overlap = len(current_words & prev_words) | 209 | overlap = len(current_words & prev_words) |
| 212 | similarity = overlap / max(len(current_words), len(prev_words)) | 210 | similarity = overlap / max(len(current_words), len(prev_words)) |
| 213 | - if similarity > 0.7: | 211 | + if similarity > 0.85: |
| 214 | similarity_matches += 1 | 212 | similarity_matches += 1 |
| 215 | | 213 | |
| 216 | - if similarity_matches >= 1: | 214 | + if similarity_matches >= 2: |
| 217 | return True, "Agent responses are highly repetitive" | 215 | return True, "Agent responses are highly repetitive" |
| 218 | | 216 | |
| 219 | return False, "" | 217 | return False, "" |
| 220 | | 218 | |
| | 219 | + def reset_response_history(self) -> None: |
| | 220 | + """Clear response history between turns to prevent cross-turn false positives.""" |
| | 221 | + self._response_history.clear() |
| | 222 | + |
| 221 | | 223 | |
| 222 | @dataclass | 224 | @dataclass |
| 223 | class ValidationResult: | 225 | class ValidationResult: |