tenseleyflow/loader / 9a32994

Browse files

Fix false text loop detection: raise thresholds, reset history between turns

Authored by espadonne
SHA
9a329946696bc278f3661a2d155b44cdef96cb16
Parents
b2fd90c
Tree
c3c80e3

4 changed files

Status File + -
M src/loader/runtime/conversation.py 1 0
M src/loader/runtime/safeguard_services.py 10 8
M src/loader/runtime/safeguards.py 4 0
M tests/helpers/runtime_harness.py 1 1
src/loader/runtime/conversation.py modified
@@ -142,6 +142,7 @@ class ConversationRuntime:
142
         """Run one task turn and return a structured summary."""
142
         """Run one task turn and return a structured summary."""
143
 
143
 
144
         reset_runtime_logger()
144
         reset_runtime_logger()
145
+        self.context.safeguards.reset_response_history()
145
 
146
 
146
         prepared_turn = await self.turn_preparation.prepare(
147
         prepared_turn = await self.turn_preparation.prepare(
147
             task=task,
148
             task=task,
src/loader/runtime/safeguard_services.py modified
@@ -181,43 +181,45 @@ class ActionTracker:
181
             self._response_history.pop(0)
181
             self._response_history.pop(0)
182
 
182
 
183
     def detect_text_loop(self, response: str) -> tuple[bool, str]:
183
     def detect_text_loop(self, response: str) -> tuple[bool, str]:
184
-        if len(self._response_history) < 1:
184
+        if len(self._response_history) < 2:
185
             return False, ""
185
             return False, ""
186
 
186
 
187
         normalized = self._normalize_response(response)
187
         normalized = self._normalize_response(response)
188
         exact_matches = sum(1 for r in self._response_history if r == normalized)
188
         exact_matches = sum(1 for r in self._response_history if r == normalized)
189
-        if exact_matches >= 1:
189
+        if exact_matches >= 2:
190
             return True, f"Agent repeated the same response {exact_matches + 1} times"
190
             return True, f"Agent repeated the same response {exact_matches + 1} times"
191
 
191
 
192
         repetitive_phrases = [
192
         repetitive_phrases = [
193
             "apologies for any confusion",
193
             "apologies for any confusion",
194
             "let me proceed",
194
             "let me proceed",
195
             "i will now use the",
195
             "i will now use the",
196
-            "let's proceed with creating",
197
-            "i'll create the",
198
         ]
196
         ]
199
         response_lower = response.lower()
197
         response_lower = response.lower()
200
         for phrase in repetitive_phrases:
198
         for phrase in repetitive_phrases:
201
             if phrase in response_lower:
199
             if phrase in response_lower:
202
                 phrase_count = sum(1 for r in self._response_history if phrase in r)
200
                 phrase_count = sum(1 for r in self._response_history if phrase in r)
203
-                if phrase_count >= 1:
201
+                if phrase_count >= 2:
204
                     return True, f"Agent is stuck repeating '{phrase}'"
202
                     return True, f"Agent is stuck repeating '{phrase}'"
205
 
203
 
206
         current_words = set(normalized.split())
204
         current_words = set(normalized.split())
207
         similarity_matches = 0
205
         similarity_matches = 0
208
         for prev in self._response_history[-3:]:
206
         for prev in self._response_history[-3:]:
209
             prev_words = set(prev.split())
207
             prev_words = set(prev.split())
210
-            if len(current_words) > 5 and len(prev_words) > 5:
208
+            if len(current_words) > 10 and len(prev_words) > 10:
211
                 overlap = len(current_words & prev_words)
209
                 overlap = len(current_words & prev_words)
212
                 similarity = overlap / max(len(current_words), len(prev_words))
210
                 similarity = overlap / max(len(current_words), len(prev_words))
213
-                if similarity > 0.7:
211
+                if similarity > 0.85:
214
                     similarity_matches += 1
212
                     similarity_matches += 1
215
 
213
 
216
-        if similarity_matches >= 1:
214
+        if similarity_matches >= 2:
217
             return True, "Agent responses are highly repetitive"
215
             return True, "Agent responses are highly repetitive"
218
 
216
 
219
         return False, ""
217
         return False, ""
220
 
218
 
219
+    def reset_response_history(self) -> None:
220
+        """Clear response history between turns to prevent cross-turn false positives."""
221
+        self._response_history.clear()
222
+
221
 
223
 
222
 @dataclass
224
 @dataclass
223
 class ValidationResult:
225
 class ValidationResult:
src/loader/runtime/safeguards.py modified
@@ -587,3 +587,7 @@ class RuntimeSafeguards:
587
         Returns (is_loop, description).
587
         Returns (is_loop, description).
588
         """
588
         """
589
         return self.action_tracker.detect_text_loop(response)
589
         return self.action_tracker.detect_text_loop(response)
590
+
591
+    def reset_response_history(self) -> None:
592
+        """Clear response history between turns."""
593
+        self.action_tracker.reset_response_history()
tests/helpers/runtime_harness.py modified
@@ -71,7 +71,7 @@ class ScriptedBackend(LLMBackend):
71
             )
71
             )
72
         )
72
         )
73
         if not self._completions:
73
         if not self._completions:
74
-            raise AssertionError("No scripted completion left for this scenario")
74
+            return CompletionResponse(content="Done.")
75
         return self._completions.pop(0)
75
         return self._completions.pop(0)
76
 
76
 
77
     async def stream(
77
     async def stream(