`9a32994`

Fix false text loop detection: raise thresholds, reset history between turns

Authored by

espadonne 1 month ago

SHA: 9a329946696bc278f3661a2d155b44cdef96cb16
Parents: b2fd90c
Tree: c3c80e3

4 changed files

Status	File	+	-
M	`src/loader/runtime/conversation.py`	1	0
M	`src/loader/runtime/safeguard_services.py`	10	8
M	`src/loader/runtime/safeguards.py`	4	0
M	`tests/helpers/runtime_harness.py`	1	1

src/loader/runtime/conversation.py (modified)


         """Run one task turn and return a structured summary."""
 
         reset_runtime_logger()
+        self.context.safeguards.reset_response_history()
 
         prepared_turn = await self.turn_preparation.prepare(
             task=task,

src/loader/runtime/safeguard_services.py (modified)

              self._response_history.pop(0)
      def detect_text_loop(self, response: str) -> tuple[bool, str]:
--        if len(self._response_history) < 1:
++        if len(self._response_history) < 2:
              return False, ""
          normalized = self._normalize_response(response)
          exact_matches = sum(1 for r in self._response_history if r == normalized)
--        if exact_matches >= 1:
++        if exact_matches >= 2:
              return True, f"Agent repeated the same response {exact_matches + 1} times"
          repetitive_phrases = [
              "apologies for any confusion",
              "let me proceed",
              "i will now use the",
--            "let's proceed with creating",
--            "i'll create the",
          ]
          response_lower = response.lower()
          for phrase in repetitive_phrases:
              if phrase in response_lower:
                  phrase_count = sum(1 for r in self._response_history if phrase in r)
--                if phrase_count >= 1:
++                if phrase_count >= 2:
                      return True, f"Agent is stuck repeating '{phrase}'"
          current_words = set(normalized.split())
          similarity_matches = 0
          for prev in self._response_history[-3:]:
              prev_words = set(prev.split())
--            if len(current_words) > 5 and len(prev_words) > 5:
++            if len(current_words) > 10 and len(prev_words) > 10:
                  overlap = len(current_words & prev_words)
                  similarity = overlap / max(len(current_words), len(prev_words))
--                if similarity > 0.7:
++                if similarity > 0.85:
                      similarity_matches += 1
--        if similarity_matches >= 1:
++        if similarity_matches >= 2:
              return True, "Agent responses are highly repetitive"
          return False, ""
++    def reset_response_history(self) -> None:
++        """Clear response history between turns to prevent cross-turn false positives."""
++        self._response_history.clear()
++
  @dataclass
  class ValidationResult:

src/loader/runtime/safeguards.py (modified)

          Returns (is_loop, description).
          """
          return self.action_tracker.detect_text_loop(response)
++
++    def reset_response_history(self) -> None:
++        """Clear response history between turns."""
++        self.action_tracker.reset_response_history()

tests/helpers/runtime_harness.py (modified)


             )
         )
         if not self._completions:
-            raise AssertionError("No scripted completion left for this scenario")
+            return CompletionResponse(content="Done.")
         return self._completions.pop(0)
 
     async def stream(

`@@ -142,6 +142,7 @@` class ConversationRuntime:
142	"""Run one task turn and return a structured summary."""	142	"""Run one task turn and return a structured summary."""
143		143
144	reset_runtime_logger()	144	reset_runtime_logger()
		145	+ self.context.safeguards.reset_response_history()
145		146
146	prepared_turn = await self.turn_preparation.prepare(	147	prepared_turn = await self.turn_preparation.prepare(
147	task=task,	148	task=task,

`@@ -71,7 +71,7 @@` class ScriptedBackend(LLMBackend):
71	)	71	)
72	)	72	)
73	if not self._completions:	73	if not self._completions:
74	- raise AssertionError("No scripted completion left for this scenario")	74	+ return CompletionResponse(content="Done.")
75	return self._completions.pop(0)	75	return self._completions.pop(0)
76		76
77	async def stream(	77	async def stream(