`9a32994`

Fix false text loop detection: raise thresholds, reset history between turns

Authored by

espadonne 1 month ago

SHA: 9a329946696bc278f3661a2d155b44cdef96cb16
Parents: b2fd90c
Tree: c3c80e3

4 changed files

Status	File	+	-
M	`src/loader/runtime/conversation.py`	1	0
M	`src/loader/runtime/safeguard_services.py`	10	8
M	`src/loader/runtime/safeguards.py`	4	0
M	`tests/helpers/runtime_harness.py`	1	1

`src/loader/runtime/conversation.py` (modified)

          """Run one task turn and return a structured summary."""
          reset_runtime_logger()
 +        self.context.safeguards.reset_response_history()
          prepared_turn = await self.turn_preparation.prepare(
              task=task,

`src/loader/runtime/safeguard_services.py` (modified)

              self._response_history.pop(0)
      def detect_text_loop(self, response: str) -> tuple[bool, str]:
 -        if len(self._response_history) < 1:
 +        if len(self._response_history) < 2:
              return False, ""
          normalized = self._normalize_response(response)
          exact_matches = sum(1 for r in self._response_history if r == normalized)
 -        if exact_matches >= 1:
 +        if exact_matches >= 2:
              return True, f"Agent repeated the same response {exact_matches + 1} times"
          repetitive_phrases = [
              "apologies for any confusion",
              "let me proceed",
              "i will now use the",
 -            "let's proceed with creating",
 -            "i'll create the",
          ]
          response_lower = response.lower()
          for phrase in repetitive_phrases:
              if phrase in response_lower:
                  phrase_count = sum(1 for r in self._response_history if phrase in r)
 -                if phrase_count >= 1:
 +                if phrase_count >= 2:
                      return True, f"Agent is stuck repeating '{phrase}'"
          current_words = set(normalized.split())
          similarity_matches = 0
          for prev in self._response_history[-3:]:
              prev_words = set(prev.split())
 -            if len(current_words) > 5 and len(prev_words) > 5:
 +            if len(current_words) > 10 and len(prev_words) > 10:
                  overlap = len(current_words & prev_words)
                  similarity = overlap / max(len(current_words), len(prev_words))
 -                if similarity > 0.7:
 +                if similarity > 0.85:
                      similarity_matches += 1
 -        if similarity_matches >= 1:
 +        if similarity_matches >= 2:
              return True, "Agent responses are highly repetitive"
          return False, ""
 +    def reset_response_history(self) -> None:
 +        """Clear response history between turns to prevent cross-turn false positives."""
 +        self._response_history.clear()
++
  @dataclass
  class ValidationResult:

`src/loader/runtime/safeguards.py` (modified)

          Returns (is_loop, description).
          """
          return self.action_tracker.detect_text_loop(response)
++
 +    def reset_response_history(self) -> None:
 +        """Clear response history between turns."""
 +        self.action_tracker.reset_response_history()

`tests/helpers/runtime_harness.py` (modified)


             )
         )
         if not self._completions:
-            raise AssertionError("No scripted completion left for this scenario")
+            return CompletionResponse(content="Done.")
         return self._completions.pop(0)
 
     async def stream(

`@@ -71,7 +71,7 @@` class ScriptedBackend(LLMBackend):
71	71	)
72	72	)
73	73	if not self._completions:
74		- raise AssertionError("No scripted completion left for this scenario")
	74	+ return CompletionResponse(content="Done.")
75	75	return self._completions.pop(0)
76	76
77	77	async def stream(