Reset empty retry after progress
Authored by
mfwolffe <wolffemf@dukes.jmu.edu>
- SHA: fd483bfb4682b811185656389200cd50c40d0eb2
- Parents: e4b7f3b
- Tree: a4f4506
fd483bf
fd483bfb4682b811185656389200cd50c40d0eb2e4b7f3b
a4f4506

| Status | File | + | - |
|---|---|---|---|
| M | src/loader/runtime/turn_iteration.py | 36 | 0 |
| M | tests/test_turn_iteration.py | 33 | 2 |
src/loader/runtime/turn_iteration.py modified
@@ -244,6 +244,8 @@ class TurnIterationController: | |||
| 244 | reason_code="repair_empty_response", | 244 | reason_code="repair_empty_response", |
| 245 | kind=TurnTransitionKind.RETRY, | 245 | kind=TurnTransitionKind.RETRY, |
| 246 | ) | 246 | ) |
| 247 | + if _successful_progress_after_latest_empty_retry(self.context.session.messages): | ||
| 248 | + empty_retry_count = 0 | ||
| 247 | next_empty_retry_count = empty_retry_count + 1 | 249 | next_empty_retry_count = empty_retry_count + 1 |
| 248 | empty_decision = self.repairer.handle_empty_response( | 250 | empty_decision = self.repairer.handle_empty_response( |
| 249 | task=task, | 251 | task=task, |
@@ -322,3 +324,37 @@ class TurnIterationController: | |||
| 322 | "[PLANNED ARTIFACTS STILL MISSING]", | 324 | "[PLANNED ARTIFACTS STILL MISSING]", |
| 323 | ) | 325 | ) |
| 324 | ) | 326 | ) |
| 327 | + | ||
| 328 | + | ||
| 329 | +def _successful_progress_after_latest_empty_retry(messages: list[Message]) -> bool: | ||
| 330 | + """Return whether tool progress should start a fresh empty-response episode.""" | ||
| 331 | + | ||
| 332 | + saw_progress = False | ||
| 333 | + for message in reversed(messages): | ||
| 334 | + content = str(getattr(message, "content", "") or "") | ||
| 335 | + role = getattr(message, "role", None) | ||
| 336 | + role_value = getattr(role, "value", role) | ||
| 337 | + if role_value == Role.USER.value and content.startswith( | ||
| 338 | + "[EMPTY ASSISTANT RESPONSE]" | ||
| 339 | + ): | ||
| 340 | + return saw_progress | ||
| 341 | + if _message_is_successful_progress(message): | ||
| 342 | + saw_progress = True | ||
| 343 | + return False | ||
| 344 | + | ||
| 345 | + | ||
| 346 | +def _message_is_successful_progress(message: Message) -> bool: | ||
| 347 | + role = getattr(message, "role", None) | ||
| 348 | + role_value = getattr(role, "value", role) | ||
| 349 | + content = str(getattr(message, "content", "") or "") | ||
| 350 | + if role_value == Role.TOOL.value: | ||
| 351 | + blocked_prefixes = ( | ||
| 352 | + "[Blocked -", | ||
| 353 | + "[Skipped - duplicate action:", | ||
| 354 | + "[Skipped -", | ||
| 355 | + "Recovering from error", | ||
| 356 | + ) | ||
| 357 | + return bool(content.strip()) and not content.startswith(blocked_prefixes) | ||
| 358 | + if role_value == Role.ASSISTANT.value: | ||
| 359 | + return bool(content.strip()) | ||
| 360 | + return False | ||
tests/test_turn_iteration.py modified
@@ -7,9 +7,12 @@ from pathlib import Path | |||
| 7 | import pytest | 7 | import pytest |
| 8 | 8 | ||
| 9 | from loader.agent.loop import Agent, AgentConfig | 9 | from loader.agent.loop import Agent, AgentConfig |
| 10 | -from loader.llm.base import CompletionResponse, ToolCall | 10 | +from loader.llm.base import CompletionResponse, Message, Role, ToolCall |
| 11 | from loader.runtime.conversation import ConversationRuntime | 11 | from loader.runtime.conversation import ConversationRuntime |
| 12 | -from loader.runtime.turn_iteration import TurnIterationAction | 12 | +from loader.runtime.turn_iteration import ( |
| 13 | + TurnIterationAction, | ||
| 14 | + _successful_progress_after_latest_empty_retry, | ||
| 15 | +) | ||
| 13 | from tests.helpers.runtime_harness import ScriptedBackend | 16 | from tests.helpers.runtime_harness import ScriptedBackend |
| 14 | 17 | ||
| 15 | 18 | ||
@@ -138,3 +141,31 @@ async def test_turn_iteration_executes_native_tool_batch_and_continues( | |||
| 138 | event.type == "tool_result" and "Loader runtime notes" in event.content | 141 | event.type == "tool_result" and "Loader runtime notes" in event.content |
| 139 | for event in events | 142 | for event in events |
| 140 | ) | 143 | ) |
| 144 | + | ||
| 145 | + | ||
| 146 | +def test_empty_retry_episode_resets_after_successful_tool_progress() -> None: | ||
| 147 | + messages = [ | ||
| 148 | + Message(role=Role.USER, content="[EMPTY ASSISTANT RESPONSE] retry 1/6"), | ||
| 149 | + Message.tool_result_message( | ||
| 150 | + tool_call_id="write-1", | ||
| 151 | + display_content="Observation [write]: Result: Successfully wrote file", | ||
| 152 | + result_content="Observation [write]: Result: Successfully wrote file", | ||
| 153 | + is_error=False, | ||
| 154 | + ), | ||
| 155 | + ] | ||
| 156 | + | ||
| 157 | + assert _successful_progress_after_latest_empty_retry(messages) is True | ||
| 158 | + | ||
| 159 | + | ||
| 160 | +def test_empty_retry_episode_does_not_reset_after_blocked_tool_result() -> None: | ||
| 161 | + messages = [ | ||
| 162 | + Message(role=Role.USER, content="[EMPTY ASSISTANT RESPONSE] retry 1/6"), | ||
| 163 | + Message.tool_result_message( | ||
| 164 | + tool_call_id="write-1", | ||
| 165 | + display_content="[Blocked - HTML content contains placeholder or stub text]", | ||
| 166 | + result_content="[Blocked - HTML content contains placeholder or stub text]", | ||
| 167 | + is_error=True, | ||
| 168 | + ), | ||
| 169 | + ] | ||
| 170 | + | ||
| 171 | + assert _successful_progress_after_latest_empty_retry(messages) is False | ||