Collapse empty-response retry prompts
- SHA: b8752d4de8b369dc5ec2e393038283f82d365da9 (short: b8752d4)
- Parents: 13a16fb
- Tree: 550acdb

| Status | File | + | - |
|---|---|---|---|
| M | src/loader/runtime/turn_iteration.py | 15 | 5 |
| M | tests/test_runtime_repair_flows.py | 43 | 0 |
src/loader/runtime/turn_iteration.py — modified
@@ -263,12 +263,22 @@ class TurnIterationController: | ||
| 263 | 263 | policy_stage="empty_response", |
| 264 | 264 | policy_outcome="retry", |
| 265 | 265 | ) |
| 266 | - self.context.session.append( | |
| 267 | - Message( | |
| 268 | - role=Role.USER, | |
| 269 | - content=empty_decision.retry_message, | |
| 270 | - ) | |
| 266 | + retry_message = Message( | |
| 267 | + role=Role.USER, | |
| 268 | + content=empty_decision.retry_message, | |
| 271 | 269 | ) |
| 270 | + if ( | |
| 271 | + self.context.session.messages | |
| 272 | + and self.context.session.messages[-1].role == Role.USER | |
| 273 | + and self.context.session.messages[-1].content.startswith( | |
| 274 | + "[EMPTY ASSISTANT RESPONSE]" | |
| 275 | + ) | |
| 276 | + ): | |
| 277 | + self.context.session.messages[-1] = retry_message | |
| 278 | + self.context.session.touch() | |
| 279 | + self.context.session.persist() | |
| 280 | + else: | |
| 281 | + self.context.session.append(retry_message) | |
| 272 | 282 | return TurnIterationDecision( |
| 273 | 283 | action=TurnIterationAction.CONTINUE, |
| 274 | 284 | continuation_count=continuation_count, |
tests/test_runtime_repair_flows.py — modified
@@ -243,6 +243,49 @@ async def test_repeated_empty_responses_fail_honestly_after_one_retry( | ||
| 243 | 243 | ) |
| 244 | 244 | |
| 245 | 245 | |
| 246 | +@pytest.mark.asyncio | |
| 247 | +async def test_empty_response_retries_replace_prior_retry_message_within_same_episode( | |
| 248 | + temp_dir: Path, | |
| 249 | +) -> None: | |
| 250 | + target = temp_dir / "three.txt" | |
| 251 | + backend = ScriptedBackend( | |
| 252 | + completions=[ | |
| 253 | + CompletionResponse(content=""), | |
| 254 | + CompletionResponse(content=""), | |
| 255 | + CompletionResponse( | |
| 256 | + content="I'll create the file now.", | |
| 257 | + tool_calls=[ | |
| 258 | + ToolCall( | |
| 259 | + id="write-1", | |
| 260 | + name="write", | |
| 261 | + arguments={ | |
| 262 | + "file_path": str(target), | |
| 263 | + "content": "three\n", | |
| 264 | + }, | |
| 265 | + ) | |
| 266 | + ], | |
| 267 | + ), | |
| 268 | + CompletionResponse(content="Done."), | |
| 269 | + ] | |
| 270 | + ) | |
| 271 | + | |
| 272 | + run = await run_scenario( | |
| 273 | + "Create three.txt.", | |
| 274 | + backend, | |
| 275 | + config=non_streaming_config(), | |
| 276 | + project_root=temp_dir, | |
| 277 | + ) | |
| 278 | + | |
| 279 | + assert run.response.startswith("Done.") | |
| 280 | + third_invocation_retry_messages = [ | |
| 281 | + message.content | |
| 282 | + for message in backend.invocations[2].messages | |
| 283 | + if message.role == Role.USER and "[EMPTY ASSISTANT RESPONSE]" in message.content | |
| 284 | + ] | |
| 285 | + assert len(third_invocation_retry_messages) == 1 | |
| 286 | + assert "retry 2/2" in third_invocation_retry_messages[0] | |
| 287 | + | |
| 288 | + | |
| 246 | 289 | @pytest.mark.asyncio |
| 247 | 290 | async def test_raw_text_tool_recovery_budget_fails_honestly( |
| 248 | 291 | temp_dir: Path, |