@@ -0,0 +1,115 @@ |
| 1 | +"""Deterministic coverage for current runtime repair behavior.""" |
| 2 | + |
| 3 | +from __future__ import annotations |
| 4 | + |
| 5 | +from pathlib import Path |
| 6 | + |
| 7 | +import pytest |
| 8 | + |
| 9 | +from loader.agent.loop import AgentConfig |
| 10 | +from loader.llm.base import CompletionResponse, Role, ToolCall |
| 11 | +from tests.helpers.runtime_harness import ScriptedBackend, run_scenario |
| 12 | + |
| 13 | + |
def non_streaming_config() -> AgentConfig:
    """Build the agent configuration shared by the repair-flow tests.

    Auto-context and streaming are switched off, the loop is capped at
    eight iterations, and the reasoning completion check is disabled.
    """

    cfg = AgentConfig(max_iterations=8, stream=False, auto_context=False)
    cfg.reasoning.completion_check = False
    return cfg
| 20 | + |
| 21 | + |
def tool_event_names(run) -> list[str]:
    """List the tool names of ``run``'s tool-call events, in event order.

    Events whose phase is ``"verification"`` and events with a falsy
    ``tool_name`` are left out.
    """

    names = []
    for evt in run.events:
        if evt.type != "tool_call":
            continue
        if not evt.tool_name or evt.phase == "verification":
            continue
        names.append(evt.tool_name)
    return names
| 30 | + |
| 31 | + |
@pytest.mark.asyncio
async def test_first_turn_action_prompt_does_not_inject_prefill_message(
    temp_dir: Path,
) -> None:
    """The first model invocation carries no assistant ``"["`` message."""

    scripted = ScriptedBackend(
        completions=[CompletionResponse(content="I can help with that.")]
    )
    await run_scenario(
        "Create allowed.txt with a greeting.",
        scripted,
        config=non_streaming_config(),
        project_root=temp_dir,
    )

    # Inspect exactly what was sent to the backend on the first call.
    first_turn_messages = scripted.invocations[0].messages
    prefill_messages = [
        msg
        for msg in first_turn_messages
        if msg.role == Role.ASSISTANT and msg.content == "["
    ]
    assert not prefill_messages
| 51 | + |
| 52 | + |
@pytest.mark.asyncio
async def test_empty_response_retry_injects_honest_user_reminder_and_recovers(
    temp_dir: Path,
) -> None:
    """An empty completion triggers a user-role reminder and the run recovers.

    The scripted backend answers with an empty reply, then a ``read`` tool
    call, then a final text answer.
    """

    target = temp_dir / "fixture.txt"
    target.write_text("repair baseline\n")

    read_call = ToolCall(
        id="read-1",
        name="read",
        arguments={"file_path": str(target)},
    )
    script = [
        CompletionResponse(content=""),
        CompletionResponse(
            content="I'll inspect the file now.",
            tool_calls=[read_call],
        ),
        CompletionResponse(content="Recovered after the empty response."),
    ]
    scripted = ScriptedBackend(completions=script)

    run = await run_scenario(
        "Read the fixture file.",
        scripted,
        config=non_streaming_config(),
        project_root=temp_dir,
    )

    assert tool_event_names(run) == ["read"]
    assert "Recovered after the empty response." in run.response

    # The second invocation is the retry; it must contain the reminder.
    retry_messages = scripted.invocations[1].messages
    reminders = [
        msg
        for msg in retry_messages
        if msg.role == Role.USER and "[EMPTY ASSISTANT RESPONSE]" in msg.content
    ]
    assert reminders
| 90 | + |
| 91 | + |
@pytest.mark.asyncio
async def test_repeated_empty_responses_fail_honestly_after_one_retry(
    temp_dir: Path,
) -> None:
    """Two empty completions in a row end the run with the failure message.

    No tool events are recorded and the backend is invoked exactly twice.
    """

    expected_failure = (
        "I didn't get a usable response from the model after retrying once. "
        "Please try again or switch to a different backend/model."
    )
    empty_replies = [
        CompletionResponse(content=""),
        CompletionResponse(content=""),
    ]
    scripted = ScriptedBackend(completions=empty_replies)

    run = await run_scenario(
        "Read the fixture file.",
        scripted,
        config=non_streaming_config(),
        project_root=temp_dir,
    )

    assert tool_event_names(run) == []
    assert run.response == expected_failure
    assert len(scripted.invocations) == 2