| 1 | """Tests for assistant-response routing outside the full iteration controller.""" |
| 2 | |
| 3 | from __future__ import annotations |
| 4 | |
| 5 | from pathlib import Path |
| 6 | |
| 7 | import pytest |
| 8 | |
| 9 | from loader.agent.loop import Agent, AgentConfig |
| 10 | from loader.llm.base import ToolCall |
| 11 | from loader.runtime.conversation import ConversationRuntime |
| 12 | from loader.runtime.phases import TurnPhase |
| 13 | from loader.runtime.repair import ToolCallAnalysis |
| 14 | from loader.runtime.response_routing import ( |
| 15 | ResponseRouteAction, |
| 16 | ResponseRouteContext, |
| 17 | ) |
| 18 | from tests.helpers.runtime_harness import ScriptedBackend |
| 19 | |
| 20 | |
def non_streaming_config() -> AgentConfig:
    """Build the AgentConfig shared by the direct response-routing tests.

    Streaming and auto-context are disabled so routing is exercised on plain,
    non-streamed completions; the iteration cap matches the other tests here.
    """

    config = AgentConfig(auto_context=False, stream=False, max_iterations=8)
    return config
| 25 | |
| 26 | |
async def _prepare_context(
    runtime: ConversationRuntime,
    *,
    task: str,
    continuation_count: int = 0,
    consecutive_errors: int = 0,
) -> tuple[ResponseRouteContext, list]:
    """Run turn preparation and return a routing context plus captured events.

    The returned list is the live event sink used during preparation, so
    callers may keep appending to it from their own capture closures and see
    one combined event stream.
    """

    captured: list = []

    async def record(event) -> None:
        captured.append(event)

    # Prepare the turn exactly as the iteration controller would.
    prepared = await runtime.turn_preparation.prepare(
        task=task,
        emit=record,
        requested_mode="execute",
        original_task=None,
        on_user_question=None,
    )
    # Move into the ASSISTANT phase before routing, mirroring the real loop.
    await runtime.phase_tracker.enter(
        TurnPhase.ASSISTANT,
        record,
        detail="Requesting assistant response",
        reason_code="request_assistant_response",
    )
    route_context = ResponseRouteContext(
        task=prepared.task,
        effective_task=prepared.effective_task,
        iterations=1,
        max_iterations=runtime.context.config.max_iterations,
        actions_taken=[],
        continuation_count=continuation_count,
        consecutive_errors=consecutive_errors,
        dod=prepared.definition_of_done,
        summary=prepared.summary,
        executor=prepared.executor,
        rollback_plan=prepared.rollback_plan,
    )
    return route_context, captured
| 66 | |
| 67 | |
@pytest.mark.asyncio
async def test_response_router_completes_final_answer_route(
    temp_dir: Path,
) -> None:
    """A final-answer analysis should COMPLETE and leave error counters intact."""
    agent = Agent(
        backend=ScriptedBackend(completions=[]),
        config=non_streaming_config(),
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    context, collected = await _prepare_context(
        runtime,
        task="Explain whether final answers route correctly.",
        continuation_count=1,
        consecutive_errors=2,
    )

    async def record(event) -> None:
        collected.append(event)

    analysis = ToolCallAnalysis(
        content="All set.",
        response_content="Final Answer: All set.",
        is_final_answer=True,
        final_response="All set.",
    )
    decision = await runtime.response_router.route_response(
        analysis=analysis,
        pending_tool_calls_seen=set(),
        context=context,
        emit=record,
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=runtime._emit_confirmation(record),
    )

    # Counters pass through unchanged; the summary records the final answer.
    assert decision.action == ResponseRouteAction.COMPLETE
    assert decision.continuation_count == 1
    assert decision.consecutive_errors == 2
    assert context.summary.final_response == "All set."
    assert context.summary.assistant_messages[-1].content == "Final Answer: All set."
    assert any(
        event.type == "response" and event.content == "All set."
        for event in collected
    )
| 109 | |
| 110 | |
@pytest.mark.asyncio
async def test_response_router_finalizes_halted_tool_batch(
    temp_dir: Path,
) -> None:
    """With auto-recover off, a third straight tool error should FINALIZE."""
    config = non_streaming_config()
    config.auto_recover = False
    agent = Agent(
        backend=ScriptedBackend(completions=[]),
        config=config,
        project_root=temp_dir,
    )
    runtime = ConversationRuntime(agent)
    context, collected = await _prepare_context(
        runtime,
        task="Inspect the missing file and recover honestly.",
        consecutive_errors=2,
    )

    async def record(event) -> None:
        collected.append(event)

    # A read of a nonexistent file provides the failing tool call.
    failing_call = ToolCall(
        id="read-missing",
        name="read",
        arguments={"file_path": "missing.md"},
    )
    decision = await runtime.response_router.route_response(
        analysis=ToolCallAnalysis(
            content="I'll inspect the file first.",
            response_content="I'll inspect the file first.",
            tool_calls=[failing_call],
            tool_source="native",
        ),
        pending_tool_calls_seen=set(),
        context=context,
        emit=record,
        on_confirmation=None,
        on_user_question=None,
        emit_confirmation=runtime._emit_confirmation(record),
    )

    expected_apology = (
        "I ran into some issues. Let me know if you'd like me to try a different approach."
    )
    assert decision.action == ResponseRouteAction.FINALIZE
    assert decision.consecutive_errors == 3
    assert decision.finalize_reason_code == "tool_batch_halted"
    assert decision.new_actions_taken == ["read: {'file_path': 'missing.md'}"]
    assert context.summary.final_response == expected_apology
    assert "three consecutive tool errors" in context.summary.failures
    assert any(
        event.type == "tool_call" and event.tool_name == "read"
        for event in collected
    )
    assert any(
        event.type == "response" and event.content == expected_apology
        for event in collected
    )