@@ -1,10 +1,107 @@ |
| 1 | 1 | """Tests for completion-policy helpers.""" |
| 2 | 2 | |
| 3 | +from __future__ import annotations |
| 4 | + |
| 5 | +from pathlib import Path |
| 6 | +from types import SimpleNamespace |
| 7 | + |
| 8 | +import pytest |
| 9 | + |
| 10 | +from loader.llm.base import Message, Role |
| 3 | 11 | from loader.runtime.completion_policy import CompletionPolicy |
| 12 | +from loader.runtime.context import RuntimeContext |
| 13 | +from loader.runtime.events import TurnSummary |
| 14 | +from loader.runtime.permissions import ( |
| 15 | + PermissionMode, |
| 16 | + build_permission_policy, |
| 17 | + load_permission_rules, |
| 18 | +) |
| 4 | 19 | from loader.runtime.task_completion import ( |
| 5 | 20 | detect_premature_completion, |
| 6 | 21 | get_continuation_prompt, |
| 7 | 22 | ) |
| 23 | +from loader.tools.base import create_default_registry |
| 24 | +from tests.helpers.runtime_harness import ScriptedBackend |
| 25 | + |
| 26 | + |
class FakeCodeFilter:
    """Stand-in code filter whose ``reset`` does nothing."""

    def reset(self) -> None:
        """Accept the reset call and return ``None`` without side effects."""
| 30 | + |
| 31 | + |
class FakeSafeguards:
    """Test double for the safeguards object handed to the runtime context.

    Content filters pass text through unchanged, steering is never requested,
    ``detect_loop`` always reports no loop, and ``detect_text_loop`` returns
    the verdict supplied at construction time. Every string passed to
    ``record_response`` is kept in ``recorded``.
    """

    def __init__(self, *, text_loop: tuple[bool, str] = (False, "")) -> None:
        """Store the canned text-loop verdict and create placeholder members.

        Args:
            text_loop: The ``(flag, message)`` pair that ``detect_text_loop``
                returns on every call.
        """
        # Canned answer for detect_text_loop(); never recomputed.
        self._text_loop = text_loop
        # Responses received through record_response(), in call order.
        self.recorded: list[str] = []
        # Opaque placeholders plus a no-op code filter.
        self.action_tracker = object()
        self.validator = object()
        self.code_filter = FakeCodeFilter()

    # -- content filtering: identity pass-through --------------------------

    def filter_stream_chunk(self, content: str) -> str:
        """Return ``content`` unchanged."""
        return content

    def filter_complete_content(self, content: str) -> str:
        """Return ``content`` unchanged."""
        return content

    # -- steering: always disabled -----------------------------------------

    def should_steer(self) -> bool:
        """Report that no steering is needed."""
        return False

    def get_steering_message(self) -> str | None:
        """Return ``None``; this fake never provides a steering message."""
        return None

    # -- response tracking and loop detection ------------------------------

    def record_response(self, content: str) -> None:
        """Append ``content`` to ``recorded``."""
        self.recorded.append(content)

    def detect_text_loop(self, content: str) -> tuple[bool, str]:
        """Return the verdict given to ``__init__``; ``content`` is ignored."""
        return self._text_loop

    def detect_loop(self) -> tuple[bool, str]:
        """Return ``(False, "")`` unconditionally."""
        return (False, "")
| 60 | + |
| 61 | + |
class FakeSession:
    """In-memory session double that only collects appended messages."""

    def __init__(self) -> None:
        """Start with an empty ``messages`` list."""
        # Messages in the order they were appended.
        self.messages: list[Message] = []

    def append(self, message: Message) -> None:
        """Add ``message`` to the end of ``messages``."""
        self.messages.append(message)
| 68 | + |
| 69 | + |
def build_context(
    temp_dir: Path,
    *,
    safeguards: FakeSafeguards,
    max_continuation_prompts: int = 5,
    use_quick_completion: bool = True,
) -> RuntimeContext:
    """Assemble a ``RuntimeContext`` rooted at ``temp_dir`` for policy tests.

    The context uses the default tool registry, a permission policy built in
    ``PermissionMode.WORKSPACE_WRITE`` from the rules loaded for ``temp_dir``,
    a ``ScriptedBackend``, a ``FakeSession``, and lightweight
    ``SimpleNamespace`` stand-ins for the config and capability profile.

    Args:
        temp_dir: Directory used as project root and workspace root.
        safeguards: Safeguards double placed on the context.
        max_continuation_prompts: Value exposed as
            ``config.reasoning.max_continuation_prompts``.
        use_quick_completion: Value exposed as
            ``config.reasoning.use_quick_completion``.

    Returns:
        The constructed ``RuntimeContext`` with ``workflow_mode="execute"``.
    """
    tool_registry = create_default_registry(temp_dir)
    tool_registry.configure_workspace_root(temp_dir)

    permission_status = load_permission_rules(temp_dir)
    permission_policy = build_permission_policy(
        active_mode=PermissionMode.WORKSPACE_WRITE,
        workspace_root=temp_dir,
        tool_requirements=tool_registry.get_tool_requirements(),
        rules=permission_status.rules,
    )

    # Plain namespaces stand in for the real config / capability objects.
    reasoning_settings = SimpleNamespace(
        max_continuation_prompts=max_continuation_prompts,
        use_quick_completion=use_quick_completion,
    )
    runtime_config = SimpleNamespace(
        force_react=False,
        reasoning=reasoning_settings,
    )
    native_tools_profile = SimpleNamespace(supports_native_tools=True)

    return RuntimeContext(
        project_root=temp_dir,
        backend=ScriptedBackend(),
        registry=tool_registry,
        session=FakeSession(),  # type: ignore[arg-type]
        config=runtime_config,
        capability_profile=native_tools_profile,  # type: ignore[arg-type]
        project_context=None,
        permission_policy=permission_policy,
        permission_config_status=permission_status,
        workflow_mode="execute",
        safeguards=safeguards,
    )
| 8 | 105 | |
| 9 | 106 | |
| 10 | 107 | def test_completion_policy_finalize_response_text_keeps_original_response() -> None: |
@@ -32,3 +129,65 @@ def test_get_continuation_prompt_surfaces_missing_verification_steps() -> None: |
| 32 | 129 | ) |
| 33 | 130 | |
| 34 | 131 | assert "Run the tests" in prompt or "verify it works" in prompt |
| 132 | + |
| 133 | + |
@pytest.mark.asyncio
async def test_completion_policy_stops_for_text_loop_using_runtime_context(
    temp_dir: Path,
) -> None:
    """A reported text loop makes the policy stop and record its reply.

    The safeguards double always reports a text loop, so the decision must
    ask to stop, the summary and session must carry the same final response,
    and an ``error`` event must be emitted before the ``response`` event.
    """
    looping_safeguards = FakeSafeguards(
        text_loop=(True, "assistant repeated the same summary"),
    )
    context = build_context(temp_dir, safeguards=looping_safeguards)
    completion_policy = CompletionPolicy(context)
    turn_summary = TurnSummary(final_response="")
    emitted = []

    async def record_event(event) -> None:
        # Capture events in emission order for the assertions below.
        emitted.append(event)

    decision = await completion_policy.maybe_stop_for_text_loop(
        content="Same summary again.",
        emit=record_event,
        summary=turn_summary,
    )

    assert decision.should_stop is True
    assert turn_summary.final_response.startswith("I seem to be repeating myself")
    assert turn_summary.assistant_messages[-1].role == Role.ASSISTANT
    assert context.session.messages[-1].content == turn_summary.final_response
    assert emitted[0].type == "error"
    assert emitted[1].type == "response"
| 161 | + |
| 162 | + |
@pytest.mark.asyncio
async def test_completion_policy_requests_continuation_using_runtime_context(
    temp_dir: Path,
) -> None:
    """A reply with no actions taken triggers a continuation request.

    The session must end with the assistant's reply followed by a user
    message that mentions the verification step, and the first emitted
    event must be a ``completion_check``.
    """
    context = build_context(temp_dir, safeguards=FakeSafeguards())
    completion_policy = CompletionPolicy(context)
    emitted = []

    async def record_event(event) -> None:
        # Capture events in emission order for the assertions below.
        emitted.append(event)

    assistant_reply = "I can handle that."
    decision = await completion_policy.maybe_continue_for_completion(
        content=assistant_reply,
        response_content=assistant_reply,
        task="Create the file and verify it works.",
        actions_taken=[],
        continuation_count=0,
        emit=record_event,
    )

    assert decision.should_continue is True

    expected_assistant_message = Message(
        role=Role.ASSISTANT,
        content="I can handle that.",
    )
    assert context.session.messages[-2] == expected_assistant_message

    follow_up = context.session.messages[-1]
    assert follow_up.role == Role.USER
    assert "verify it works" in follow_up.content.lower()
    assert emitted[0].type == "completion_check"