@@ -0,0 +1,77 @@ |
| | 1 | +"""Tests for runtime-owned confidence and verification services.""" |
| | 2 | + |
| | 3 | +from __future__ import annotations |
| | 4 | + |
| | 5 | +from types import SimpleNamespace |
| | 6 | + |
| | 7 | +import pytest |
| | 8 | + |
| | 9 | +from loader.llm.base import CompletionResponse |
| | 10 | +from loader.runtime.reasoning_service import RuntimeReasoningService |
| | 11 | +from tests.helpers.runtime_harness import ScriptedBackend |
| | 12 | + |
| | 13 | + |
def build_config(*, use_quick_confidence: bool = True, use_quick_verification: bool = True):
    """Build a minimal stand-in config that carries only the reasoning toggles.

    Both flags are keyword-only and default to ``True``. They are stored
    unchanged as attributes of the ``reasoning`` namespace on the returned
    object.
    """
    reasoning_flags = SimpleNamespace()
    reasoning_flags.use_quick_confidence = use_quick_confidence
    reasoning_flags.use_quick_verification = use_quick_verification
    return SimpleNamespace(reasoning=reasoning_flags)
| | 21 | + |
| | 22 | + |
@pytest.mark.asyncio
async def test_runtime_reasoning_service_uses_quick_confidence_without_backend() -> None:
    """Assessing a ``read`` action yields the heuristic result with no backend calls."""
    scripted = ScriptedBackend()
    reasoning_service = RuntimeReasoningService(scripted, build_config())

    tool_name = "read"
    tool_args = {"file_path": "README.md"}
    context = "Please inspect the docs first."

    result = await reasoning_service.assess_confidence(tool_name, tool_args, context)

    # The heuristic path is identified by its fixed reasoning string.
    assert result.score >= 3
    assert result.reasoning == "Quick heuristic assessment"
    # Nothing should have been recorded on the scripted backend.
    assert scripted.invocations == []
| | 38 | + |
@pytest.mark.asyncio
async def test_runtime_reasoning_service_calls_backend_for_low_confidence_actions() -> None:
    """Assessing an ``rm -rf`` bash command returns the scripted backend's verdict."""
    # JSON payload the scripted backend hands back for the single completion.
    scripted_payload = (
        '{"confidence": 2, "reasoning": "This is risky.", '
        '"risks": ["Potential deletion"], "mitigations": ["Inspect first"]}'
    )
    scripted = ScriptedBackend(completions=[CompletionResponse(content=scripted_payload)])
    reasoning_service = RuntimeReasoningService(scripted, build_config())

    tool_args = {"command": "rm -rf build"}
    result = await reasoning_service.assess_confidence(
        "bash",
        tool_args,
        "Need to clean up artifacts.",
    )

    # Score and reasoning must mirror the scripted payload.
    assert result.score == 2
    assert result.reasoning == "This is risky."
    # Exactly one invocation is expected to be recorded on the backend.
    assert len(scripted.invocations) == 1
| | 62 | + |
| | 63 | + |
@pytest.mark.asyncio
async def test_runtime_reasoning_service_uses_quick_verification_without_backend() -> None:
    """Verifying a ``read`` action reports the quick heuristic with no backend calls."""
    scripted = ScriptedBackend()
    reasoning_service = RuntimeReasoningService(scripted, build_config())

    tool_name = "read"
    tool_args = {"file_path": "README.md"}
    tool_output = "loader docs"

    outcome = await reasoning_service.verify_action(tool_name, tool_args, tool_output)

    assert outcome.verified is True
    assert outcome.verification_method == "quick_heuristic"
    # Nothing should have been recorded on the scripted backend.
    assert scripted.invocations == []