@@ -0,0 +1,77 @@ |
| 1 | +"""Tests for runtime-owned confidence and verification services.""" |
| 2 | + |
| 3 | +from __future__ import annotations |
| 4 | + |
| 5 | +from types import SimpleNamespace |
| 6 | + |
| 7 | +import pytest |
| 8 | + |
| 9 | +from loader.llm.base import CompletionResponse |
| 10 | +from loader.runtime.reasoning_service import RuntimeReasoningService |
| 11 | +from tests.helpers.runtime_harness import ScriptedBackend |
| 12 | + |
| 13 | + |
def build_config(*, use_quick_confidence: bool = True, use_quick_verification: bool = True):
    """Build a minimal stand-in config carrying only the ``reasoning`` flags.

    Both flags are keyword-only, default to ``True``, and are stored
    unchanged as attributes of ``config.reasoning``.
    """
    reasoning_flags = SimpleNamespace(
        use_quick_confidence=use_quick_confidence,
        use_quick_verification=use_quick_verification,
    )
    return SimpleNamespace(reasoning=reasoning_flags)
| 21 | + |
| 22 | + |
@pytest.mark.asyncio
async def test_runtime_reasoning_service_uses_quick_confidence_without_backend() -> None:
    """Assessing a ``read`` of README.md must not invoke the scripted backend."""
    scripted = ScriptedBackend()
    reasoning = RuntimeReasoningService(scripted, build_config())
    tool_arguments = {"file_path": "README.md"}

    result = await reasoning.assess_confidence(
        "read",
        tool_arguments,
        "Please inspect the docs first.",
    )

    # The test expects a score of at least 3 with the heuristic reasoning label.
    assert result.score >= 3
    assert result.reasoning == "Quick heuristic assessment"
    # No completion request should have been recorded by the backend.
    assert scripted.invocations == []
| 38 | + |
@pytest.mark.asyncio
async def test_runtime_reasoning_service_calls_backend_for_low_confidence_actions() -> None:
    """Assessing ``rm -rf build`` must consult the backend exactly once."""
    # JSON body the scripted backend hands back as its single completion.
    scripted_reply = (
        '{"confidence": 2, "reasoning": "This is risky.", '
        '"risks": ["Potential deletion"], "mitigations": ["Inspect first"]}'
    )
    scripted = ScriptedBackend(completions=[CompletionResponse(content=scripted_reply)])
    reasoning = RuntimeReasoningService(scripted, build_config())
    tool_arguments = {"command": "rm -rf build"}

    result = await reasoning.assess_confidence(
        "bash",
        tool_arguments,
        "Need to clean up artifacts.",
    )

    # Score and reasoning are expected to match the scripted JSON reply.
    assert result.score == 2
    assert result.reasoning == "This is risky."
    assert len(scripted.invocations) == 1
| 63 | + |
@pytest.mark.asyncio
async def test_runtime_reasoning_service_uses_quick_verification_without_backend() -> None:
    """Verifying a ``read`` of README.md must not invoke the scripted backend."""
    scripted = ScriptedBackend()
    reasoning = RuntimeReasoningService(scripted, build_config())
    tool_arguments = {"file_path": "README.md"}

    outcome = await reasoning.verify_action(
        "read",
        tool_arguments,
        "loader docs",
    )

    # The test expects a verified result tagged with the quick heuristic method.
    assert outcome.verified is True
    assert outcome.verification_method == "quick_heuristic"
    # No completion request should have been recorded by the backend.
    assert scripted.invocations == []