| 1 | """Tests for runtime-owned confidence and verification services.""" |
| 2 | |
| 3 | from __future__ import annotations |
| 4 | |
| 5 | from types import SimpleNamespace |
| 6 | |
| 7 | import pytest |
| 8 | |
| 9 | from loader.llm.base import CompletionResponse |
| 10 | from loader.runtime.reasoning_service import RuntimeReasoningService |
| 11 | from tests.helpers.runtime_harness import ScriptedBackend |
| 12 | |
| 13 | |
def build_config(
    *,
    use_quick_confidence: bool = True,
    use_quick_verification: bool = True,
):
    """Return a lightweight config object carrying only the reasoning flags.

    Both arguments are keyword-only, default to ``True``, and are stored
    unchanged as attributes of ``config.reasoning``.
    """
    reasoning_flags = SimpleNamespace()
    # Assign in the same order as the keyword arguments so the namespace's
    # repr and attribute ordering stay stable.
    reasoning_flags.use_quick_confidence = use_quick_confidence
    reasoning_flags.use_quick_verification = use_quick_verification
    return SimpleNamespace(reasoning=reasoning_flags)
| 21 | |
| 22 | |
@pytest.mark.asyncio
async def test_runtime_reasoning_service_uses_quick_confidence_without_backend() -> None:
    """Check that assessing a ``read`` call records no backend invocations.

    With the default config from ``build_config()``, the assessment is expected
    to score at least 3 and carry the "Quick heuristic assessment" reasoning.
    """
    scripted = ScriptedBackend()
    reasoning_service = RuntimeReasoningService(scripted, build_config())
    call_arguments = {"file_path": "README.md"}

    result = await reasoning_service.assess_confidence(
        "read",
        call_arguments,
        "Please inspect the docs first.",
    )

    assert result.score >= 3
    assert result.reasoning == "Quick heuristic assessment"
    # An empty invocation log means the scripted backend was never consulted.
    assert scripted.invocations == []
| 37 | |
| 38 | |
@pytest.mark.asyncio
async def test_runtime_reasoning_service_calls_backend_for_low_confidence_actions() -> None:
    """Check that assessing an ``rm -rf`` bash call invokes the backend once.

    The backend is scripted with a single JSON completion; the resulting
    assessment is expected to carry that completion's confidence and reasoning.
    """
    scripted_reply = CompletionResponse(
        content=(
            '{"confidence": 2, "reasoning": "This is risky.", '
            '"risks": ["Potential deletion"], "mitigations": ["Inspect first"]}'
        )
    )
    scripted = ScriptedBackend(completions=[scripted_reply])
    reasoning_service = RuntimeReasoningService(scripted, build_config())
    call_arguments = {"command": "rm -rf build"}

    result = await reasoning_service.assess_confidence(
        "bash",
        call_arguments,
        "Need to clean up artifacts.",
    )

    assert result.score == 2
    assert result.reasoning == "This is risky."
    # Exactly one recorded invocation: the scripted completion was consumed.
    assert len(scripted.invocations) == 1
| 62 | |
| 63 | |
@pytest.mark.asyncio
async def test_runtime_reasoning_service_uses_quick_verification_without_backend() -> None:
    """Check that verifying a ``read`` call records no backend invocations.

    With the default config from ``build_config()``, the verification is
    expected to be marked verified via the "quick_heuristic" method.
    """
    scripted = ScriptedBackend()
    reasoning_service = RuntimeReasoningService(scripted, build_config())
    call_arguments = {"file_path": "README.md"}

    outcome = await reasoning_service.verify_action(
        "read",
        call_arguments,
        "loader docs",
    )

    assert outcome.verified is True
    assert outcome.verification_method == "quick_heuristic"
    # An empty invocation log means the scripted backend was never consulted.
    assert scripted.invocations == []