Python · 3905 bytes Raw Blame History
1 """Tests for runtime-owned reasoning helpers and compatibility exports."""
2
3 from __future__ import annotations
4
5 import loader.agent.reasoning as agent_reasoning
6 from loader.runtime.action_reasoning import parse_confidence
7 from loader.runtime.deliberation import (
8 DECOMPOSITION_PROMPT,
9 SELF_CRITIQUE_PROMPT,
10 parse_decomposition,
11 parse_self_critique,
12 should_decompose,
13 )
14 from loader.runtime.task_classification import is_conversational
15 from loader.runtime.task_completion import (
16 COMPLETION_CHECK_PROMPT,
17 parse_completion_check,
18 )
19
20
def test_parse_decomposition_falls_back_to_single_subtask_on_invalid_json() -> None:
    """Check that non-JSON input yields a single subtask echoing the task."""
    task = "Refactor auth and add tests"

    result = parse_decomposition("not valid json", task)

    assert result.original_task == task
    assert len(result.subtasks) == 1

    # The lone fallback subtask repeats the task text verbatim.
    fallback = result.subtasks[0]
    assert fallback.description == task
    assert fallback.verification == "Check completion"
28
29
def test_parse_self_critique_reads_revision_signal() -> None:
    """Check that issues, suggestions and the revise flag are read from JSON."""
    draft = "Draft response"
    payload = (
        '{"issues":["Missing edge case"],'
        '"suggestions":["Handle empty input"],'
        '"should_revise":true}'
    )

    parsed = parse_self_critique(payload, draft)

    assert parsed.original_response == draft
    assert parsed.should_revise is True
    assert parsed.issues_found == ["Missing edge case"]
    assert parsed.suggestions == ["Handle empty input"]
44
45
def test_parse_completion_check_builds_continuation_prompt() -> None:
    """Check that an incomplete verdict exposes remaining work and next steps."""
    task = "Create the file and verify it works"
    payload = (
        '{"is_complete": false,'
        '"accomplished":["Created the file"],'
        '"remaining":["Run the tests"],'
        '"next_steps":["Run pytest -q"]}'
    )

    check = parse_completion_check(payload, task)

    assert check.is_complete is False

    # The remaining items are expected to be mirrored as missing evidence.
    outstanding = ["Run the tests"]
    assert check.remaining == outstanding
    assert check.missing_evidence == outstanding

    assert "Run pytest -q" in check.continuation_prompt
61
62
def test_agent_reasoning_reexports_runtime_helpers() -> None:
    """Check that ``loader.agent.reasoning`` re-exports the runtime objects."""
    # Prompt constants are compared by value.
    prompt_exports = (
        ("DECOMPOSITION_PROMPT", DECOMPOSITION_PROMPT),
        ("SELF_CRITIQUE_PROMPT", SELF_CRITIQUE_PROMPT),
        ("COMPLETION_CHECK_PROMPT", COMPLETION_CHECK_PROMPT),
    )
    for export_name, runtime_prompt in prompt_exports:
        assert getattr(agent_reasoning, export_name) == runtime_prompt, export_name

    # Helper functions must be the very same objects as the runtime ones.
    helper_exports = (
        ("parse_decomposition", parse_decomposition),
        ("parse_self_critique", parse_self_critique),
        ("should_decompose", should_decompose),
        ("parse_completion_check", parse_completion_check),
        ("parse_confidence", parse_confidence),
        ("is_conversational", is_conversational),
    )
    for export_name, runtime_helper in helper_exports:
        assert getattr(agent_reasoning, export_name) is runtime_helper, export_name
73
74
def test_agent_reasoning_exports_curated_compatibility_surface() -> None:
    """Pin the exact contents and order of ``agent_reasoning.__all__``."""
    # Any addition, removal or reordering of exports must be made here too.
    expected_exports = [
        "ActionVerification",
        "COMPLETION_CHECK_PROMPT",
        "CONFIDENCE_PROMPT",
        "ConfidenceAssessment",
        "ConfidenceLevel",
        "DECOMPOSITION_PROMPT",
        "RollbackAction",
        "RollbackPlan",
        "RollbackType",
        "SELF_CRITIQUE_PROMPT",
        "SelfCritique",
        "Subtask",
        "TaskCompletionCheck",
        "TaskDecomposition",
        "VERIFICATION_PROMPT",
        "create_rollback_plan_for_action",
        "detect_premature_completion",
        "estimate_complexity",
        "estimate_confidence_quick",
        "execute_rollback",
        "get_continuation_prompt",
        "get_token_budget",
        "get_undo_command",
        "is_conversational",
        "is_destructive_tool",
        "parse_completion_check",
        "parse_confidence",
        "parse_decomposition",
        "parse_self_critique",
        "parse_verification",
        "quick_verify",
        "should_decompose",
        "should_self_critique",
    ]

    assert agent_reasoning.__all__ == expected_exports