loader Public

Watch 0 Fork 0 Star 0

Python · 3905 bytes Raw Blame History

  
        1
        """Tests for runtime-owned reasoning helpers and compatibility exports."""
      
        2
        
        3
        from __future__ import annotations
      
        4
        
        5
        import loader.agent.reasoning as agent_reasoning
      
        6
        from loader.runtime.action_reasoning import parse_confidence
      
        7
        from loader.runtime.deliberation import (
      
        8
            DECOMPOSITION_PROMPT,
      
        9
            SELF_CRITIQUE_PROMPT,
      
        10
            parse_decomposition,
      
        11
            parse_self_critique,
      
        12
            should_decompose,
      
        13
        )
      
        14
        from loader.runtime.task_classification import is_conversational
      
        15
        from loader.runtime.task_completion import (
      
        16
            COMPLETION_CHECK_PROMPT,
      
        17
            parse_completion_check,
      
        18
        )
      
        19
        
        20
        
        21
        def test_parse_decomposition_falls_back_to_single_subtask_on_invalid_json() -> None:
            """Check the fallback produced when the model output is not valid JSON.

            The result is expected to carry the original task and exactly one
            subtask whose description repeats that task.
            """
            task = "Refactor auth and add tests"

            result = parse_decomposition("not valid json", task)

            assert result.original_task == task
            assert len(result.subtasks) == 1
            # Both remaining checks target the single fallback subtask.
            assert result.subtasks[0].description == task
            assert result.subtasks[0].verification == "Check completion"
      
        28
        
        29
        
        30
        def test_parse_self_critique_reads_revision_signal() -> None:
            """Check that a JSON critique asking for revision is parsed field by field."""
            # Adjacent literals concatenate into one compact JSON document.
            payload = (
                '{"issues":["Missing edge case"],'
                '"suggestions":["Handle empty input"],'
                '"should_revise":true}'
            )
            draft = "Draft response"

            result = parse_self_critique(payload, draft)

            assert result.original_response == draft
            assert result.should_revise is True
            assert result.issues_found == ["Missing edge case"]
            assert result.suggestions == ["Handle empty input"]
      
        44
        
        45
        
        46
        def test_parse_completion_check_builds_continuation_prompt() -> None:
            """Check how an incomplete completion report is parsed.

            The remaining work must be exposed both as ``remaining`` and as
            ``missing_evidence``, and the next step must appear in the
            continuation prompt.
            """
            # Adjacent literals concatenate into one JSON document.
            payload = (
                '{"is_complete": false,'
                '"accomplished":["Created the file"],'
                '"remaining":["Run the tests"],'
                '"next_steps":["Run pytest -q"]}'
            )
            task = "Create the file and verify it works"

            result = parse_completion_check(payload, task)

            assert result.is_complete is False
            assert result.remaining == ["Run the tests"]
            assert result.missing_evidence == ["Run the tests"]
            assert "Run pytest -q" in result.continuation_prompt
      
        61
        
        62
        
        63
        def test_agent_reasoning_reexports_runtime_helpers() -> None:
            """Check that ``loader.agent.reasoning`` re-exports the runtime helpers.

            Prompt constants are compared by equality; callables are compared by
            identity against the objects imported from ``loader.runtime``.
            """
            prompt_constants = {
                "DECOMPOSITION_PROMPT": DECOMPOSITION_PROMPT,
                "SELF_CRITIQUE_PROMPT": SELF_CRITIQUE_PROMPT,
                "COMPLETION_CHECK_PROMPT": COMPLETION_CHECK_PROMPT,
            }
            for name, runtime_value in prompt_constants.items():
                assert getattr(agent_reasoning, name) == runtime_value

            runtime_callables = {
                "parse_decomposition": parse_decomposition,
                "parse_self_critique": parse_self_critique,
                "should_decompose": should_decompose,
                "parse_completion_check": parse_completion_check,
                "parse_confidence": parse_confidence,
                "is_conversational": is_conversational,
            }
            for name, runtime_callable in runtime_callables.items():
                assert getattr(agent_reasoning, name) is runtime_callable
      
        73
        
        74
        
        75
        def test_agent_reasoning_exports_curated_compatibility_surface() -> None:
            """Pin ``agent_reasoning.__all__`` to the exact expected list.

            The comparison is a list equality, so both membership and order matter.
            """
            expected_exports = [
                "ActionVerification",
                "COMPLETION_CHECK_PROMPT",
                "CONFIDENCE_PROMPT",
                "ConfidenceAssessment",
                "ConfidenceLevel",
                "DECOMPOSITION_PROMPT",
                "RollbackAction",
                "RollbackPlan",
                "RollbackType",
                "SELF_CRITIQUE_PROMPT",
                "SelfCritique",
                "Subtask",
                "TaskCompletionCheck",
                "TaskDecomposition",
                "VERIFICATION_PROMPT",
                "create_rollback_plan_for_action",
                "detect_premature_completion",
                "estimate_complexity",
                "estimate_confidence_quick",
                "execute_rollback",
                "get_continuation_prompt",
                "get_token_budget",
                "get_undo_command",
                "is_conversational",
                "is_destructive_tool",
                "parse_completion_check",
                "parse_confidence",
                "parse_decomposition",
                "parse_self_critique",
                "parse_verification",
                "quick_verify",
                "should_decompose",
                "should_self_critique",
            ]

            assert agent_reasoning.__all__ == expected_exports

1	"""Tests for runtime-owned reasoning helpers and compatibility exports."""
2
3	from __future__ import annotations
4
5	import loader.agent.reasoning as agent_reasoning
6	from loader.runtime.action_reasoning import parse_confidence
7	from loader.runtime.deliberation import (
8	DECOMPOSITION_PROMPT,
9	SELF_CRITIQUE_PROMPT,
10	parse_decomposition,
11	parse_self_critique,
12	should_decompose,
13	)
14	from loader.runtime.task_classification import is_conversational
15	from loader.runtime.task_completion import (
16	COMPLETION_CHECK_PROMPT,
17	parse_completion_check,
18	)
19
20
def test_parse_decomposition_falls_back_to_single_subtask_on_invalid_json() -> None:
    """Check the fallback produced when the model output is not valid JSON.

    The result is expected to carry the original task and exactly one
    subtask whose description repeats that task.
    """
    task = "Refactor auth and add tests"

    result = parse_decomposition("not valid json", task)

    assert result.original_task == task
    assert len(result.subtasks) == 1
    # Both remaining checks target the single fallback subtask.
    assert result.subtasks[0].description == task
    assert result.subtasks[0].verification == "Check completion"
28
29
def test_parse_self_critique_reads_revision_signal() -> None:
    """Check that a JSON critique asking for revision is parsed field by field."""
    # Adjacent literals concatenate into one compact JSON document.
    payload = (
        '{"issues":["Missing edge case"],'
        '"suggestions":["Handle empty input"],'
        '"should_revise":true}'
    )
    draft = "Draft response"

    result = parse_self_critique(payload, draft)

    assert result.original_response == draft
    assert result.should_revise is True
    assert result.issues_found == ["Missing edge case"]
    assert result.suggestions == ["Handle empty input"]
44
45
def test_parse_completion_check_builds_continuation_prompt() -> None:
    """Check how an incomplete completion report is parsed.

    The remaining work must be exposed both as ``remaining`` and as
    ``missing_evidence``, and the next step must appear in the
    continuation prompt.
    """
    # Adjacent literals concatenate into one JSON document.
    payload = (
        '{"is_complete": false,'
        '"accomplished":["Created the file"],'
        '"remaining":["Run the tests"],'
        '"next_steps":["Run pytest -q"]}'
    )
    task = "Create the file and verify it works"

    result = parse_completion_check(payload, task)

    assert result.is_complete is False
    assert result.remaining == ["Run the tests"]
    assert result.missing_evidence == ["Run the tests"]
    assert "Run pytest -q" in result.continuation_prompt
61
62
def test_agent_reasoning_reexports_runtime_helpers() -> None:
    """Check that ``loader.agent.reasoning`` re-exports the runtime helpers.

    Prompt constants are compared by equality; callables are compared by
    identity against the objects imported from ``loader.runtime``.
    """
    prompt_constants = {
        "DECOMPOSITION_PROMPT": DECOMPOSITION_PROMPT,
        "SELF_CRITIQUE_PROMPT": SELF_CRITIQUE_PROMPT,
        "COMPLETION_CHECK_PROMPT": COMPLETION_CHECK_PROMPT,
    }
    for name, runtime_value in prompt_constants.items():
        assert getattr(agent_reasoning, name) == runtime_value

    runtime_callables = {
        "parse_decomposition": parse_decomposition,
        "parse_self_critique": parse_self_critique,
        "should_decompose": should_decompose,
        "parse_completion_check": parse_completion_check,
        "parse_confidence": parse_confidence,
        "is_conversational": is_conversational,
    }
    for name, runtime_callable in runtime_callables.items():
        assert getattr(agent_reasoning, name) is runtime_callable
73
74
def test_agent_reasoning_exports_curated_compatibility_surface() -> None:
    """Pin ``agent_reasoning.__all__`` to the exact expected list.

    The comparison is a list equality, so both membership and order matter.
    """
    expected_exports = [
        "ActionVerification",
        "COMPLETION_CHECK_PROMPT",
        "CONFIDENCE_PROMPT",
        "ConfidenceAssessment",
        "ConfidenceLevel",
        "DECOMPOSITION_PROMPT",
        "RollbackAction",
        "RollbackPlan",
        "RollbackType",
        "SELF_CRITIQUE_PROMPT",
        "SelfCritique",
        "Subtask",
        "TaskCompletionCheck",
        "TaskDecomposition",
        "VERIFICATION_PROMPT",
        "create_rollback_plan_for_action",
        "detect_premature_completion",
        "estimate_complexity",
        "estimate_confidence_quick",
        "execute_rollback",
        "get_continuation_prompt",
        "get_token_budget",
        "get_undo_command",
        "is_conversational",
        "is_destructive_tool",
        "parse_completion_check",
        "parse_confidence",
        "parse_decomposition",
        "parse_self_critique",
        "parse_verification",
        "quick_verify",
        "should_decompose",
        "should_self_critique",
    ]

    assert agent_reasoning.__all__ == expected_exports