Python · 1899 bytes Raw Blame History
1 """Tests for runtime-owned reasoning type surfaces."""
2
3 from __future__ import annotations
4
5 from loader.runtime.reasoning_types import (
6 ConfidenceAssessment,
7 ConfidenceLevel,
8 SelfCritique,
9 Subtask,
10 TaskCompletionCheck,
11 TaskDecomposition,
12 )
13
14
def test_task_decomposition_tracks_progress_and_retry_state() -> None:
    """Drive a two-step plan through completion, failure, and retry reset.

    The second subtask lists the first as a dependency. The test checks the
    progress string and the next-subtask pointer before and after the first
    subtask is completed, then fails the second subtask and verifies it can
    be retried and returns to the "pending" status after a reset.
    """
    read_spec = Subtask(id="1", description="Read spec")
    implement = Subtask(id="2", description="Implement", dependencies=["1"])
    plan = TaskDecomposition(
        original_task="Ship feature",
        subtasks=[read_spec, implement],
    )

    # Fresh plan: nothing is done yet and the first subtask is up next.
    upcoming = plan.next_subtask()
    assert upcoming is plan.subtasks[0]
    assert plan.progress_str() == "[0/2]"

    plan.mark_completed("1", "done")

    # One of two finished; the pointer advances to the second subtask.
    assert plan.progress_str() == "[1/2]"
    upcoming = plan.next_subtask()
    assert upcoming is plan.subtasks[1]

    plan.mark_failed("2", "test failure")

    # A failed subtask is retryable, and resetting returns it to pending.
    retry_allowed = plan.can_retry("2")
    assert retry_allowed is True
    plan.reset_for_retry("2")
    status_after_reset = plan.subtasks[1].status
    assert status_after_reset == "pending"
37
38
def test_confidence_assessment_exposes_score_helpers() -> None:
    """Check the derived helpers on an assessment built at the LOW level.

    The test expects ``score`` to equal 2 and ``is_low_confidence`` to be
    exactly ``True`` for such an assessment.
    """
    init_kwargs = {
        "action": "Write file",
        "tool_name": "write",
        "tool_args": {"file_path": "notes.txt"},
        "level": ConfidenceLevel.LOW,
    }
    low_confidence = ConfidenceAssessment(**init_kwargs)

    observed_score = low_confidence.score
    assert observed_score == 2

    flagged_low = low_confidence.is_low_confidence
    assert flagged_low is True
49
50
def test_self_critique_and_completion_defaults_are_stable() -> None:
    """Pin the revision check and the defaults of a new completion check.

    A critique at revision 1 of a maximum of 2 must still allow revising.
    A completion check built from only the task text must start incomplete,
    with empty ``accomplished``, ``required_evidence`` and
    ``missing_evidence`` lists.
    """
    critique = SelfCritique(
        original_response="draft",
        should_revise=True,
        revision_count=1,
        max_revisions=2,
    )
    completion = TaskCompletionCheck(original_task="Ship feature")

    revisable = critique.can_revise()
    assert revisable is True

    assert completion.is_complete is False

    # Every list-valued field should default to an empty list.
    for list_attr in ("accomplished", "required_evidence", "missing_evidence"):
        assert getattr(completion, list_attr) == []
64 assert completion.missing_evidence == []