Python · 6024 bytes Raw Blame History
1 """Tests for the Sprint 10 workflow policy and timeline core."""
2
3 from __future__ import annotations
4
5 from loader.runtime.clarify_strategy import ClarifySnapshot
6 from loader.runtime.workflow import (
7 ArtifactEvidenceKind,
8 WorkflowDecisionKind,
9 WorkflowMode,
10 WorkflowPolicy,
11 WorkflowTimelineEntry,
12 WorkflowTimelineEntryKind,
13 )
14 from loader.runtime.workflow_signals import WorkflowSignalPacket
15
16
def test_workflow_policy_reports_winner_and_runner_up() -> None:
    """Route a free-text task and check both the winner and the runner-up.

    The decision must pick clarify mode with a score at or above the
    policy's clarify threshold, name a runner-up with a positive score,
    and carry non-empty pressure and signal summaries.
    """
    workflow_policy = WorkflowPolicy()
    task_text = "Improve Loader so it feels more like claw-code."

    outcome = workflow_policy.route(task_text)

    # Winning route.
    assert outcome.mode == WorkflowMode.CLARIFY
    assert outcome.route_score >= workflow_policy.clarify_threshold
    # Runner-up route.
    assert outcome.runner_up_mode is not None
    assert outcome.runner_up_score > 0
    # Explanatory summaries must be populated.
    assert outcome.pressure_summary
    assert outcome.signal_summary
28
29
def test_workflow_policy_routes_from_typed_signal_packet() -> None:
    """Route from a prebuilt signal packet instead of raw task text.

    The resulting decision must select clarify mode and expose a signal
    summary equal to the one supplied in the packet.
    """
    expected_summary = ["ambiguity=0.62", "complexity=0.28"]
    packet = WorkflowSignalPacket(
        task="Keep improving Loader.",
        ambiguity_score=0.62,
        complexity_score=0.28,
        allow_clarify=True,
        # Hand over a copy so the expectation stays an independent list.
        signal_summary=list(expected_summary),
    )
    workflow_policy = WorkflowPolicy()

    outcome = workflow_policy.route_from_signals(packet)

    assert outcome.mode == WorkflowMode.CLARIFY
    assert outcome.signal_summary == expected_summary
45
46
def test_workflow_policy_prefers_plan_refresh_for_stale_plan() -> None:
    """Route a task that already has a plan which is flagged as stale.

    The decision must go to plan mode as a re-entry with the
    ``stale_plan_artifacts`` reason, and schedule execute mode next.
    """
    workflow_policy = WorkflowPolicy()
    task_text = "Keep working on the runtime task."

    outcome = workflow_policy.route(task_text, has_plan=True, stale_plan=True)

    assert outcome.mode == WorkflowMode.PLAN
    assert outcome.reason_code == "stale_plan_artifacts"
    assert outcome.decision_kind == "reentry"
    assert outcome.scheduled_next_mode == WorkflowMode.EXECUTE
60
61
def test_workflow_policy_marks_unplanned_touched_files_as_stale() -> None:
    """Assess artifact freshness when a touched file is absent from the plan.

    The implementation plan text only mentions ``loader.py``; touching
    ``unplanned.py`` as well must mark the plan stale and name that file
    in the first reported reason.
    """
    implementation_plan = "# Implementation Plan\n- Update loader.py only\n"
    verification_plan = "# Verification Plan\n- Run pytest\n"
    touched = ["/tmp/loader.py", "/tmp/unplanned.py"]
    workflow_policy = WorkflowPolicy()

    report = workflow_policy.assess_artifact_freshness(
        implementation_text=implementation_plan,
        verification_text=verification_plan,
        touched_files=touched,
    )

    assert report.stale_plan is True
    assert "unplanned.py" in report.reasons[0]
73
74
def test_workflow_policy_requests_follow_up_when_clarify_answer_is_still_ambiguous() -> None:
    """Review a first-round clarify answer against an empty snapshot.

    With no explicit sections in the snapshot and round 1 of 2, the review
    must ask to continue with the ``clarify_follow_up_needed`` reason,
    report unresolved questions and slots, and focus on
    ``likely_touchpoints``.
    """
    task_text = "Improve Loader so it feels more like claw-code."
    empty_snapshot = ClarifySnapshot(
        task_statement=task_text,
        explicit_sections=[],
    )
    workflow_policy = WorkflowPolicy()

    verdict = workflow_policy.review_clarify(
        task=task_text,
        answer="Make it nicer.",
        snapshot=empty_snapshot,
        round_index=1,
        max_rounds=2,
    )

    assert verdict.should_continue is True
    assert verdict.reason_code == "clarify_follow_up_needed"
    assert verdict.unresolved_questions
    assert verdict.unresolved_slots
    assert verdict.focus_slot == "likely_touchpoints"
94
95
def test_workflow_policy_requests_pressure_pass_on_later_clarify_round() -> None:
    """Review a second-round clarify answer whose snapshot has two sections.

    The snapshot already carries a desired outcome and a likely touchpoint;
    on round 2 of 4 the review must still continue, with the
    ``clarify_pressure_pass_required`` reason, at the ``readiness`` stage,
    with a ``tradeoff`` pressure kind and an incomplete pressure pass.
    """
    task_text = "Improve Loader runtime behavior."
    partial_snapshot = ClarifySnapshot(
        task_statement=task_text,
        explicit_sections=["desired_outcome", "likely_touchpoints"],
        desired_outcome=["Make the runtime flow more disciplined."],
        likely_touchpoints=["src/loader/runtime/conversation.py"],
    )
    workflow_policy = WorkflowPolicy()

    verdict = workflow_policy.review_clarify(
        task=task_text,
        answer="Focus on src/loader/runtime/conversation.py.",
        snapshot=partial_snapshot,
        round_index=2,
        max_rounds=4,
    )

    assert verdict.should_continue is True
    assert verdict.reason_code == "clarify_pressure_pass_required"
    assert verdict.stage == "readiness"
    assert verdict.pressure_kind == "tradeoff"
    assert verdict.pressure_pass_complete is False
117
118
def test_workflow_timeline_entry_round_trips() -> None:
    """Serialize a populated timeline entry and rebuild it from the dict.

    The entry restored via ``from_dict(to_dict())`` must compare equal to
    the original entry.
    """
    # Human-readable evidence label derived from the enum value.
    touchpoint_label = ArtifactEvidenceKind.CONFIRMED_TOUCHPOINT.value.replace("_", " ")
    evidence_line = (
        f"{touchpoint_label}: `conversation.py` was already touched during execution."
    )

    original = WorkflowTimelineEntry(
        timestamp="2026-04-07T12:00:00Z",
        kind=WorkflowTimelineEntryKind.ROUTE.value,
        mode=WorkflowMode.PLAN.value,
        reason_code="task_is_complex",
        summary="plan: workflow pressure favors a persisted plan before execution",
        decision_kind="initial_route",
        route_score=0.81,
        runner_up_mode="clarify",
        runner_up_score=0.66,
        scheduled_next_mode="execute",
        unresolved_questions=["Scope is still broad."],
        signal_summary=["ambiguity=0.20", "complexity=0.81"],
        evidence_summary=[evidence_line],
        clarify_stage="readiness",
        clarify_pressure_kind="tradeoff",
        pressure_pass_complete=False,
        missing_readiness_gates=["non_goals", "decision_boundaries"],
        prompt_format="native",
        prompt_sections=["Runtime Config", "Workflow Context"],
        artifact_paths=["/tmp/implementation.md"],
    )

    serialized = original.to_dict()
    rebuilt = WorkflowTimelineEntry.from_dict(serialized)

    assert rebuilt == original
151
152
def test_workflow_accountability_entry_round_trips() -> None:
    """Build an accountability entry and round-trip it through a dict.

    The entry comes from the ``WorkflowTimelineEntry.accountability``
    constructor; rebuilding it via ``from_dict(to_dict())`` must produce
    an equal entry.
    """
    original = WorkflowTimelineEntry.accountability(
        kind=WorkflowTimelineEntryKind.COMPLETION_CONTINUE,
        mode=WorkflowMode.EXECUTE,
        reason_code="verification_failed_reentry",
        summary="completion: verification failed; returning to execute for fixes",
        policy_stage="definition_of_done",
        policy_outcome="continue",
        decision_kind=WorkflowDecisionKind.FORCED,
        prompt_format="native",
        prompt_sections=["Runtime Config", "Workflow Context"],
        signal_summary=["stage=definition_of_done"],
        evidence_summary=["verification contradiction: pytest still failed"],
        artifact_paths=["/tmp/verification.md"],
    )

    serialized = original.to_dict()
    rebuilt = WorkflowTimelineEntry.from_dict(serialized)

    assert rebuilt == original