Python · 12696 bytes Raw Blame History
1 """Tests for shared workflow timeline read models."""
2
3 from __future__ import annotations
4
5 from loader.runtime.evidence_provenance import EvidenceProvenance
6 from loader.runtime.verification_observations import VerificationObservation
7 from loader.runtime.workflow_ledger import WorkflowLedger, WorkflowLedgerItem
8 from loader.runtime.workflow_policy import WorkflowTimelineEntry
9 from loader.runtime.workflow_timeline_read_model import project_workflow_timeline
10
11
def test_project_workflow_timeline_builds_policy_views_and_highlights() -> None:
    """Check the policy view, latest-policy rollups, and highlights together.

    The timeline holds a ``repair_retry`` entry followed by a
    ``completion_continue`` entry that carries contradicting verification
    evidence and a failed observation; the ledger holds one contradicted
    assumption. The projection is expected to list both entries as policy
    entries, summarise the latest one, and emit one highlight per theme.
    """
    contradicting_evidence = EvidenceProvenance(
        category="verification",
        source="dod.evidence",
        summary="verification failed for `pytest -q`",
        status="contradicts",
        subject="pytest -q",
    )
    failed_observation = VerificationObservation(
        status="failed",
        summary="verification failed for `pytest -q`",
        command="pytest -q",
        kind="test",
        detail="1 failed",
    )
    repair_retry = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:00:00Z",
        kind="repair_retry",
        mode="execute",
        reason_code="raw_text_tool_recovered",
        summary="repair: recovered raw-text tool calls into executable tool invocations",
        decision_kind="forced",
        policy_stage="raw_text_tool_fallback",
        policy_outcome="retry",
    )
    completion_continue = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:01:00Z",
        kind="completion_continue",
        mode="execute",
        reason_code="verification_failed_reentry",
        summary=(
            "completion: continued after verification failed and the runtime "
            "re-entered execute mode"
        ),
        decision_kind="forced",
        policy_stage="definition_of_done",
        policy_outcome="continue",
        evidence_provenance=[contradicting_evidence],
        verification_observations=[failed_observation],
    )
    contradicted_assumption = WorkflowLedgerItem(
        text="notes.txt stays out of scope.",
        status="contradicted",
        introduced_phase="clarify",
        updated_phase="recovery",
        evidence=["Execution already touched notes.txt."],
    )
    ledger = WorkflowLedger(assumptions=[contradicted_assumption])

    view = project_workflow_timeline(
        [repair_retry, completion_continue],
        workflow_ledger=ledger,
    )

    assert view.total_entries == 2
    policy_kinds = [entry.kind for entry in view.policy_entries]
    assert policy_kinds == ["repair_retry", "completion_continue"]

    # The latest policy summary must mention the reason code, the provenance
    # tag, and the observed verification detail.
    policy_summary = view.latest_policy_summary
    assert policy_summary is not None
    assert "verification_failed_reentry" in policy_summary
    assert "provenance=contradicts:verification@dod.evidence(pytest -q)" in policy_summary
    assert "observed=verification failed for `pytest -q` [1 failed]" in policy_summary

    policy_evidence = view.latest_policy_evidence
    assert policy_evidence is not None
    assert policy_evidence.blocking == ["verification failed for `pytest -q`"]
    assert view.latest_policy_observed_verification == [
        "verification failed for `pytest -q` [1 failed]"
    ]

    # One highlight is expected for each of these prefixes, checked in order.
    for prefix in (
        "Repair path:",
        "Completion decision:",
        "Contradicted assumptions:",
    ):
        assert any(line.startswith(prefix) for line in view.highlights)
93
94
def test_project_workflow_timeline_treats_verify_observation_as_accountability() -> None:
    """A ``verify_observation`` entry is kept when ``accountability_only=True``.

    The single failed-verification entry must appear in both ``policy_entries``
    and ``entries``, feed the latest policy summary, and yield a
    "Verify observed:" highlight.
    """
    failed_observation = VerificationObservation(
        status="failed",
        summary="verification failed for `pytest -q`",
        command="pytest -q",
        kind="test",
        detail="1 failed",
    )
    verify_entry = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:03:00Z",
        kind="verify_observation",
        mode="verify",
        reason_code="verification_command_failed",
        summary="verify: verification failed for `pytest -q`",
        decision_kind="forced",
        policy_stage="verification",
        policy_outcome="failed",
        verification_observations=[failed_observation],
    )

    view = project_workflow_timeline([verify_entry], accountability_only=True)

    expected_kinds = ["verify_observation"]
    assert [entry.kind for entry in view.policy_entries] == expected_kinds
    assert [entry.kind for entry in view.entries] == expected_kinds

    policy_summary = view.latest_policy_summary
    assert policy_summary is not None
    assert "policy-stage=verification" in policy_summary
    assert "observed=verification failed for `pytest -q` [1 failed]" in policy_summary
    assert any(line.startswith("Verify observed:") for line in view.highlights)
128
129
def test_project_workflow_timeline_highlights_pending_verification() -> None:
    """A pending verification entry yields a "Verify pending:" highlight.

    The latest policy summary must also report the pending outcome and the
    observation text annotated with its attempt number.
    """
    pending_observation = VerificationObservation(
        status="pending",
        summary="verification pending for `pytest -q`",
        command="pytest -q",
        kind="test",
        attempt_id="verification-attempt-2",
        attempt_number=2,
    )
    pending_entry = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:03:00Z",
        kind="verify_observation",
        mode="verify",
        reason_code="verification_pending",
        summary="verify: verification is pending for the active command set",
        decision_kind="forced",
        policy_stage="verification",
        policy_outcome="pending",
        verification_observations=[pending_observation],
    )

    view = project_workflow_timeline([pending_entry], accountability_only=True)

    policy_summary = view.latest_policy_summary
    assert policy_summary is not None
    assert "policy-outcome=pending" in policy_summary
    assert "observed=verification pending for `pytest -q` [attempt 2]" in policy_summary
    assert any(line.startswith("Verify pending:") for line in view.highlights)
162
163
def test_project_workflow_timeline_highlights_planned_verification() -> None:
    """A planned verification entry yields a "Verify planned:" highlight.

    The latest policy summary must report the planned outcome and render the
    observation with both its detail and its attempt number.
    """
    planned_observation = VerificationObservation(
        status="planned",
        summary="verification planned for `pytest -q`",
        command="pytest -q",
        kind="runtime",
        detail="write changed README.md",
        attempt_id="verification-attempt-3",
        attempt_number=3,
    )
    planned_entry = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:02:00Z",
        kind="verify_observation",
        mode="execute",
        reason_code="verification_planned",
        summary="verify: verification is planned after new mutating work",
        decision_kind="forced",
        policy_stage="verification",
        policy_outcome="planned",
        verification_observations=[planned_observation],
    )

    view = project_workflow_timeline([planned_entry], accountability_only=True)

    expected_observed = (
        "observed=verification planned for `pytest -q` "
        "[write changed README.md; attempt 3]"
    )
    policy_summary = view.latest_policy_summary
    assert policy_summary is not None
    assert "policy-outcome=planned" in policy_summary
    assert expected_observed in policy_summary
    assert any(line.startswith("Verify planned:") for line in view.highlights)
200
201
def test_project_workflow_timeline_highlights_stale_verification() -> None:
    """A stale verification entry yields a "Verify stale:" highlight.

    The observation carries both an attempt id/number and a
    ``supersedes_attempt_id``; the latest policy summary is expected to render
    them as ``attempt 1 -> attempt 2`` after the detail text.
    """
    stale_observation = VerificationObservation(
        status="stale",
        summary="verification became stale for `pytest -q` after new mutating work",
        command="pytest -q",
        kind="runtime",
        detail="write changed README.md",
        attempt_id="verification-attempt-1",
        attempt_number=1,
        supersedes_attempt_id="verification-attempt-2",
    )
    stale_entry = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:04:00Z",
        kind="verify_observation",
        mode="execute",
        reason_code="verification_stale",
        summary="verify: previous verification became stale after new mutating work",
        decision_kind="forced",
        policy_stage="verification",
        policy_outcome="stale",
        verification_observations=[stale_observation],
    )

    view = project_workflow_timeline([stale_entry], accountability_only=True)

    expected_observed = (
        "observed=verification became stale for `pytest -q` after new mutating work "
        "[write changed README.md; attempt 1 -> attempt 2]"
    )
    policy_summary = view.latest_policy_summary
    assert policy_summary is not None
    assert "policy-outcome=stale" in policy_summary
    assert expected_observed in policy_summary
    assert any(line.startswith("Verify stale:") for line in view.highlights)
241
242
def test_project_workflow_timeline_applies_policy_filters_and_limits() -> None:
    """Combine ``accountability_only``, ``mode`` and ``limit`` on three entries.

    The timeline has a plan-mode handoff and two execute-mode policy entries.
    With ``mode="execute"`` and ``limit=1`` the projection is expected to still
    count all three entries, expose only ``completion_check`` in ``entries``,
    and keep both execute-mode entries in ``policy_entries``.
    """
    plan_handoff = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:00:00Z",
        kind="handoff",
        mode="plan",
        reason_code="task_is_complex",
        summary="plan: workflow pressure favors a persisted plan before execution",
        decision_kind="handoff",
    )
    repair_retry = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:01:00Z",
        kind="repair_retry",
        mode="execute",
        reason_code="raw_text_tool_recovered",
        summary="repair: recovered raw-text tool calls into executable tool invocations",
        decision_kind="forced",
        policy_stage="raw_text_tool_fallback",
        policy_outcome="retry",
    )
    completion_check = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:02:00Z",
        kind="completion_check",
        mode="execute",
        reason_code="completion_response_accepted",
        summary=(
            "completion: accepted the response because completion heuristics "
            "found no missing follow-through"
        ),
        decision_kind="forced",
        policy_stage="continuation_check",
        policy_outcome="accept",
    )
    timeline = [plan_handoff, repair_retry, completion_check]

    view = project_workflow_timeline(
        timeline,
        accountability_only=True,
        mode="execute",
        limit=1,
    )

    assert view.total_entries == 3
    visible_kinds = [entry.kind for entry in view.entries]
    assert visible_kinds == ["completion_check"]
    policy_kinds = [entry.kind for entry in view.policy_entries]
    assert policy_kinds == ["repair_retry", "completion_check"]
291
292
def test_project_workflow_timeline_rolls_up_supporting_and_missing_evidence() -> None:
    """Evidence summaries are grouped by status on the latest policy evidence.

    A ``completion_finalize`` entry carries one ``missing`` and one
    ``supports`` provenance record; each summary is expected in the matching
    ``missing`` / ``supporting`` list of ``latest_policy_evidence``.
    """
    missing_verification = EvidenceProvenance(
        category="verification",
        source="dod.evidence",
        summary="verification evidence was still missing for `pytest -q`",
        status="missing",
        subject="pytest -q",
    )
    tracked_work_done = EvidenceProvenance(
        category="tracked_work",
        source="dod.pending_items",
        summary="all tracked work items except verification were complete",
        status="supports",
    )
    finalize_entry = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:02:00Z",
        kind="completion_finalize",
        mode="verify",
        reason_code="verification_budget_exhausted",
        summary="completion: stopped because verification evidence was still missing",
        decision_kind="forced",
        policy_stage="definition_of_done",
        policy_outcome="finalize",
        evidence_provenance=[missing_verification, tracked_work_done],
    )

    view = project_workflow_timeline([finalize_entry])

    policy_evidence = view.latest_policy_evidence
    assert policy_evidence is not None
    assert policy_evidence.missing == [
        "verification evidence was still missing for `pytest -q`"
    ]
    assert policy_evidence.supporting == [
        "all tracked work items except verification were complete"
    ]