| 1 | """Tests for shared workflow timeline read models.""" |
| 2 | |
| 3 | from __future__ import annotations |
| 4 | |
| 5 | from loader.runtime.evidence_provenance import EvidenceProvenance |
| 6 | from loader.runtime.verification_observations import VerificationObservation |
| 7 | from loader.runtime.workflow_ledger import WorkflowLedger, WorkflowLedgerItem |
| 8 | from loader.runtime.workflow_policy import WorkflowTimelineEntry |
| 9 | from loader.runtime.workflow_timeline_read_model import project_workflow_timeline |
| 10 | |
| 11 | |
def test_project_workflow_timeline_builds_policy_views_and_highlights() -> None:
    """Project two policy entries together with a ledger and inspect the result.

    The timeline holds a ``repair_retry`` entry followed by a
    ``completion_continue`` entry that carries contradicting evidence and a
    failed verification observation; the ledger holds one contradicted
    assumption.  The assertions cover the entry count, the policy-entry
    order, fragments of the latest policy summary, the evidence and
    observation roll-ups, and the highlight prefixes.
    """
    repair_retry = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:00:00Z",
        kind="repair_retry",
        mode="execute",
        reason_code="raw_text_tool_recovered",
        summary="repair: recovered raw-text tool calls into executable tool invocations",
        decision_kind="forced",
        policy_stage="raw_text_tool_fallback",
        policy_outcome="retry",
    )
    contradicting_evidence = EvidenceProvenance(
        category="verification",
        source="dod.evidence",
        summary="verification failed for `pytest -q`",
        status="contradicts",
        subject="pytest -q",
    )
    failed_run = VerificationObservation(
        status="failed",
        summary="verification failed for `pytest -q`",
        command="pytest -q",
        kind="test",
        detail="1 failed",
    )
    completion_continue = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:01:00Z",
        kind="completion_continue",
        mode="execute",
        reason_code="verification_failed_reentry",
        summary=(
            "completion: continued after verification failed and the runtime "
            "re-entered execute mode"
        ),
        decision_kind="forced",
        policy_stage="definition_of_done",
        policy_outcome="continue",
        evidence_provenance=[contradicting_evidence],
        verification_observations=[failed_run],
    )
    contradicted_assumption = WorkflowLedgerItem(
        text="notes.txt stays out of scope.",
        status="contradicted",
        introduced_phase="clarify",
        updated_phase="recovery",
        evidence=["Execution already touched notes.txt."],
    )
    ledger = WorkflowLedger(assumptions=[contradicted_assumption])

    view = project_workflow_timeline(
        [repair_retry, completion_continue],
        workflow_ledger=ledger,
    )

    # Both entries count and both surface as policy entries, in timeline order.
    assert view.total_entries == 2
    policy_kinds = [item.kind for item in view.policy_entries]
    assert policy_kinds == ["repair_retry", "completion_continue"]

    # The latest policy summary is expected to contain these fragments.
    latest_summary = view.latest_policy_summary
    assert latest_summary is not None
    assert "verification_failed_reentry" in latest_summary
    assert "provenance=contradicts:verification@dod.evidence(pytest -q)" in latest_summary
    assert "observed=verification failed for `pytest -q` [1 failed]" in latest_summary

    # Evidence and observation roll-ups for the latest policy entry.
    latest_evidence = view.latest_policy_evidence
    assert latest_evidence is not None
    assert latest_evidence.blocking == ["verification failed for `pytest -q`"]
    assert view.latest_policy_observed_verification == [
        "verification failed for `pytest -q` [1 failed]"
    ]

    # One highlight per prefix: repair path, completion decision, ledger contradiction.
    for prefix in ("Repair path:", "Completion decision:", "Contradicted assumptions:"):
        assert any(line.startswith(prefix) for line in view.highlights)
| 93 | |
| 94 | |
def test_project_workflow_timeline_treats_verify_observation_as_accountability() -> None:
    """Check a failed ``verify_observation`` entry under ``accountability_only=True``.

    The single entry must appear in both ``policy_entries`` and ``entries``,
    the latest policy summary must mention the verification stage and the
    observed failure, and a ``Verify observed:`` highlight must be present.
    """
    failed_run = VerificationObservation(
        status="failed",
        summary="verification failed for `pytest -q`",
        command="pytest -q",
        kind="test",
        detail="1 failed",
    )
    verify_entry = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:03:00Z",
        kind="verify_observation",
        mode="verify",
        reason_code="verification_command_failed",
        summary="verify: verification failed for `pytest -q`",
        decision_kind="forced",
        policy_stage="verification",
        policy_outcome="failed",
        verification_observations=[failed_run],
    )

    view = project_workflow_timeline([verify_entry], accountability_only=True)

    # The entry is kept by the accountability filter in both projected lists.
    policy_kinds = [item.kind for item in view.policy_entries]
    assert policy_kinds == ["verify_observation"]
    visible_kinds = [item.kind for item in view.entries]
    assert visible_kinds == ["verify_observation"]

    latest_summary = view.latest_policy_summary
    assert latest_summary is not None
    assert "policy-stage=verification" in latest_summary
    assert "observed=verification failed for `pytest -q` [1 failed]" in latest_summary

    assert any(line.startswith("Verify observed:") for line in view.highlights)
| 128 | |
| 129 | |
def test_project_workflow_timeline_highlights_pending_verification() -> None:
    """Check how a pending verification observation (attempt 2) is projected.

    The latest policy summary must carry the ``pending`` outcome and the
    observation text with its ``[attempt 2]`` suffix, and a
    ``Verify pending:`` highlight must be present.
    """
    pending_run = VerificationObservation(
        status="pending",
        summary="verification pending for `pytest -q`",
        command="pytest -q",
        kind="test",
        attempt_id="verification-attempt-2",
        attempt_number=2,
    )
    verify_entry = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:03:00Z",
        kind="verify_observation",
        mode="verify",
        reason_code="verification_pending",
        summary="verify: verification is pending for the active command set",
        decision_kind="forced",
        policy_stage="verification",
        policy_outcome="pending",
        verification_observations=[pending_run],
    )

    view = project_workflow_timeline([verify_entry], accountability_only=True)

    latest_summary = view.latest_policy_summary
    assert latest_summary is not None
    assert "policy-outcome=pending" in latest_summary
    assert "observed=verification pending for `pytest -q` [attempt 2]" in latest_summary

    assert any(line.startswith("Verify pending:") for line in view.highlights)
| 162 | |
| 163 | |
def test_project_workflow_timeline_highlights_planned_verification() -> None:
    """Check how a planned verification observation (attempt 3) is projected.

    The latest policy summary must carry the ``planned`` outcome and the
    observation text followed by its detail and attempt number, and a
    ``Verify planned:`` highlight must be present.
    """
    planned_run = VerificationObservation(
        status="planned",
        summary="verification planned for `pytest -q`",
        command="pytest -q",
        kind="runtime",
        detail="write changed README.md",
        attempt_id="verification-attempt-3",
        attempt_number=3,
    )
    verify_entry = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:02:00Z",
        kind="verify_observation",
        mode="execute",
        reason_code="verification_planned",
        summary="verify: verification is planned after new mutating work",
        decision_kind="forced",
        policy_stage="verification",
        policy_outcome="planned",
        verification_observations=[planned_run],
    )

    view = project_workflow_timeline([verify_entry], accountability_only=True)

    latest_summary = view.latest_policy_summary
    assert latest_summary is not None
    assert "policy-outcome=planned" in latest_summary

    # Detail and attempt number are expected inside one bracketed suffix.
    expected_observed = (
        "observed=verification planned for `pytest -q` "
        "[write changed README.md; attempt 3]"
    )
    assert expected_observed in latest_summary

    assert any(line.startswith("Verify planned:") for line in view.highlights)
| 200 | |
| 201 | |
def test_project_workflow_timeline_highlights_stale_verification() -> None:
    """Check how a stale verification observation is projected.

    The observation is attempt 1 and names ``verification-attempt-2`` as its
    ``supersedes_attempt_id``; the expected summary renders that pair as
    ``attempt 1 -> attempt 2``.  The summary must also carry the ``stale``
    outcome, and a ``Verify stale:`` highlight must be present.
    """
    stale_run = VerificationObservation(
        status="stale",
        summary="verification became stale for `pytest -q` after new mutating work",
        command="pytest -q",
        kind="runtime",
        detail="write changed README.md",
        attempt_id="verification-attempt-1",
        attempt_number=1,
        supersedes_attempt_id="verification-attempt-2",
    )
    verify_entry = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:04:00Z",
        kind="verify_observation",
        mode="execute",
        reason_code="verification_stale",
        summary="verify: previous verification became stale after new mutating work",
        decision_kind="forced",
        policy_stage="verification",
        policy_outcome="stale",
        verification_observations=[stale_run],
    )

    view = project_workflow_timeline([verify_entry], accountability_only=True)

    latest_summary = view.latest_policy_summary
    assert latest_summary is not None
    assert "policy-outcome=stale" in latest_summary

    # Detail and the attempt transition are expected inside one bracketed suffix.
    expected_observed = (
        "observed=verification became stale for `pytest -q` after new mutating work "
        "[write changed README.md; attempt 1 -> attempt 2]"
    )
    assert expected_observed in latest_summary

    assert any(line.startswith("Verify stale:") for line in view.highlights)
| 241 | |
| 242 | |
def test_project_workflow_timeline_applies_policy_filters_and_limits() -> None:
    """Check ``accountability_only``, ``mode`` and ``limit`` applied together.

    The timeline has a plan-mode handoff followed by two execute-mode policy
    entries.  With ``mode="execute"`` and ``limit=1`` the test expects
    ``total_entries`` to stay at 3, ``entries`` to contain only the last
    entry, and ``policy_entries`` to list both execute-mode policy entries.
    """
    plan_handoff = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:00:00Z",
        kind="handoff",
        mode="plan",
        reason_code="task_is_complex",
        summary="plan: workflow pressure favors a persisted plan before execution",
        decision_kind="handoff",
    )
    repair_retry = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:01:00Z",
        kind="repair_retry",
        mode="execute",
        reason_code="raw_text_tool_recovered",
        summary="repair: recovered raw-text tool calls into executable tool invocations",
        decision_kind="forced",
        policy_stage="raw_text_tool_fallback",
        policy_outcome="retry",
    )
    completion_check = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:02:00Z",
        kind="completion_check",
        mode="execute",
        reason_code="completion_response_accepted",
        summary=(
            "completion: accepted the response because completion heuristics "
            "found no missing follow-through"
        ),
        decision_kind="forced",
        policy_stage="continuation_check",
        policy_outcome="accept",
    )
    timeline = [plan_handoff, repair_retry, completion_check]

    view = project_workflow_timeline(
        timeline,
        accountability_only=True,
        mode="execute",
        limit=1,
    )

    # The total still reflects all three supplied entries.
    assert view.total_entries == 3

    # Only the final entry is expected in ``entries`` with limit=1.
    visible_kinds = [item.kind for item in view.entries]
    assert visible_kinds == ["completion_check"]

    # ``policy_entries`` is expected to keep both execute-mode policy entries.
    policy_kinds = [item.kind for item in view.policy_entries]
    assert policy_kinds == ["repair_retry", "completion_check"]
| 291 | |
| 292 | |
def test_project_workflow_timeline_rolls_up_supporting_and_missing_evidence() -> None:
    """Check that evidence summaries are grouped by provenance status.

    A ``completion_finalize`` entry carries one ``missing`` and one
    ``supports`` provenance record; the latest policy evidence must list
    each summary under ``missing`` and ``supporting`` respectively.
    """
    missing_verification = EvidenceProvenance(
        category="verification",
        source="dod.evidence",
        summary="verification evidence was still missing for `pytest -q`",
        status="missing",
        subject="pytest -q",
    )
    tracked_work_done = EvidenceProvenance(
        category="tracked_work",
        source="dod.pending_items",
        summary="all tracked work items except verification were complete",
        status="supports",
    )
    finalize_entry = WorkflowTimelineEntry(
        timestamp="2026-04-09T12:02:00Z",
        kind="completion_finalize",
        mode="verify",
        reason_code="verification_budget_exhausted",
        summary="completion: stopped because verification evidence was still missing",
        decision_kind="forced",
        policy_stage="definition_of_done",
        policy_outcome="finalize",
        evidence_provenance=[missing_verification, tracked_work_done],
    )

    view = project_workflow_timeline([finalize_entry])

    latest_evidence = view.latest_policy_evidence
    assert latest_evidence is not None
    assert latest_evidence.missing == [
        "verification evidence was still missing for `pytest -q`"
    ]
    assert latest_evidence.supporting == [
        "all tracked work items except verification were complete"
    ]