| 1 | """Tests for the Sprint 10 workflow policy and timeline core.""" |
| 2 | |
| 3 | from __future__ import annotations |
| 4 | |
| 5 | from loader.runtime.clarify_strategy import ClarifySnapshot |
| 6 | from loader.runtime.workflow import ( |
| 7 | ArtifactEvidenceKind, |
| 8 | WorkflowDecisionKind, |
| 9 | WorkflowMode, |
| 10 | WorkflowPolicy, |
| 11 | WorkflowTimelineEntry, |
| 12 | WorkflowTimelineEntryKind, |
| 13 | ) |
| 14 | from loader.runtime.workflow_signals import WorkflowSignalPacket |
| 15 | |
| 16 | |
def test_workflow_policy_reports_winner_and_runner_up() -> None:
    """Route a loosely worded request and check the winner and runner-up are reported."""
    router = WorkflowPolicy()
    request_text = "Improve Loader so it feels more like claw-code."

    outcome = router.route(request_text)

    # Winning mode, and its score measured against the policy's own clarify threshold.
    assert outcome.mode == WorkflowMode.CLARIFY
    assert outcome.route_score >= router.clarify_threshold

    # A runner-up must be named and carry a positive score.
    assert outcome.runner_up_mode is not None
    assert outcome.runner_up_score > 0

    # Both summaries are expected to be non-empty (truthy).
    assert outcome.pressure_summary
    assert outcome.signal_summary
| 28 | |
| 29 | |
def test_workflow_policy_routes_from_typed_signal_packet() -> None:
    """Route from a prebuilt signal packet and check its summary reaches the decision."""
    router = WorkflowPolicy()
    summary_lines = ["ambiguity=0.62", "complexity=0.28"]
    # Hand the packet its own copy so the expectation list stays untouched.
    packet = WorkflowSignalPacket(
        task="Keep improving Loader.",
        ambiguity_score=0.62,
        complexity_score=0.28,
        allow_clarify=True,
        signal_summary=list(summary_lines),
    )

    outcome = router.route_from_signals(packet)

    assert outcome.mode == WorkflowMode.CLARIFY
    assert outcome.signal_summary == summary_lines
| 45 | |
| 46 | |
def test_workflow_policy_prefers_plan_refresh_for_stale_plan() -> None:
    """Route with an existing but stale plan and check the decision goes back to PLAN."""
    router = WorkflowPolicy()
    request_text = "Keep working on the runtime task."

    outcome = router.route(request_text, has_plan=True, stale_plan=True)

    # The decision re-enters planning because of the stale artifacts ...
    assert outcome.mode == WorkflowMode.PLAN
    assert outcome.reason_code == "stale_plan_artifacts"
    assert outcome.decision_kind == "reentry"
    # ... and schedules execution as the following mode.
    assert outcome.scheduled_next_mode == WorkflowMode.EXECUTE
| 60 | |
| 61 | |
def test_workflow_policy_marks_unplanned_touched_files_as_stale() -> None:
    """Assess freshness when a touched file is not mentioned in the plan text."""
    router = WorkflowPolicy()
    implementation_plan = "# Implementation Plan\n- Update loader.py only\n"
    verification_plan = "# Verification Plan\n- Run pytest\n"
    # The second path does not appear anywhere in the implementation plan above.
    touched = ["/tmp/loader.py", "/tmp/unplanned.py"]

    report = router.assess_artifact_freshness(
        implementation_text=implementation_plan,
        verification_text=verification_plan,
        touched_files=touched,
    )

    assert report.stale_plan is True
    first_reason = report.reasons[0]
    assert "unplanned.py" in first_reason
| 73 | |
| 74 | |
def test_workflow_policy_requests_follow_up_when_clarify_answer_is_still_ambiguous() -> None:
    """Review a vague clarify answer on round 1 of 2 and expect a follow-up request."""
    router = WorkflowPolicy()
    task_text = "Improve Loader so it feels more like claw-code."
    # Snapshot with no explicit sections filled in.
    empty_snapshot = ClarifySnapshot(
        task_statement="Improve Loader so it feels more like claw-code.",
        explicit_sections=[],
    )

    verdict = router.review_clarify(
        task=task_text,
        answer="Make it nicer.",
        snapshot=empty_snapshot,
        round_index=1,
        max_rounds=2,
    )

    assert verdict.should_continue is True
    assert verdict.reason_code == "clarify_follow_up_needed"
    # Open questions and slots must both be reported (non-empty).
    assert verdict.unresolved_questions
    assert verdict.unresolved_slots
    assert verdict.focus_slot == "likely_touchpoints"
| 94 | |
| 95 | |
def test_workflow_policy_requests_pressure_pass_on_later_clarify_round() -> None:
    """Review a concrete answer on round 2 of 4 and expect a pending pressure pass."""
    router = WorkflowPolicy()
    task_text = "Improve Loader runtime behavior."
    # Snapshot that already carries an outcome and a touchpoint as explicit sections.
    filled_snapshot = ClarifySnapshot(
        task_statement="Improve Loader runtime behavior.",
        explicit_sections=["desired_outcome", "likely_touchpoints"],
        desired_outcome=["Make the runtime flow more disciplined."],
        likely_touchpoints=["src/loader/runtime/conversation.py"],
    )

    verdict = router.review_clarify(
        task=task_text,
        answer="Focus on src/loader/runtime/conversation.py.",
        snapshot=filled_snapshot,
        round_index=2,
        max_rounds=4,
    )

    assert verdict.should_continue is True
    assert verdict.reason_code == "clarify_pressure_pass_required"
    assert verdict.stage == "readiness"
    assert verdict.pressure_kind == "tradeoff"
    assert verdict.pressure_pass_complete is False
| 117 | |
| 118 | |
def test_workflow_timeline_entry_round_trips() -> None:
    """Serialize a fully populated route entry and check it restores to an equal value."""
    route_kind = WorkflowTimelineEntryKind.ROUTE.value
    plan_mode = WorkflowMode.PLAN.value
    # Evidence line: the enum value with underscores turned into spaces, then fixed text.
    touchpoint_label = ArtifactEvidenceKind.CONFIRMED_TOUCHPOINT.value.replace("_", " ")
    evidence_line = (
        f"{touchpoint_label}: "
        "`conversation.py` was already touched during execution."
    )

    original = WorkflowTimelineEntry(
        timestamp="2026-04-07T12:00:00Z",
        kind=route_kind,
        mode=plan_mode,
        reason_code="task_is_complex",
        summary="plan: workflow pressure favors a persisted plan before execution",
        decision_kind="initial_route",
        route_score=0.81,
        runner_up_mode="clarify",
        runner_up_score=0.66,
        scheduled_next_mode="execute",
        unresolved_questions=["Scope is still broad."],
        signal_summary=["ambiguity=0.20", "complexity=0.81"],
        evidence_summary=[evidence_line],
        clarify_stage="readiness",
        clarify_pressure_kind="tradeoff",
        pressure_pass_complete=False,
        missing_readiness_gates=["non_goals", "decision_boundaries"],
        prompt_format="native",
        prompt_sections=["Runtime Config", "Workflow Context"],
        artifact_paths=["/tmp/implementation.md"],
    )

    payload = original.to_dict()
    rebuilt = WorkflowTimelineEntry.from_dict(payload)

    assert rebuilt == original
| 151 | |
| 152 | |
def test_workflow_accountability_entry_round_trips() -> None:
    """Build an entry via the accountability constructor and check the dict round trip."""
    prompt_sections = ["Runtime Config", "Workflow Context"]

    # Enum members are passed directly here (not their ``.value``).
    original = WorkflowTimelineEntry.accountability(
        kind=WorkflowTimelineEntryKind.COMPLETION_CONTINUE,
        mode=WorkflowMode.EXECUTE,
        reason_code="verification_failed_reentry",
        summary="completion: verification failed; returning to execute for fixes",
        policy_stage="definition_of_done",
        policy_outcome="continue",
        decision_kind=WorkflowDecisionKind.FORCED,
        prompt_format="native",
        prompt_sections=prompt_sections,
        signal_summary=["stage=definition_of_done"],
        evidence_summary=["verification contradiction: pytest still failed"],
        artifact_paths=["/tmp/verification.md"],
    )

    payload = original.to_dict()
    rebuilt = WorkflowTimelineEntry.from_dict(payload)

    assert rebuilt == original