Python · 7603 bytes Raw Blame History
1 """Tests for semantic artifact invalidation and recovery selection."""
2
3 from __future__ import annotations
4
5 from loader.runtime.artifact_invalidation import (
6 ArtifactInvalidationAssessor,
7 WorkflowRecoveryStrategy,
8 )
9 from loader.runtime.workflow import ArtifactEvidenceKind
10
11
def test_artifact_invalidation_requests_plan_refresh_for_plan_only_drift() -> None:
    """Plan-only drift: a touched file the plan never names asks for a plan refresh.

    No clarify brief is supplied. The test expects the plan (but not the
    brief) to be flagged stale, the PLAN_REFRESH strategy to be selected, and
    evidence entries mentioning both the stray file and the acceptance
    criterion.
    """
    result = ArtifactInvalidationAssessor().assess(
        task_statement="Implement the runtime report artifact.",
        clarify_text=None,
        implementation_text="# Implementation Plan\n- Create report.md only\n",
        verification_text="# Verification Plan\n- report.md exists\n",
        acceptance_criteria=["report.md exists"],
        touched_files=["/tmp/notes.md"],
        last_verification_result=None,
    )

    # Staleness flags and the selected recovery strategy.
    assert result.stale_plan is True
    assert result.stale_brief is False
    expected_strategy = WorkflowRecoveryStrategy.PLAN_REFRESH.value
    assert result.recovery_strategy == expected_strategy
    assert "touched_files_outside_plan" in result.reason_codes

    # The file touched outside the plan must show up as a confirmed touchpoint.
    touchpoint_kind = ArtifactEvidenceKind.CONFIRMED_TOUCHPOINT.value
    touchpoint_summaries = [
        entry.summary for entry in result.evidence if entry.kind == touchpoint_kind
    ]
    assert any("notes.md" in summary for summary in touchpoint_summaries)

    # The acceptance criterion must be recorded as an acceptance anchor.
    anchor_kind = ArtifactEvidenceKind.ACCEPTANCE_ANCHOR.value
    anchor_summaries = [
        entry.summary for entry in result.evidence if entry.kind == anchor_kind
    ]
    assert any("report.md exists" in summary for summary in anchor_summaries)
39
40
def test_artifact_invalidation_can_force_full_replan_when_brief_and_plan_drift() -> None:
    """Drift against both the brief and the plan should select a full replan.

    The plan and verification text talk about planned.txt, while the
    acceptance criterion and the touched file refer to notes.txt and the last
    verification result is "failed". The test expects both artifacts to be
    flagged stale, with matching reason codes and evidence entries.
    """
    assess_inputs = {
        "task_statement": "Improve Loader runtime workflow discipline.",
        "clarify_text": "# Task Brief\n\n## Desired Outcome\n- Improve Loader runtime workflow.\n",
        "implementation_text": "# Implementation Plan\n- Touch planned.txt only\n",
        "verification_text": "# Verification Plan\n## Acceptance Criteria\n- planned.txt exists.\n",
        "acceptance_criteria": ["notes.txt exists in the workspace root."],
        "touched_files": ["/tmp/notes.txt"],
        "last_verification_result": "failed",
    }
    checker = ArtifactInvalidationAssessor()
    report = checker.assess(**assess_inputs)

    assert report.stale_plan is True
    assert report.stale_brief is True
    assert report.recovery_strategy == WorkflowRecoveryStrategy.FULL_REPLAN.value

    codes = report.reason_codes
    assert "touchpoints_outside_brief" in codes
    assert "acceptance_criteria_outside_plan" in codes

    # A verification contradiction must quote the unmet acceptance criterion.
    contradiction_found = False
    for entry in report.evidence:
        if entry.kind != ArtifactEvidenceKind.VERIFICATION_CONTRADICTION.value:
            continue
        if "notes.txt exists in the workspace root." in entry.summary:
            contradiction_found = True
            break
    assert contradiction_found

    # A contradicted assumption must mention the unexpected file.
    assumption_found = False
    for entry in report.evidence:
        if entry.kind != ArtifactEvidenceKind.CONTRADICTED_ASSUMPTION.value:
            continue
        if "notes.txt" in entry.summary:
            assumption_found = True
            break
    assert assumption_found

    # The evidence summary must be non-empty (truthy).
    assert report.evidence_summary
70
71
def test_artifact_invalidation_treats_path_separator_variants_as_same_touchpoint() -> None:
    """A touched file whose name differs only in word separators is still in plan.

    The plan names ``01-getting-started.html`` (hyphens) while the touched
    file is ``01_getting_started.html`` (underscores) inside a ``chapters``
    directory. The test expects no staleness and no out-of-plan reason code.
    """
    plan_text = (
        "# Implementation Plan\n"
        "- Create 01-getting-started.html in the chapters directory.\n"
    )
    verification_plan = (
        "# Verification Plan\n"
        "## Acceptance Criteria\n"
        "- 01-getting-started.html exists.\n"
    )
    criteria = ["01-getting-started.html exists."]
    touched = ["/tmp/chapters/01_getting_started.html"]

    outcome = ArtifactInvalidationAssessor().assess(
        task_statement="Build a multi-file nginx guide.",
        clarify_text=None,
        implementation_text=plan_text,
        verification_text=verification_plan,
        acceptance_criteria=criteria,
        touched_files=touched,
        last_verification_result=None,
    )

    assert outcome.stale_plan is False
    assert outcome.stale_brief is False
    assert "touched_files_outside_plan" not in outcome.reason_codes
95
96
def test_artifact_invalidation_allows_supplemental_repair_files_after_failed_verification() -> None:
    """An extra file touched during a retry must not invalidate a completed plan.

    All three planned HTML files are touched, plus an unplanned
    ``styles.css``. With ``retry_count=1`` and
    ``planned_artifacts_complete=True`` the test expects no staleness, the
    NONE strategy, and the stylesheet still recorded as a confirmed
    touchpoint.
    """
    # NOTE(review): the test name says "after failed verification" but
    # last_verification_result is "planned" - confirm this is the intended input.
    plan_text = (
        "# Implementation Plan\n"
        "- Create index.html.\n"
        "- Create 01-getting-started.html.\n"
        "- Create 02-installation.html.\n"
    )
    verification_plan = (
        "# Verification Plan\n"
        "## Acceptance Criteria\n"
        "- index.html exists.\n"
        "- 01-getting-started.html exists.\n"
        "- 02-installation.html exists.\n"
    )
    criteria = [
        "index.html exists.",
        "01-getting-started.html exists.",
        "02-installation.html exists.",
    ]
    touched = [
        "/tmp/guides/nginx/index.html",
        "/tmp/guides/nginx/chapters/01-getting-started.html",
        "/tmp/guides/nginx/chapters/02-installation.html",
        "/tmp/guides/nginx/styles.css",
    ]

    checker = ArtifactInvalidationAssessor()
    outcome = checker.assess(
        task_statement="Build a multi-file nginx guide.",
        clarify_text=None,
        implementation_text=plan_text,
        verification_text=verification_plan,
        acceptance_criteria=criteria,
        touched_files=touched,
        last_verification_result="planned",
        retry_count=1,
        planned_artifacts_complete=True,
    )

    assert outcome.stale_plan is False
    assert outcome.stale_brief is False
    assert outcome.recovery_strategy == WorkflowRecoveryStrategy.NONE.value
    assert "touched_files_outside_plan" not in outcome.reason_codes

    # The supplemental stylesheet is still expected among the evidence.
    stylesheet_recorded = False
    for entry in outcome.evidence:
        if entry.kind != ArtifactEvidenceKind.CONFIRMED_TOUCHPOINT.value:
            continue
        if "styles.css" in entry.summary:
            stylesheet_recorded = True
            break
    assert stylesheet_recorded
141
142
def test_artifact_invalidation_treats_child_files_under_planned_directory_as_in_plan() -> None:
    """A file created inside a planned directory counts as covered by the plan.

    The plan lists ``~/Loader/guides/nginx/chapters/`` as a directory. The
    touched files live under ``/private/tmp/session/Loader/guides/nginx/``,
    including a chapter file the plan never names individually. The test
    expects the plan to stay fresh and the NONE strategy to be selected.
    """
    plan_text = (
        "# Implementation Plan\n"
        "- Create `~/Loader/guides/nginx/index.html`.\n"
        "- Create `~/Loader/guides/nginx/chapters/`.\n"
    )
    verification_plan = (
        "# Verification Plan\n"
        "## Acceptance Criteria\n"
        "- `~/Loader/guides/nginx/index.html` exists.\n"
        "- Chapter files exist under `~/Loader/guides/nginx/chapters/`.\n"
    )
    criteria = [
        "~/Loader/guides/nginx/index.html exists.",
        "Chapter files exist under ~/Loader/guides/nginx/chapters/.",
    ]
    touched = [
        "/private/tmp/session/Loader/guides/nginx/index.html",
        "/private/tmp/session/Loader/guides/nginx/chapters/03-configuration.html",
    ]

    verdict = ArtifactInvalidationAssessor().assess(
        task_statement="Build a multi-file nginx guide.",
        clarify_text=None,
        implementation_text=plan_text,
        verification_text=verification_plan,
        acceptance_criteria=criteria,
        touched_files=touched,
        last_verification_result=None,
    )

    assert verdict.stale_plan is False
    no_recovery = WorkflowRecoveryStrategy.NONE.value
    assert verdict.recovery_strategy == no_recovery
    assert "touched_files_outside_plan" not in verdict.reason_codes
174
175
def test_artifact_invalidation_keeps_root_level_sibling_files_out_of_plan() -> None:
    """A sibling of a planned file in the same directory is still out of plan.

    The plan names only ``/tmp/session/planned.txt``; the touched file is
    ``/tmp/session/notes.txt``. The test expects the plan to be flagged stale
    and the PLAN_REFRESH strategy to be selected.
    """
    plan_text = "# Implementation Plan\n" "- Create `/tmp/session/planned.txt`.\n"
    verification_plan = (
        "# Verification Plan\n"
        "## Acceptance Criteria\n"
        "- `/tmp/session/planned.txt` exists.\n"
    )

    verdict = ArtifactInvalidationAssessor().assess(
        task_statement="Implement the runtime report artifact.",
        clarify_text=None,
        implementation_text=plan_text,
        verification_text=verification_plan,
        acceptance_criteria=["/tmp/session/planned.txt exists."],
        touched_files=["/tmp/session/notes.txt"],
        last_verification_result=None,
    )

    assert verdict.stale_plan is True
    refresh = WorkflowRecoveryStrategy.PLAN_REFRESH.value
    assert verdict.recovery_strategy == refresh
    assert "touched_files_outside_plan" in verdict.reason_codes
198 assert "touched_files_outside_plan" in freshness.reason_codes