| 1 | """Tests for semantic artifact invalidation and recovery selection.""" |
| 2 | |
| 3 | from __future__ import annotations |
| 4 | |
| 5 | from loader.runtime.artifact_invalidation import ( |
| 6 | ArtifactInvalidationAssessor, |
| 7 | WorkflowRecoveryStrategy, |
| 8 | ) |
| 9 | from loader.runtime.workflow import ArtifactEvidenceKind |
| 10 | |
| 11 | |
def test_artifact_invalidation_requests_plan_refresh_for_plan_only_drift() -> None:
    """Expect a plan-only refresh when a touched file is absent from the plan.

    No clarify brief is supplied. The plan and acceptance criteria mention only
    ``report.md`` while the touched file is ``/tmp/notes.md``; the test expects
    the plan to be flagged stale, the brief to stay fresh, and the evidence to
    cite both the touched file and the acceptance criterion.
    """
    plan_text = "# Implementation Plan\n- Create report.md only\n"
    checks_text = "# Verification Plan\n- report.md exists\n"

    result = ArtifactInvalidationAssessor().assess(
        task_statement="Implement the runtime report artifact.",
        clarify_text=None,
        implementation_text=plan_text,
        verification_text=checks_text,
        acceptance_criteria=["report.md exists"],
        touched_files=["/tmp/notes.md"],
        last_verification_result=None,
    )

    # Staleness flags and the recovery route reported alongside them.
    assert result.stale_plan is True
    assert result.stale_brief is False
    assert result.recovery_strategy == WorkflowRecoveryStrategy.PLAN_REFRESH.value
    assert "touched_files_outside_plan" in result.reason_codes

    # The evidence trail must mention the unexpected file...
    touchpoint_kind = ArtifactEvidenceKind.CONFIRMED_TOUCHPOINT.value
    assert any(
        entry.kind == touchpoint_kind and "notes.md" in entry.summary
        for entry in result.evidence
    )
    # ...and anchor on the acceptance criterion that was passed in.
    anchor_kind = ArtifactEvidenceKind.ACCEPTANCE_ANCHOR.value
    assert any(
        entry.kind == anchor_kind and "report.md exists" in entry.summary
        for entry in result.evidence
    )
| 39 | |
| 40 | |
def test_artifact_invalidation_can_force_full_replan_when_brief_and_plan_drift() -> None:
    """Expect a full replan when both the brief and the plan are reported stale.

    The plan and verification text mention only ``planned.txt``, whereas the
    acceptance criteria and touched files refer to ``notes.txt`` and the last
    verification result is ``"failed"``. The test expects both staleness flags
    to be set, the FULL_REPLAN strategy, and contradiction evidence that
    mentions ``notes.txt``.
    """
    brief_text = "# Task Brief\n\n## Desired Outcome\n- Improve Loader runtime workflow.\n"
    plan_text = "# Implementation Plan\n- Touch planned.txt only\n"
    checks_text = "# Verification Plan\n## Acceptance Criteria\n- planned.txt exists.\n"

    result = ArtifactInvalidationAssessor().assess(
        task_statement="Improve Loader runtime workflow discipline.",
        clarify_text=brief_text,
        implementation_text=plan_text,
        verification_text=checks_text,
        acceptance_criteria=["notes.txt exists in the workspace root."],
        touched_files=["/tmp/notes.txt"],
        last_verification_result="failed",
    )

    # Both artifacts are expected to be stale, escalating the recovery route.
    assert result.stale_plan is True
    assert result.stale_brief is True
    assert result.recovery_strategy == WorkflowRecoveryStrategy.FULL_REPLAN.value
    assert "touchpoints_outside_brief" in result.reason_codes
    assert "acceptance_criteria_outside_plan" in result.reason_codes

    # Evidence must quote the acceptance criterion as a verification contradiction...
    contradiction_kind = ArtifactEvidenceKind.VERIFICATION_CONTRADICTION.value
    assert any(
        entry.kind == contradiction_kind
        and "notes.txt exists in the workspace root." in entry.summary
        for entry in result.evidence
    )
    # ...and record a contradicted assumption that names the touched file.
    assumption_kind = ArtifactEvidenceKind.CONTRADICTED_ASSUMPTION.value
    assert any(
        entry.kind == assumption_kind and "notes.txt" in entry.summary
        for entry in result.evidence
    )
    # A non-empty (truthy) evidence summary must accompany the evidence items.
    assert result.evidence_summary
| 70 | |
| 71 | |
def test_artifact_invalidation_treats_path_separator_variants_as_same_touchpoint() -> None:
    """Expect no drift when the touched file differs from the plan only by separators.

    The plan names ``01-getting-started.html`` while the touched file is
    ``/tmp/chapters/01_getting_started.html``; the test expects neither the
    plan nor the brief to be flagged stale.
    """
    plan_text = (
        "# Implementation Plan\n"
        "- Create 01-getting-started.html in the chapters directory.\n"
    )
    checks_text = (
        "# Verification Plan\n"
        "## Acceptance Criteria\n"
        "- 01-getting-started.html exists.\n"
    )

    result = ArtifactInvalidationAssessor().assess(
        task_statement="Build a multi-file nginx guide.",
        clarify_text=None,
        implementation_text=plan_text,
        verification_text=checks_text,
        acceptance_criteria=["01-getting-started.html exists."],
        touched_files=["/tmp/chapters/01_getting_started.html"],
        last_verification_result=None,
    )

    # The hyphen/underscore variant must not register as an out-of-plan file.
    assert result.stale_plan is False
    assert result.stale_brief is False
    assert "touched_files_outside_plan" not in result.reason_codes
| 95 | |
| 96 | |
def test_artifact_invalidation_allows_supplemental_repair_files_after_failed_verification() -> None:
    """Expect an extra touched file to be tolerated once planned artifacts are complete.

    All three planned HTML files appear among the touched files, together with
    an unplanned ``styles.css``. The call passes ``retry_count=1`` and
    ``planned_artifacts_complete=True``; the test expects no staleness, the
    NONE recovery strategy, and ``styles.css`` still recorded as a confirmed
    touchpoint.

    NOTE(review): the test name says "failed verification" but
    ``last_verification_result`` is ``"planned"`` -- confirm this is intended.
    """
    plan_text = (
        "# Implementation Plan\n"
        "- Create index.html.\n"
        "- Create 01-getting-started.html.\n"
        "- Create 02-installation.html.\n"
    )
    checks_text = (
        "# Verification Plan\n"
        "## Acceptance Criteria\n"
        "- index.html exists.\n"
        "- 01-getting-started.html exists.\n"
        "- 02-installation.html exists.\n"
    )
    criteria = [
        "index.html exists.",
        "01-getting-started.html exists.",
        "02-installation.html exists.",
    ]
    # The last entry is the supplemental file that the plan never mentions.
    touched = [
        "/tmp/guides/nginx/index.html",
        "/tmp/guides/nginx/chapters/01-getting-started.html",
        "/tmp/guides/nginx/chapters/02-installation.html",
        "/tmp/guides/nginx/styles.css",
    ]

    result = ArtifactInvalidationAssessor().assess(
        task_statement="Build a multi-file nginx guide.",
        clarify_text=None,
        implementation_text=plan_text,
        verification_text=checks_text,
        acceptance_criteria=criteria,
        touched_files=touched,
        last_verification_result="planned",
        retry_count=1,
        planned_artifacts_complete=True,
    )

    # Nothing is stale, so no recovery is requested.
    assert result.stale_plan is False
    assert result.stale_brief is False
    assert result.recovery_strategy == WorkflowRecoveryStrategy.NONE.value
    assert "touched_files_outside_plan" not in result.reason_codes

    # The supplemental file is still surfaced in the evidence trail.
    touchpoint_kind = ArtifactEvidenceKind.CONFIRMED_TOUCHPOINT.value
    assert any(
        entry.kind == touchpoint_kind and "styles.css" in entry.summary
        for entry in result.evidence
    )
| 141 | |
| 142 | |
def test_artifact_invalidation_treats_child_files_under_planned_directory_as_in_plan() -> None:
    """Expect files created inside a planned directory to count as in-plan.

    The plan lists ``~/Loader/guides/nginx/chapters/`` as a directory; the
    touched files include ``chapters/03-configuration.html`` under a different
    absolute prefix (``/private/tmp/session/``). The test expects the plan to
    stay fresh and the NONE recovery strategy.
    """
    plan_text = (
        "# Implementation Plan\n"
        "- Create `~/Loader/guides/nginx/index.html`.\n"
        "- Create `~/Loader/guides/nginx/chapters/`.\n"
    )
    checks_text = (
        "# Verification Plan\n"
        "## Acceptance Criteria\n"
        "- `~/Loader/guides/nginx/index.html` exists.\n"
        "- Chapter files exist under `~/Loader/guides/nginx/chapters/`.\n"
    )
    criteria = [
        "~/Loader/guides/nginx/index.html exists.",
        "Chapter files exist under ~/Loader/guides/nginx/chapters/.",
    ]
    touched = [
        "/private/tmp/session/Loader/guides/nginx/index.html",
        "/private/tmp/session/Loader/guides/nginx/chapters/03-configuration.html",
    ]

    result = ArtifactInvalidationAssessor().assess(
        task_statement="Build a multi-file nginx guide.",
        clarify_text=None,
        implementation_text=plan_text,
        verification_text=checks_text,
        acceptance_criteria=criteria,
        touched_files=touched,
        last_verification_result=None,
    )

    # The chapter file is not named in the plan, yet must not count as drift.
    assert result.stale_plan is False
    assert result.recovery_strategy == WorkflowRecoveryStrategy.NONE.value
    assert "touched_files_outside_plan" not in result.reason_codes
| 174 | |
| 175 | |
def test_artifact_invalidation_keeps_root_level_sibling_files_out_of_plan() -> None:
    """Expect a sibling of a planned file to remain outside the plan.

    The plan names ``/tmp/session/planned.txt``; the touched file is
    ``/tmp/session/notes.txt`` in the same directory. The test expects the
    plan to be flagged stale with the PLAN_REFRESH strategy.
    """
    plan_text = (
        "# Implementation Plan\n"
        "- Create `/tmp/session/planned.txt`.\n"
    )
    checks_text = (
        "# Verification Plan\n"
        "## Acceptance Criteria\n"
        "- `/tmp/session/planned.txt` exists.\n"
    )

    result = ArtifactInvalidationAssessor().assess(
        task_statement="Implement the runtime report artifact.",
        clarify_text=None,
        implementation_text=plan_text,
        verification_text=checks_text,
        acceptance_criteria=["/tmp/session/planned.txt exists."],
        touched_files=["/tmp/session/notes.txt"],
        last_verification_result=None,
    )

    # Sharing a parent directory with a planned file does not make a file planned.
    assert result.stale_plan is True
    assert result.recovery_strategy == WorkflowRecoveryStrategy.PLAN_REFRESH.value
    assert "touched_files_outside_plan" in result.reason_codes