Python · 8806 bytes Raw Blame History
1 """Tests for the D3 extras-rollup surface.
2
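Covers ``report.collect_missing_extras`` (pure extraction) and the
markdown renderer's handling of the resulting footer, plus the
null-calibration opt-out and degenerate-null rollups
(``report.collect_null_opt_outs`` and
``report.collect_degenerate_null_kinds``).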
5 """
6
7 from __future__ import annotations
8
9 from datetime import UTC, datetime
10
11 from dlm_sway.core.result import ProbeResult, SuiteResult, SwayScore, Verdict
12 from dlm_sway.suite import report
13
14
15 def _suite_with_messages(messages: list[str]) -> SuiteResult:
16 now = datetime.now(UTC)
17 probes = tuple(
18 ProbeResult(
19 name=f"p{i}",
20 kind="delta_kl",
21 verdict=Verdict.SKIP,
22 score=None,
23 message=msg,
24 )
25 for i, msg in enumerate(messages)
26 )
27 return SuiteResult(
28 spec_path="<test>",
29 started_at=now,
30 finished_at=now,
31 base_model_id="b",
32 adapter_id="a",
33 sway_version="0.0.0",
34 probes=probes,
35 )
36
37
38 class TestCollectMissingExtras:
39 def test_single_extra_single_probe(self) -> None:
40 suite = _suite_with_messages(
41 ["adapter_revert: install the [semsim] extra for sentence embeddings"]
42 )
43 assert report.collect_missing_extras(suite) == ["semsim"]
44
45 def test_multiple_probes_deduplicated(self) -> None:
46 suite = _suite_with_messages(
47 [
48 "install the [semsim] extra",
49 "install the [semsim] extra",
50 "install the [style] extra",
51 ]
52 )
53 assert report.collect_missing_extras(suite) == ["semsim", "style"]
54
55 def test_non_skip_messages_ignored(self) -> None:
56 now = datetime.now(UTC)
57 probes = (
58 ProbeResult(
59 name="p1",
60 kind="delta_kl",
61 verdict=Verdict.PASS,
62 score=1.0,
63 message="install the [semsim] extra",
64 ),
65 )
66 suite = SuiteResult(
67 spec_path="<test>",
68 started_at=now,
69 finished_at=now,
70 base_model_id="b",
71 adapter_id="a",
72 sway_version="0.0.0",
73 probes=probes,
74 )
75 # A PASS probe mentioning install hints in passing must not
76 # pollute the rollup.
77 assert report.collect_missing_extras(suite) == []
78
79 def test_empty_suite_no_extras(self) -> None:
80 now = datetime.now(UTC)
81 suite = SuiteResult(
82 spec_path="<test>",
83 started_at=now,
84 finished_at=now,
85 base_model_id="b",
86 adapter_id="a",
87 sway_version="0.0.0",
88 )
89 assert report.collect_missing_extras(suite) == []
90
91
92 class TestExtrasFooterInMarkdown:
93 def test_footer_includes_pip_command(self) -> None:
94 suite = _suite_with_messages(
95 [
96 "adapter_revert: install the [semsim] extra",
97 "style_fingerprint: install the [style] extra",
98 ]
99 )
100 score = SwayScore(overall=0.0, components={}, band="noise")
101 md = report.to_markdown(suite, score)
102 assert "pip install 'dlm-sway[semsim,style]'" in md
103 assert "Skipped probes" in md
104
105 def test_no_footer_when_no_skips(self) -> None:
106 now = datetime.now(UTC)
107 probes = (
108 ProbeResult(name="p1", kind="delta_kl", verdict=Verdict.PASS, score=0.9, message="ok"),
109 )
110 suite = SuiteResult(
111 spec_path="<test>",
112 started_at=now,
113 finished_at=now,
114 base_model_id="b",
115 adapter_id="a",
116 sway_version="0.0.0",
117 probes=probes,
118 )
119 score = SwayScore(overall=0.9, components={}, band="healthy")
120 md = report.to_markdown(suite, score)
121 assert "Skipped probes" not in md
122
123
124 class TestNullOptOutsRollup:
125 """F15 — surface ``null_adapter.evidence["skipped_kinds"]`` in the report."""
126
127 def _suite_with_null_opt_outs(self, skipped: list[str]) -> SuiteResult:
128 now = datetime.now(UTC)
129 probes = (
130 ProbeResult(
131 name="null",
132 kind="null_adapter",
133 verdict=Verdict.PASS,
134 score=1.0,
135 evidence={"skipped_kinds": skipped},
136 ),
137 ProbeResult(name="dk", kind="delta_kl", verdict=Verdict.PASS, score=0.9, message="ok"),
138 )
139 return SuiteResult(
140 spec_path="<test>",
141 started_at=now,
142 finished_at=now,
143 base_model_id="b",
144 adapter_id="a",
145 sway_version="0.0.0",
146 probes=probes,
147 )
148
149 def test_collect_deduplicates_and_sorts(self) -> None:
150 suite = self._suite_with_null_opt_outs(
151 ["adapter_revert", "prompt_collapse", "adapter_revert"]
152 )
153 assert report.collect_null_opt_outs(suite) == ["adapter_revert", "prompt_collapse"]
154
155 def test_empty_when_no_null_adapter(self) -> None:
156 now = datetime.now(UTC)
157 probes = (
158 ProbeResult(name="dk", kind="delta_kl", verdict=Verdict.PASS, score=0.9, message="ok"),
159 )
160 suite = SuiteResult(
161 spec_path="<test>",
162 started_at=now,
163 finished_at=now,
164 base_model_id="b",
165 adapter_id="a",
166 sway_version="0.0.0",
167 probes=probes,
168 )
169 assert report.collect_null_opt_outs(suite) == []
170
171 def test_markdown_section_appears(self) -> None:
172 suite = self._suite_with_null_opt_outs(["adapter_revert", "prompt_collapse"])
173 score = SwayScore(overall=0.9, components={}, band="healthy")
174 md = report.to_markdown(suite, score)
175 assert "Null-calibration opt-outs" in md
176 assert "`adapter_revert`" in md
177 assert "`prompt_collapse`" in md
178
179 def test_markdown_omits_section_when_none(self) -> None:
180 suite = self._suite_with_null_opt_outs([])
181 score = SwayScore(overall=0.9, components={}, band="healthy")
182 md = report.to_markdown(suite, score)
183 assert "Null-calibration opt-outs" not in md
184
185
186 class TestDegenerateNullRollup:
187 """F02 (Audit 03) — probes whose null-calibration ran but produced
188 a degenerate baseline (std ≈ 0, typically ``runs: 1``) surface in
189 a separate footer rollup so the user sees the actionable fix."""
190
191 def _suite(self, null_stats: dict[str, dict[str, float]]) -> SuiteResult:
192 now = datetime.now(UTC)
193 probes = (
194 ProbeResult(name="null", kind="null_adapter", verdict=Verdict.PASS, score=1.0),
195 ProbeResult(name="dk", kind="delta_kl", verdict=Verdict.PASS, score=0.5, message="ok"),
196 )
197 return SuiteResult(
198 spec_path="<test>",
199 started_at=now,
200 finished_at=now,
201 base_model_id="b",
202 adapter_id="a",
203 sway_version="0.0.0",
204 probes=probes,
205 null_stats=null_stats,
206 )
207
208 def test_degenerate_flag_surfaces_in_rollup(self) -> None:
209 suite = self._suite(
210 {
211 "delta_kl": {"mean": 0.01, "std": 1e-6, "n": 1.0, "degenerate": 1.0},
212 "leakage": {"mean": 0.0, "std": 1e-6, "n": 1.0, "degenerate": 1.0},
213 }
214 )
215 assert report.collect_degenerate_null_kinds(suite) == ["delta_kl", "leakage"]
216
217 def test_non_degenerate_stats_excluded(self) -> None:
218 suite = self._suite(
219 {
220 "delta_kl": {"mean": 0.01, "std": 0.005, "n": 3.0, "degenerate": 0.0},
221 }
222 )
223 assert report.collect_degenerate_null_kinds(suite) == []
224
225 def test_no_null_adapter_probe_returns_empty(self) -> None:
226 now = datetime.now(UTC)
227 suite = SuiteResult(
228 spec_path="<test>",
229 started_at=now,
230 finished_at=now,
231 base_model_id="b",
232 adapter_id="a",
233 sway_version="0.0.0",
234 probes=(ProbeResult(name="dk", kind="delta_kl", verdict=Verdict.PASS, score=0.9),),
235 )
236 assert report.collect_degenerate_null_kinds(suite) == []
237
238 def test_markdown_section_appears_when_degenerate(self) -> None:
239 suite = self._suite({"leakage": {"mean": 0.0, "std": 1e-6, "n": 1.0, "degenerate": 1.0}})
240 score = SwayScore(overall=0.9, components={}, band="healthy")
241 md = report.to_markdown(suite, score)
242 assert "Degenerate null calibration" in md
243 assert "`leakage`" in md
244 assert "bump `runs:`" in md
245
246 def test_markdown_omits_section_when_none_degenerate(self) -> None:
247 suite = self._suite({"delta_kl": {"mean": 0.0, "std": 0.01, "n": 3.0, "degenerate": 0.0}})
248 score = SwayScore(overall=0.9, components={}, band="healthy")
249 md = report.to_markdown(suite, score)
250 assert "Degenerate null calibration" not in md