tenseleyflow/sway / 8f65acd

Browse files

tests/report: degenerate null rollup coverage (F02)

Authored by espadonne
SHA
8f65acdcee68dd30e84c9aea084342595be55faa
Parents
4e01c30
Tree
b671861

1 changed file

Status | File | + | -
M tests/unit/test_report_extras_rollup.py 67 0
tests/unit/test_report_extras_rollup.py (modified)
@@ -181,3 +181,70 @@ class TestNullOptOutsRollup:
181181
         score = SwayScore(overall=0.9, components={}, band="healthy")
182182
         md = report.to_markdown(suite, score)
183183
         assert "Null-calibration opt-outs" not in md
184
+
185
+
186
+class TestDegenerateNullRollup:
187
+    """F02 (Audit 03) — probes whose null-calibration ran but produced
188
+    a degenerate baseline (std ≈ 0, typically ``runs: 1``) surface in
189
+    a separate footer rollup so the user sees the actionable fix."""
190
+
191
+    def _suite(self, null_stats: dict[str, dict[str, float]]) -> SuiteResult:
192
+        now = datetime.now(UTC)
193
+        probes = (
194
+            ProbeResult(name="null", kind="null_adapter", verdict=Verdict.PASS, score=1.0),
195
+            ProbeResult(name="dk", kind="delta_kl", verdict=Verdict.PASS, score=0.5, message="ok"),
196
+        )
197
+        return SuiteResult(
198
+            spec_path="<test>",
199
+            started_at=now,
200
+            finished_at=now,
201
+            base_model_id="b",
202
+            adapter_id="a",
203
+            sway_version="0.0.0",
204
+            probes=probes,
205
+            null_stats=null_stats,
206
+        )
207
+
208
+    def test_degenerate_flag_surfaces_in_rollup(self) -> None:
209
+        suite = self._suite(
210
+            {
211
+                "delta_kl": {"mean": 0.01, "std": 1e-6, "n": 1.0, "degenerate": 1.0},
212
+                "leakage": {"mean": 0.0, "std": 1e-6, "n": 1.0, "degenerate": 1.0},
213
+            }
214
+        )
215
+        assert report.collect_degenerate_null_kinds(suite) == ["delta_kl", "leakage"]
216
+
217
+    def test_non_degenerate_stats_excluded(self) -> None:
218
+        suite = self._suite(
219
+            {
220
+                "delta_kl": {"mean": 0.01, "std": 0.005, "n": 3.0, "degenerate": 0.0},
221
+            }
222
+        )
223
+        assert report.collect_degenerate_null_kinds(suite) == []
224
+
225
+    def test_no_null_adapter_probe_returns_empty(self) -> None:
226
+        now = datetime.now(UTC)
227
+        suite = SuiteResult(
228
+            spec_path="<test>",
229
+            started_at=now,
230
+            finished_at=now,
231
+            base_model_id="b",
232
+            adapter_id="a",
233
+            sway_version="0.0.0",
234
+            probes=(ProbeResult(name="dk", kind="delta_kl", verdict=Verdict.PASS, score=0.9),),
235
+        )
236
+        assert report.collect_degenerate_null_kinds(suite) == []
237
+
238
+    def test_markdown_section_appears_when_degenerate(self) -> None:
239
+        suite = self._suite({"leakage": {"mean": 0.0, "std": 1e-6, "n": 1.0, "degenerate": 1.0}})
240
+        score = SwayScore(overall=0.9, components={}, band="healthy")
241
+        md = report.to_markdown(suite, score)
242
+        assert "Degenerate null calibration" in md
243
+        assert "`leakage`" in md
244
+        assert "bump `runs:`" in md
245
+
246
+    def test_markdown_omits_section_when_none_degenerate(self) -> None:
247
+        suite = self._suite({"delta_kl": {"mean": 0.0, "std": 0.01, "n": 3.0, "degenerate": 0.0}})
248
+        score = SwayScore(overall=0.9, components={}, band="healthy")
249
+        md = report.to_markdown(suite, score)
250
+        assert "Degenerate null calibration" not in md