tests/report: degenerate null rollup coverage (F02)
- SHA: 8f65acdcee68dd30e84c9aea084342595be55faa
- Parents: 4e01c30
- Tree: b671861
8f65acd
8f65acdcee68dd30e84c9aea084342595be55faa
4e01c30
b671861

| Status | File | + | - |
|---|---|---|---|
| M | tests/unit/test_report_extras_rollup.py | 67 | 0 |
tests/unit/test_report_extras_rollup.py (modified)

@@ -181,3 +181,70 @@ class TestNullOptOutsRollup: | ||
| 181 | 181 | score = SwayScore(overall=0.9, components={}, band="healthy") |
| 182 | 182 | md = report.to_markdown(suite, score) |
| 183 | 183 | assert "Null-calibration opt-outs" not in md |
| 184 | + | |
| 185 | + | |
| 186 | +class TestDegenerateNullRollup: | |
| 187 | + """F02 (Audit 03) — probes whose null-calibration ran but produced | |
| 188 | + a degenerate baseline (std ≈ 0, typically ``runs: 1``) surface in | |
| 189 | + a separate footer rollup so the user sees the actionable fix.""" | |
| 190 | + | |
| 191 | + def _suite(self, null_stats: dict[str, dict[str, float]]) -> SuiteResult: | |
| 192 | + now = datetime.now(UTC) | |
| 193 | + probes = ( | |
| 194 | + ProbeResult(name="null", kind="null_adapter", verdict=Verdict.PASS, score=1.0), | |
| 195 | + ProbeResult(name="dk", kind="delta_kl", verdict=Verdict.PASS, score=0.5, message="ok"), | |
| 196 | + ) | |
| 197 | + return SuiteResult( | |
| 198 | + spec_path="<test>", | |
| 199 | + started_at=now, | |
| 200 | + finished_at=now, | |
| 201 | + base_model_id="b", | |
| 202 | + adapter_id="a", | |
| 203 | + sway_version="0.0.0", | |
| 204 | + probes=probes, | |
| 205 | + null_stats=null_stats, | |
| 206 | + ) | |
| 207 | + | |
| 208 | + def test_degenerate_flag_surfaces_in_rollup(self) -> None: | |
| 209 | + suite = self._suite( | |
| 210 | + { | |
| 211 | + "delta_kl": {"mean": 0.01, "std": 1e-6, "n": 1.0, "degenerate": 1.0}, | |
| 212 | + "leakage": {"mean": 0.0, "std": 1e-6, "n": 1.0, "degenerate": 1.0}, | |
| 213 | + } | |
| 214 | + ) | |
| 215 | + assert report.collect_degenerate_null_kinds(suite) == ["delta_kl", "leakage"] | |
| 216 | + | |
| 217 | + def test_non_degenerate_stats_excluded(self) -> None: | |
| 218 | + suite = self._suite( | |
| 219 | + { | |
| 220 | + "delta_kl": {"mean": 0.01, "std": 0.005, "n": 3.0, "degenerate": 0.0}, | |
| 221 | + } | |
| 222 | + ) | |
| 223 | + assert report.collect_degenerate_null_kinds(suite) == [] | |
| 224 | + | |
| 225 | + def test_no_null_adapter_probe_returns_empty(self) -> None: | |
| 226 | + now = datetime.now(UTC) | |
| 227 | + suite = SuiteResult( | |
| 228 | + spec_path="<test>", | |
| 229 | + started_at=now, | |
| 230 | + finished_at=now, | |
| 231 | + base_model_id="b", | |
| 232 | + adapter_id="a", | |
| 233 | + sway_version="0.0.0", | |
| 234 | + probes=(ProbeResult(name="dk", kind="delta_kl", verdict=Verdict.PASS, score=0.9),), | |
| 235 | + ) | |
| 236 | + assert report.collect_degenerate_null_kinds(suite) == [] | |
| 237 | + | |
| 238 | + def test_markdown_section_appears_when_degenerate(self) -> None: | |
| 239 | + suite = self._suite({"leakage": {"mean": 0.0, "std": 1e-6, "n": 1.0, "degenerate": 1.0}}) | |
| 240 | + score = SwayScore(overall=0.9, components={}, band="healthy") | |
| 241 | + md = report.to_markdown(suite, score) | |
| 242 | + assert "Degenerate null calibration" in md | |
| 243 | + assert "`leakage`" in md | |
| 244 | + assert "bump `runs:`" in md | |
| 245 | + | |
| 246 | + def test_markdown_omits_section_when_none_degenerate(self) -> None: | |
| 247 | + suite = self._suite({"delta_kl": {"mean": 0.0, "std": 0.01, "n": 3.0, "degenerate": 0.0}}) | |
| 248 | + score = SwayScore(overall=0.9, components={}, band="healthy") | |
| 249 | + md = report.to_markdown(suite, score) | |
| 250 | + assert "Degenerate null calibration" not in md | |