| 1 | """Tests for :mod:`dlm_sway.probes._zscore`.""" |
| 2 | |
| 3 | from __future__ import annotations |
| 4 | |
| 5 | import math |
| 6 | |
| 7 | from dlm_sway.core.result import Verdict |
| 8 | from dlm_sway.probes._zscore import ( |
| 9 | MIN_STD, |
| 10 | no_calibration_note, |
| 11 | score_from_z, |
| 12 | verdict_from_z, |
| 13 | z_score, |
| 14 | ) |
| 15 | |
| 16 | |
class TestZScore:
    """Exercise ``z_score`` on usable calibration stats and on each rejected input."""

    def test_healthy_path(self) -> None:
        """Raw 0.08 against mean 0.01 / std 0.01 comes out at roughly z = 7."""
        calibration = {"mean": 0.01, "std": 0.01, "n": 3.0}
        result = z_score(0.08, calibration)
        assert result is not None and math.isclose(result, 7.0)

    def test_raw_below_mean_gives_negative_z(self) -> None:
        """A raw value under the calibration mean yields a negative z."""
        result = z_score(0.02, {"mean": 0.05, "std": 0.01})
        assert result is not None and result < 0

    def test_none_stats_returns_none(self) -> None:
        """Passing ``None`` for the stats gives ``None`` back."""
        result = z_score(0.08, None)
        assert result is None

    def test_std_below_min_returns_none(self) -> None:
        """A tiny positive std (1e-10) is rejected."""
        result = z_score(0.08, {"mean": 0.01, "std": 1e-10})
        assert result is None

    def test_zero_std_returns_none(self) -> None:
        """A std of exactly zero is rejected."""
        result = z_score(0.08, {"mean": 0.01, "std": 0.0})
        assert result is None

    def test_negative_std_treated_as_invalid(self) -> None:
        """A negative std is rejected."""
        result = z_score(0.08, {"mean": 0.01, "std": -0.01})
        assert result is None

    def test_missing_mean_returns_none(self) -> None:
        """Stats lacking the ``mean`` key give ``None``."""
        incomplete = {"std": 0.01}
        assert z_score(0.08, incomplete) is None  # type: ignore[arg-type]

    def test_nan_raw_returns_none(self) -> None:
        """A NaN raw value gives ``None``."""
        calibration = {"mean": 0.01, "std": 0.01}
        result = z_score(math.nan, calibration)
        assert result is None

    def test_inf_raw_returns_none(self) -> None:
        """A positive-infinite raw value gives ``None``."""
        calibration = {"mean": 0.01, "std": 0.01}
        result = z_score(math.inf, calibration)
        assert result is None

    def test_nan_mean_returns_none(self) -> None:
        """A NaN calibration mean gives ``None``."""
        result = z_score(0.08, {"mean": math.nan, "std": 0.01})
        assert result is None

    def test_min_std_boundary_accepted(self) -> None:
        """A std exactly equal to ``MIN_STD`` still produces a z-score."""
        boundary = {"mean": 0.0, "std": MIN_STD}
        result = z_score(1.0, boundary)
        assert result is not None

    def test_degenerate_flag_rejects_even_valid_std(self) -> None:
        """F02 (Audit 03): a ``degenerate`` marker vetoes the division.

        null_adapter flags stats as degenerate for ``runs: 1`` or for
        coincidentally-identical seeds. In that case ``z_score`` must
        decline to divide even though the floored std clears MIN_STD;
        this guards against the ``+290,766σ`` value once observed on a
        ``runs: 1`` leakage probe.
        """
        flagged = {"mean": 0.01, "std": MIN_STD, "degenerate": 1.0}
        result = z_score(0.30, flagged)
        assert result is None

    def test_non_degenerate_flag_does_not_change_behavior(self) -> None:
        """``degenerate: 0.0`` on otherwise-valid stats still yields a z-score,
        just as the no-marker path does."""
        unflagged = {"mean": 0.01, "std": 0.01, "degenerate": 0.0}
        result = z_score(0.08, unflagged)
        assert result is not None
| 80 | |
| 81 | |
class TestVerdictFromZ:
    """Check how ``verdict_from_z`` maps a z-score to a ``Verdict`` at threshold 3.0."""

    def test_pass_at_threshold(self) -> None:
        """A z equal to the threshold passes."""
        outcome = verdict_from_z(3.0, threshold=3.0)
        assert outcome == Verdict.PASS

    def test_fail_below_threshold(self) -> None:
        """A z just under the threshold fails."""
        outcome = verdict_from_z(2.99, threshold=3.0)
        assert outcome == Verdict.FAIL

    def test_high_z_passes(self) -> None:
        """A z far above the threshold passes."""
        outcome = verdict_from_z(100.0, threshold=3.0)
        assert outcome == Verdict.PASS

    def test_negative_z_fails(self) -> None:
        """A negative z fails."""
        outcome = verdict_from_z(-1.0, threshold=3.0)
        assert outcome == Verdict.FAIL

    def test_none_z_returns_none(self) -> None:
        """A missing z (``None``) produces no verdict."""
        outcome = verdict_from_z(None, threshold=3.0)
        assert outcome is None
| 97 | |
| 98 | |
class TestScoreFromZ:
    """Check the score that ``score_from_z`` returns across the z range."""

    def test_z_zero_gives_half(self) -> None:
        """z = 0 scores approximately 0.5."""
        score = score_from_z(0.0)
        assert score is not None and math.isclose(score, 0.5)

    def test_z_three_near_optimistic_band(self) -> None:
        """z = 3 scores strictly between 0.7 and 0.95."""
        score = score_from_z(3.0)
        assert score is not None and 0.7 < score < 0.95

    def test_negative_z_near_zero(self) -> None:
        """z = -10 scores below 0.1."""
        score = score_from_z(-10.0)
        assert score is not None and score < 0.1

    def test_extreme_positive_clamped(self) -> None:
        """z=+1000 shouldn't overflow math.exp; the score stays in (0.99, 1.0]."""
        score = score_from_z(1000.0)
        assert score is not None and 0.99 < score <= 1.0

    def test_extreme_negative_clamped(self) -> None:
        """z = -1000 scores in [0.0, 0.01)."""
        score = score_from_z(-1000.0)
        assert score is not None and 0.0 <= score < 0.01

    def test_none_returns_none(self) -> None:
        """A missing z (``None``) produces no score."""
        score = score_from_z(None)
        assert score is None
| 128 | |
| 129 | |
class TestNoCalibrationNote:
    """Check the text returned by ``no_calibration_note``."""

    def test_includes_probe_kind(self) -> None:
        """The note names the probe kind and contains "no calibration" (any case)."""
        message = no_calibration_note("delta_kl")
        assert "delta_kl" in message
        assert "no calibration" in message.lower()