Python · 4429 bytes Raw Blame History
1 """Tests for :mod:`dlm_sway.probes._zscore`."""
2
3 from __future__ import annotations
4
5 import math
6
7 from dlm_sway.core.result import Verdict
8 from dlm_sway.probes._zscore import (
9 MIN_STD,
10 no_calibration_note,
11 score_from_z,
12 verdict_from_z,
13 z_score,
14 )
15
16
class TestZScore:
    """Checks for ``z_score``: the healthy path plus every input it rejects."""

    def test_healthy_path(self) -> None:
        """Well-formed stats produce a z that is close to 7.0."""
        null_stats = {"mean": 0.01, "std": 0.01, "n": 3.0}
        result = z_score(0.08, null_stats)
        assert result is not None
        assert math.isclose(result, 7.0)

    def test_raw_below_mean_gives_negative_z(self) -> None:
        """A raw value under the mean yields a negative z."""
        result = z_score(0.02, {"mean": 0.05, "std": 0.01})
        assert result is not None
        assert result < 0

    def test_none_stats_returns_none(self) -> None:
        """Without stats there is nothing to compare against."""
        assert z_score(0.08, None) is None

    def test_std_below_min_returns_none(self) -> None:
        """A tiny std (1e-10) is rejected."""
        tiny_std_stats = {"mean": 0.01, "std": 1e-10}
        assert z_score(0.08, tiny_std_stats) is None

    def test_zero_std_returns_none(self) -> None:
        """A std of exactly zero is rejected."""
        zero_std_stats = {"mean": 0.01, "std": 0.0}
        assert z_score(0.08, zero_std_stats) is None

    def test_negative_std_treated_as_invalid(self) -> None:
        """A negative std is rejected."""
        negative_std_stats = {"mean": 0.01, "std": -0.01}
        assert z_score(0.08, negative_std_stats) is None

    def test_missing_mean_returns_none(self) -> None:
        """Stats lacking the ``mean`` key are rejected."""
        incomplete_stats = {"std": 0.01}
        assert z_score(0.08, incomplete_stats) is None  # type: ignore[arg-type]

    def test_nan_raw_returns_none(self) -> None:
        """A NaN raw value is rejected."""
        valid_stats = {"mean": 0.01, "std": 0.01}
        assert z_score(math.nan, valid_stats) is None

    def test_inf_raw_returns_none(self) -> None:
        """An infinite raw value is rejected."""
        valid_stats = {"mean": 0.01, "std": 0.01}
        assert z_score(math.inf, valid_stats) is None

    def test_nan_mean_returns_none(self) -> None:
        """A NaN mean is rejected."""
        nan_mean_stats = {"mean": math.nan, "std": 0.01}
        assert z_score(0.08, nan_mean_stats) is None

    def test_min_std_boundary_accepted(self) -> None:
        """A std exactly equal to ``MIN_STD`` still produces a z."""
        boundary_stats = {"mean": 0.0, "std": MIN_STD}
        result = z_score(1.0, boundary_stats)
        assert result is not None

    def test_degenerate_flag_rejects_even_valid_std(self) -> None:
        """F02 (Audit 03) — stats that null_adapter has marked degenerate
        (``runs: 1`` or coincidentally-identical seeds) must make
        ``z_score`` refuse to divide, even though the floored std
        passes MIN_STD. This guards against the observed
        ``+290,766σ`` output on a ``runs: 1`` leakage probe."""
        flagged_stats = {"mean": 0.01, "std": MIN_STD, "degenerate": 1.0}
        assert z_score(0.30, flagged_stats) is None

    def test_non_degenerate_flag_does_not_change_behavior(self) -> None:
        """An explicit ``degenerate: 0.0`` marker on otherwise-valid
        stats behaves the same as having no marker at all."""
        unflagged_stats = {"mean": 0.01, "std": 0.01, "degenerate": 0.0}
        assert z_score(0.08, unflagged_stats) is not None
80
81
class TestVerdictFromZ:
    """Checks for ``verdict_from_z`` around a threshold of 3.0."""

    def test_pass_at_threshold(self) -> None:
        """A z exactly at the threshold passes."""
        outcome = verdict_from_z(3.0, threshold=3.0)
        assert outcome == Verdict.PASS

    def test_fail_below_threshold(self) -> None:
        """A z just under the threshold fails."""
        outcome = verdict_from_z(2.99, threshold=3.0)
        assert outcome == Verdict.FAIL

    def test_high_z_passes(self) -> None:
        """A z far above the threshold passes."""
        outcome = verdict_from_z(100.0, threshold=3.0)
        assert outcome == Verdict.PASS

    def test_negative_z_fails(self) -> None:
        """A negative z fails."""
        outcome = verdict_from_z(-1.0, threshold=3.0)
        assert outcome == Verdict.FAIL

    def test_none_z_returns_none(self) -> None:
        """A missing z propagates as ``None`` instead of a verdict."""
        outcome = verdict_from_z(None, threshold=3.0)
        assert outcome is None
97
98
class TestScoreFromZ:
    """Checks for ``score_from_z`` at the midpoint, tails and extremes."""

    def test_z_zero_gives_half(self) -> None:
        """z == 0 maps to a score close to 0.5."""
        score = score_from_z(0.0)
        assert score is not None
        assert math.isclose(score, 0.5)

    def test_z_three_near_optimistic_band(self) -> None:
        """z == 3 lands strictly between 0.7 and 0.95."""
        score = score_from_z(3.0)
        assert score is not None
        assert 0.7 < score < 0.95

    def test_negative_z_near_zero(self) -> None:
        """A strongly negative z gives a score under 0.1."""
        score = score_from_z(-10.0)
        assert score is not None
        assert score < 0.1

    def test_extreme_positive_clamped(self) -> None:
        """z=+1000 shouldn't overflow math.exp."""
        score = score_from_z(1000.0)
        assert score is not None
        assert 0.99 < score <= 1.0

    def test_extreme_negative_clamped(self) -> None:
        """z=-1000 stays within [0.0, 0.01)."""
        score = score_from_z(-1000.0)
        assert score is not None
        assert 0.0 <= score < 0.01

    def test_none_returns_none(self) -> None:
        """A missing z propagates as ``None``."""
        assert score_from_z(None) is None
128
129
class TestNoCalibrationNote:
    """Checks for the text returned by ``no_calibration_note``."""

    def test_includes_probe_kind(self) -> None:
        """The note names the probe kind and mentions 'no calibration'."""
        message = no_calibration_note("delta_kl")
        lowered = message.lower()
        assert "delta_kl" in message
        assert "no calibration" in lowered
134 assert "no calibration" in note.lower()