sway Public

Watch 0 Fork 0 Star 0

Python · 4429 bytes Raw Blame History

  
        1
        """Tests for :mod:`dlm_sway.probes._zscore`."""
      
        2
        
        3
        from __future__ import annotations
      
        4
        
        5
        import math
      
        6
        
        7
        from dlm_sway.core.result import Verdict
      
        8
        from dlm_sway.probes._zscore import (
      
        9
            MIN_STD,
      
        10
            no_calibration_note,
      
        11
            score_from_z,
      
        12
            verdict_from_z,
      
        13
            z_score,
      
        14
        )
      
        15
        
        16
        
        17
        class TestZScore:
            """Exercise ``z_score`` on usable baselines and on every rejected input."""

            def test_healthy_path(self) -> None:
                """A well-formed baseline yields a z close to 7.0 for raw=0.08."""
                baseline = {"mean": 0.01, "std": 0.01, "n": 3.0}
                result = z_score(0.08, baseline)
                assert result is not None
                assert math.isclose(result, 7.0)

            def test_raw_below_mean_gives_negative_z(self) -> None:
                """A raw value under the baseline mean produces a z below zero."""
                baseline = {"mean": 0.05, "std": 0.01}
                result = z_score(0.02, baseline)
                assert result is not None
                assert result < 0

            def test_none_stats_returns_none(self) -> None:
                """Passing ``None`` instead of a stats dict yields ``None``."""
                result = z_score(0.08, None)
                assert result is None

            def test_std_below_min_returns_none(self) -> None:
                """A tiny positive std (1e-10) is expected to be rejected."""
                baseline = {"mean": 0.01, "std": 1e-10}
                result = z_score(0.08, baseline)
                assert result is None

            def test_zero_std_returns_none(self) -> None:
                """A std of exactly zero yields ``None``."""
                baseline = {"mean": 0.01, "std": 0.0}
                result = z_score(0.08, baseline)
                assert result is None

            def test_negative_std_treated_as_invalid(self) -> None:
                """A negative std yields ``None``."""
                baseline = {"mean": 0.01, "std": -0.01}
                result = z_score(0.08, baseline)
                assert result is None

            def test_missing_mean_returns_none(self) -> None:
                """A stats dict without a ``mean`` entry yields ``None``."""
                baseline = {"std": 0.01}
                # The dict is deliberately incomplete, hence the type-checker waiver.
                result = z_score(0.08, baseline)  # type: ignore[arg-type]
                assert result is None

            def test_nan_raw_returns_none(self) -> None:
                """A NaN raw value yields ``None``."""
                baseline = {"mean": 0.01, "std": 0.01}
                result = z_score(math.nan, baseline)
                assert result is None

            def test_inf_raw_returns_none(self) -> None:
                """An infinite raw value yields ``None``."""
                baseline = {"mean": 0.01, "std": 0.01}
                result = z_score(math.inf, baseline)
                assert result is None

            def test_nan_mean_returns_none(self) -> None:
                """A NaN baseline mean yields ``None``."""
                baseline = {"mean": math.nan, "std": 0.01}
                result = z_score(0.08, baseline)
                assert result is None

            def test_min_std_boundary_accepted(self) -> None:
                """A std exactly equal to ``MIN_STD`` is still accepted."""
                baseline = {"mean": 0.0, "std": MIN_STD}
                result = z_score(1.0, baseline)
                assert result is not None

            def test_degenerate_flag_rejects_even_valid_std(self) -> None:
                """Regression test for F02 (Audit 03).

                Stats flagged as degenerate by null_adapter (``runs: 1`` or
                coincidentally-identical seeds) must make ``z_score`` return
                ``None`` even though the floored std passes ``MIN_STD``. This
                guards against the ``+290,766σ`` output once observed on a
                ``runs: 1`` leakage probe.
                """
                baseline = {"mean": 0.01, "std": MIN_STD, "degenerate": 1.0}
                result = z_score(0.30, baseline)
                assert result is None

            def test_non_degenerate_flag_does_not_change_behavior(self) -> None:
                """``degenerate: 0.0`` on otherwise-valid stats still yields a z."""
                baseline = {"mean": 0.01, "std": 0.01, "degenerate": 0.0}
                result = z_score(0.08, baseline)
                assert result is not None
      
        80
        
        81
        
        82
        class TestVerdictFromZ:
            """Check how ``verdict_from_z`` maps a z-score onto a ``Verdict``."""

            def test_pass_at_threshold(self) -> None:
                """A z exactly equal to the threshold passes."""
                outcome = verdict_from_z(3.0, threshold=3.0)
                assert outcome == Verdict.PASS

            def test_fail_below_threshold(self) -> None:
                """A z just under the threshold fails."""
                outcome = verdict_from_z(2.99, threshold=3.0)
                assert outcome == Verdict.FAIL

            def test_high_z_passes(self) -> None:
                """A z far above the threshold passes."""
                outcome = verdict_from_z(100.0, threshold=3.0)
                assert outcome == Verdict.PASS

            def test_negative_z_fails(self) -> None:
                """A negative z fails against a positive threshold."""
                outcome = verdict_from_z(-1.0, threshold=3.0)
                assert outcome == Verdict.FAIL

            def test_none_z_returns_none(self) -> None:
                """A missing z (``None``) produces no verdict."""
                outcome = verdict_from_z(None, threshold=3.0)
                assert outcome is None
      
        97
        
        98
        
        99
        class TestScoreFromZ:
            """Check the score that ``score_from_z`` derives from a z-score."""

            def test_z_zero_gives_half(self) -> None:
                """z = 0 maps to a score close to 0.5."""
                score = score_from_z(0.0)
                assert score is not None
                assert math.isclose(score, 0.5)

            def test_z_three_near_optimistic_band(self) -> None:
                """z = 3 lands strictly between 0.7 and 0.95."""
                score = score_from_z(3.0)
                assert score is not None
                assert score > 0.7
                assert score < 0.95

            def test_negative_z_near_zero(self) -> None:
                """z = -10 maps to a score below 0.1."""
                score = score_from_z(-10.0)
                assert score is not None
                assert score < 0.1

            def test_extreme_positive_clamped(self) -> None:
                """z = +1000 must not overflow math.exp; the score stays in (0.99, 1.0]."""
                score = score_from_z(1000.0)
                assert score is not None
                assert score > 0.99
                assert score <= 1.0

            def test_extreme_negative_clamped(self) -> None:
                """z = -1000 gives a score in [0.0, 0.01)."""
                score = score_from_z(-1000.0)
                assert score is not None
                assert score >= 0.0
                assert score < 0.01

            def test_none_returns_none(self) -> None:
                """A missing z (``None``) produces no score."""
                score = score_from_z(None)
                assert score is None
      
        128
        
        129
        
        130
        class TestNoCalibrationNote:
            """Check the text produced by ``no_calibration_note``."""

            def test_includes_probe_kind(self) -> None:
                """The note names the probe kind and mentions "no calibration"."""
                message = no_calibration_note("delta_kl")
                assert "delta_kl" in message
                # The phrase is matched case-insensitively.
                lowered = message.lower()
                assert "no calibration" in lowered

1	"""Tests for :mod:`dlm_sway.probes._zscore`."""
2
3	from __future__ import annotations
4
5	import math
6
7	from dlm_sway.core.result import Verdict
8	from dlm_sway.probes._zscore import (
9	MIN_STD,
10	no_calibration_note,
11	score_from_z,
12	verdict_from_z,
13	z_score,
14	)
15
16
class TestZScore:
    """Exercise ``z_score`` on usable baselines and on every rejected input."""

    def test_healthy_path(self) -> None:
        """A well-formed baseline yields a z close to 7.0 for raw=0.08."""
        baseline = {"mean": 0.01, "std": 0.01, "n": 3.0}
        result = z_score(0.08, baseline)
        assert result is not None
        assert math.isclose(result, 7.0)

    def test_raw_below_mean_gives_negative_z(self) -> None:
        """A raw value under the baseline mean produces a z below zero."""
        baseline = {"mean": 0.05, "std": 0.01}
        result = z_score(0.02, baseline)
        assert result is not None
        assert result < 0

    def test_none_stats_returns_none(self) -> None:
        """Passing ``None`` instead of a stats dict yields ``None``."""
        result = z_score(0.08, None)
        assert result is None

    def test_std_below_min_returns_none(self) -> None:
        """A tiny positive std (1e-10) is expected to be rejected."""
        baseline = {"mean": 0.01, "std": 1e-10}
        result = z_score(0.08, baseline)
        assert result is None

    def test_zero_std_returns_none(self) -> None:
        """A std of exactly zero yields ``None``."""
        baseline = {"mean": 0.01, "std": 0.0}
        result = z_score(0.08, baseline)
        assert result is None

    def test_negative_std_treated_as_invalid(self) -> None:
        """A negative std yields ``None``."""
        baseline = {"mean": 0.01, "std": -0.01}
        result = z_score(0.08, baseline)
        assert result is None

    def test_missing_mean_returns_none(self) -> None:
        """A stats dict without a ``mean`` entry yields ``None``."""
        baseline = {"std": 0.01}
        # The dict is deliberately incomplete, hence the type-checker waiver.
        result = z_score(0.08, baseline)  # type: ignore[arg-type]
        assert result is None

    def test_nan_raw_returns_none(self) -> None:
        """A NaN raw value yields ``None``."""
        baseline = {"mean": 0.01, "std": 0.01}
        result = z_score(math.nan, baseline)
        assert result is None

    def test_inf_raw_returns_none(self) -> None:
        """An infinite raw value yields ``None``."""
        baseline = {"mean": 0.01, "std": 0.01}
        result = z_score(math.inf, baseline)
        assert result is None

    def test_nan_mean_returns_none(self) -> None:
        """A NaN baseline mean yields ``None``."""
        baseline = {"mean": math.nan, "std": 0.01}
        result = z_score(0.08, baseline)
        assert result is None

    def test_min_std_boundary_accepted(self) -> None:
        """A std exactly equal to ``MIN_STD`` is still accepted."""
        baseline = {"mean": 0.0, "std": MIN_STD}
        result = z_score(1.0, baseline)
        assert result is not None

    def test_degenerate_flag_rejects_even_valid_std(self) -> None:
        """Regression test for F02 (Audit 03).

        Stats flagged as degenerate by null_adapter (``runs: 1`` or
        coincidentally-identical seeds) must make ``z_score`` return
        ``None`` even though the floored std passes ``MIN_STD``. This
        guards against the ``+290,766σ`` output once observed on a
        ``runs: 1`` leakage probe.
        """
        baseline = {"mean": 0.01, "std": MIN_STD, "degenerate": 1.0}
        result = z_score(0.30, baseline)
        assert result is None

    def test_non_degenerate_flag_does_not_change_behavior(self) -> None:
        """``degenerate: 0.0`` on otherwise-valid stats still yields a z."""
        baseline = {"mean": 0.01, "std": 0.01, "degenerate": 0.0}
        result = z_score(0.08, baseline)
        assert result is not None
80
81
class TestVerdictFromZ:
    """Check how ``verdict_from_z`` maps a z-score onto a ``Verdict``."""

    def test_pass_at_threshold(self) -> None:
        """A z exactly equal to the threshold passes."""
        outcome = verdict_from_z(3.0, threshold=3.0)
        assert outcome == Verdict.PASS

    def test_fail_below_threshold(self) -> None:
        """A z just under the threshold fails."""
        outcome = verdict_from_z(2.99, threshold=3.0)
        assert outcome == Verdict.FAIL

    def test_high_z_passes(self) -> None:
        """A z far above the threshold passes."""
        outcome = verdict_from_z(100.0, threshold=3.0)
        assert outcome == Verdict.PASS

    def test_negative_z_fails(self) -> None:
        """A negative z fails against a positive threshold."""
        outcome = verdict_from_z(-1.0, threshold=3.0)
        assert outcome == Verdict.FAIL

    def test_none_z_returns_none(self) -> None:
        """A missing z (``None``) produces no verdict."""
        outcome = verdict_from_z(None, threshold=3.0)
        assert outcome is None
97
98
class TestScoreFromZ:
    """Check the score that ``score_from_z`` derives from a z-score."""

    def test_z_zero_gives_half(self) -> None:
        """z = 0 maps to a score close to 0.5."""
        score = score_from_z(0.0)
        assert score is not None
        assert math.isclose(score, 0.5)

    def test_z_three_near_optimistic_band(self) -> None:
        """z = 3 lands strictly between 0.7 and 0.95."""
        score = score_from_z(3.0)
        assert score is not None
        assert score > 0.7
        assert score < 0.95

    def test_negative_z_near_zero(self) -> None:
        """z = -10 maps to a score below 0.1."""
        score = score_from_z(-10.0)
        assert score is not None
        assert score < 0.1

    def test_extreme_positive_clamped(self) -> None:
        """z = +1000 must not overflow math.exp; the score stays in (0.99, 1.0]."""
        score = score_from_z(1000.0)
        assert score is not None
        assert score > 0.99
        assert score <= 1.0

    def test_extreme_negative_clamped(self) -> None:
        """z = -1000 gives a score in [0.0, 0.01)."""
        score = score_from_z(-1000.0)
        assert score is not None
        assert score >= 0.0
        assert score < 0.01

    def test_none_returns_none(self) -> None:
        """A missing z (``None``) produces no score."""
        score = score_from_z(None)
        assert score is None
128
129
class TestNoCalibrationNote:
    """Check the text produced by ``no_calibration_note``."""

    def test_includes_probe_kind(self) -> None:
        """The note names the probe kind and mentions "no calibration"."""
        message = no_calibration_note("delta_kl")
        assert "delta_kl" in message
        # The phrase is matched case-insensitively.
        lowered = message.lower()
        assert "no calibration" in lowered