Python · 6661 bytes Raw Blame History
1 """Tests for :mod:`dlm_sway.probes.leakage`."""
2
3 from __future__ import annotations
4
5 from dlm_sway.backends.dummy import DummyDifferentialBackend, DummyResponses
6 from dlm_sway.core.result import Verdict
7 from dlm_sway.core.sections import Section
8 from dlm_sway.probes.base import RunContext, build_probe
9 from dlm_sway.probes.leakage import _fragility, _lcs_ratio, _perturb
10
11
class TestLCS:
    """Boundary checks for the ``_lcs_ratio`` helper."""

    def test_identical_returns_one(self) -> None:
        # Comparing a string with an equal string yields the maximum ratio.
        ratio = _lcs_ratio("abcdef", "abcdef")
        assert ratio == 1.0

    def test_disjoint_returns_low(self) -> None:
        # Strings sharing no characters must stay under the 0.3 ceiling.
        ratio = _lcs_ratio("abc", "xyz")
        assert ratio < 0.3

    def test_empty_returns_zero(self) -> None:
        # An empty first argument yields exactly zero.
        ratio = _lcs_ratio("", "abc")
        assert ratio == 0.0
21
22
class TestPerturb:
    """Checks each perturbation kind of ``_perturb`` against a fixed expected output."""

    def test_typo_swaps_first_two(self) -> None:
        perturbed = _perturb("hello", "typo")
        assert perturbed == "ehllo"

    def test_case_flip_inverts_first_alpha(self) -> None:
        # Lower-case and upper-case leading letters are both exercised.
        cases = (
            ("abc", "Abc"),
            ("ABC", "aBC"),
        )
        for source, expected in cases:
            assert _perturb(source, "case_flip") == expected

    def test_drop_punct_removes_punct(self) -> None:
        stripped = _perturb("a, b. c!", "drop_punct")
        assert stripped == "a b c"

    def test_synonym_swap_replaces_first_known_word(self) -> None:
        # The curated table maps "important" to "significant".
        swapped = _perturb("This is an important fact.", "synonym_swap")
        assert "important" not in swapped
        assert "significant" in swapped

    def test_synonym_swap_preserves_capitalization(self) -> None:
        # A capitalized "Important" must come back as a capitalized "Significant".
        swapped = _perturb("Important news today.", "synonym_swap")
        assert swapped.startswith("Significant")

    def test_synonym_swap_passthrough_when_no_match(self) -> None:
        # None of these words appear in the synonym table.
        original = "Xyzzy frobnitz quux."
        assert _perturb(original, "synonym_swap") == original

    def test_clause_reverse_swaps_around_comma(self) -> None:
        reversed_text = _perturb("First clause, second clause.", "clause_reverse")
        # Expected layout: "second clause" + ", " + "First clause".
        assert reversed_text == "second clause, First clause"

    def test_clause_reverse_passthrough_when_no_separator(self) -> None:
        original = "One simple clause."
        assert _perturb(original, "clause_reverse") == original

    def test_prefix_inject_prepends_neutral_lead_in(self) -> None:
        injected = _perturb("The model said hello.", "prefix_inject")
        assert injected.startswith("I think that ")
        # The original leading letter is lower-cased so the sentence still
        # reads naturally behind the inserted lead-in.
        assert injected == "I think that the model said hello."

    def test_register_shift_lowers_uppercase_head(self) -> None:
        shifted = _perturb("Hello WORLD this is a sentence with rest.", "register_shift")
        head = shifted[:30]
        assert head == "hello world this is a sentence"

    def test_register_shift_uppers_lowercase_head(self) -> None:
        shifted = _perturb("hello world this is a sentence with rest.", "register_shift")
        head = shifted[:30]
        assert head == "HELLO WORLD THIS IS A SENTENCE"
73
74
class TestPerturbationsConfigurable:
    """Checks the default perturbation set and the spec-level override."""

    def test_default_perturbations_is_seven(self) -> None:
        from dlm_sway.probes.leakage import _default_perturbations

        defaults = _default_perturbations()
        assert len(defaults) == 7

    def test_spec_perturbations_field_subset(self) -> None:
        """A spec can request a subset; default is all seven."""
        config = {
            "name": "lk",
            "kind": "leakage",
            "perturbations": ["typo", "synonym_swap"],
        }
        # Only the parsed spec matters here; the probe instance is unused.
        _, spec = build_probe(config)
        assert spec.perturbations == ["typo", "synonym_swap"]
91
92
class TestFragility:
    """Checks ``_fragility`` on a zero-recall input and on a recall drop."""

    def test_zero_when_clean_zero(self) -> None:
        # Both recalls at zero must give a fragility of exactly zero.
        score = _fragility(0.0, 0.0)
        assert score == 0.0

    def test_expected_when_perturbed_dropped(self) -> None:
        import pytest

        # Clean recall 0.8 falling to 0.2 is expected to score 0.75.
        score = _fragility(0.8, 0.2)
        assert score == pytest.approx(0.75)
101
102
def _prose_section(sid: str, content: str) -> Section:
    """Wrap ``content`` in a ``Section`` of kind ``"prose"`` with id ``sid``."""
    section = Section(id=sid, kind="prose", content=content)
    return section
105
106
def _backend(
    *, ft_recall: float, ft_perturbed_recall: float
) -> tuple[DummyDifferentialBackend, str]:
    """Build a dummy backend plus the section text its canned responses match.

    The section text is the sentence "The capital of France is Paris."
    repeated 30 times. Its first 128 characters serve as the prompt prefix
    and the next 256 characters as the continuation target; these are the
    same values the probe specs in this file pass as ``prefix_chars`` and
    ``continuation_chars``. The fine-tuned side answers with a leading slice
    of the target so the test can dial in the fraction it reproduces.

    Args:
        ft_recall: Fraction of the target returned for the unperturbed prefix.
        ft_perturbed_recall: Fraction of the target returned for every
            perturbed variant of the prefix.

    Returns:
        A ``(backend, content)`` pair: the differential backend and the full
        section text the caller should wrap in a section.
    """
    content = ("The capital of France is Paris. " * 30).strip()
    prefix = content[:128]
    target = content[128 : 128 + 256]
    # Generate a fraction of the target to hit the desired recall.
    ft_full = target[: int(ft_recall * len(target))]
    ft_pert = target[: int(ft_perturbed_recall * len(target))]

    # The base side has no canned responses.
    base = DummyResponses()
    # Cover the prompt at every default perturbation so the probe
    # doesn't KeyError on a missing canned response. After B11 the
    # default set has 7 entries.
    perturbations = (
        "typo",
        "case_flip",
        "drop_punct",
        "synonym_swap",
        "clause_reverse",
        "prefix_inject",
        "register_shift",
    )
    # The clean prefix is inserted first; if a perturbation leaves the prefix
    # unchanged, its entry overwrites the clean one, as the dict order implies.
    ft = DummyResponses(
        generations={
            prefix: ft_full,
            **{_perturb(prefix, p): ft_pert for p in perturbations},
        }
    )
    return DummyDifferentialBackend(base=base, ft=ft), content
139
140
class TestProbe:
    """Runs the ``leakage`` probe against the dummy backend and checks verdicts."""

    def test_skip_without_sections(self) -> None:
        backend, _ = _backend(ft_recall=0.0, ft_perturbed_recall=0.0)
        probe, spec = build_probe({"name": "c3", "kind": "leakage"})
        # The context carries no sections at all.
        context = RunContext(backend=backend)
        outcome = probe.run(spec, context)
        assert outcome.verdict == Verdict.SKIP

    def test_pass_when_no_leak(self) -> None:
        backend, content = _backend(ft_recall=0.0, ft_perturbed_recall=0.0)
        config = {
            "name": "c3",
            "kind": "leakage",
            "prefix_chars": 128,
            "continuation_chars": 256,
        }
        probe, spec = build_probe(config)
        sections = (_prose_section("a", content),)
        context = RunContext(backend=backend, sections=sections)
        outcome = probe.run(spec, context)
        assert outcome.verdict == Verdict.PASS

    def test_fail_when_strong_low_fragility_leak(self) -> None:
        backend, content = _backend(ft_recall=0.95, ft_perturbed_recall=0.9)
        config = {
            "name": "c3",
            "kind": "leakage",
            "prefix_chars": 128,
            "continuation_chars": 256,
            "assert_recall_lt": 0.5,
            "min_fragility": 0.3,
        }
        probe, spec = build_probe(config)
        sections = (_prose_section("a", content),)
        context = RunContext(backend=backend, sections=sections)
        outcome = probe.run(spec, context)
        # High recall + low fragility → fail.
        assert outcome.verdict == Verdict.FAIL