tenseleyflow/sway / 6346e93

Browse files

tests/leakage: cover the 4 new perturbations + expanded fixture canned-responses (B11)

Authored by espadonne
SHA
6346e933b4126211e3d6667007b6e9851a2781de
Parents
69111b0
Tree
ebea639

1 changed file

Status File + -
M tests/unit/test_probe_leakage.py 71 2
tests/unit/test_probe_leakage.py modified
@@ -31,6 +31,64 @@ class TestPerturb:
3131
     def test_drop_punct_removes_punct(self) -> None:
3232
         assert _perturb("a, b. c!", "drop_punct") == "a b c"
3333
 
34
+    def test_synonym_swap_replaces_first_known_word(self) -> None:
35
+        # "important" → "significant" per the curated table.
36
+        out = _perturb("This is an important fact.", "synonym_swap")
37
+        assert "important" not in out
38
+        assert "significant" in out
39
+
40
+    def test_synonym_swap_preserves_capitalization(self) -> None:
41
+        # Capitalized "Important" → "Significant" (capitalized).
42
+        out = _perturb("Important news today.", "synonym_swap")
43
+        assert out.startswith("Significant")
44
+
45
+    def test_synonym_swap_passthrough_when_no_match(self) -> None:
46
+        # No words in our table appear here.
47
+        text = "Xyzzy frobnitz quux."
48
+        assert _perturb(text, "synonym_swap") == text
49
+
50
+    def test_clause_reverse_swaps_around_comma(self) -> None:
51
+        out = _perturb("First clause, second clause.", "clause_reverse")
52
+        # "second clause" + ", " + "First clause"
53
+        assert out == "second clause, First clause"
54
+
55
+    def test_clause_reverse_passthrough_when_no_separator(self) -> None:
56
+        text = "One simple clause."
57
+        assert _perturb(text, "clause_reverse") == text
58
+
59
+    def test_prefix_inject_prepends_neutral_lead_in(self) -> None:
60
+        out = _perturb("The model said hello.", "prefix_inject")
61
+        assert out.startswith("I think that ")
62
+        # The original first letter gets lower-cased so the sentence reads
63
+        # naturally after the inserted lead-in.
64
+        assert out == "I think that the model said hello."
65
+
66
+    def test_register_shift_lowers_uppercase_head(self) -> None:
67
+        out = _perturb("Hello WORLD this is a sentence with rest.", "register_shift")
68
+        assert out[:30] == "hello world this is a sentence"
69
+
70
+    def test_register_shift_uppers_lowercase_head(self) -> None:
71
+        out = _perturb("hello world this is a sentence with rest.", "register_shift")
72
+        assert out[:30] == "HELLO WORLD THIS IS A SENTENCE"
73
+
74
+
75
+class TestPerturbationsConfigurable:
76
+    def test_default_perturbations_is_seven(self) -> None:
77
+        from dlm_sway.probes.leakage import _default_perturbations
78
+
79
+        assert len(_default_perturbations()) == 7
80
+
81
+    def test_spec_perturbations_field_subset(self) -> None:
82
+        """A spec can request a subset; default is all seven."""
83
+        probe, spec = build_probe(
84
+            {
85
+                "name": "lk",
86
+                "kind": "leakage",
87
+                "perturbations": ["typo", "synonym_swap"],
88
+            }
89
+        )
90
+        assert spec.perturbations == ["typo", "synonym_swap"]
91
+
3492
 
3593
 class TestFragility:
3694
     def test_zero_when_clean_zero(self) -> None:
@@ -59,11 +117,22 @@ def _backend(*, ft_recall: float, ft_perturbed_recall: float) -> DummyDifferenti
59117
     ft_pert = target[: int(ft_perturbed_recall * len(target))]
60118
 
61119
     base = DummyResponses()
120
+    # Cover the prompt at every default perturbation so the probe
121
+    # doesn't KeyError on a missing canned response. After B11 the
122
+    # default set has 7 entries.
123
+    perturbations = (
124
+        "typo",
125
+        "case_flip",
126
+        "drop_punct",
127
+        "synonym_swap",
128
+        "clause_reverse",
129
+        "prefix_inject",
130
+        "register_shift",
131
+    )
62132
     ft = DummyResponses(
63133
         generations={
64134
             content[:128]: ft_full,
65
-            # perturbations of the first 128 chars hit these three:
66
-            **{_perturb(content[:128], p): ft_pert for p in ("typo", "case_flip", "drop_punct")},
135
+            **{_perturb(content[:128], p): ft_pert for p in perturbations},
67136
         }
68137
     )
69138
     return DummyDifferentialBackend(base=base, ft=ft), content