tenseleyflow/sway / 76fc010

Browse files

tests: prove-value — mined paraphrases flip memorizing adapter PASS→FAIL (S17.6)

Authored by espadonne
SHA
76fc010e98be5db50cc834abdd18ba170ebe3914
Parents
cec8c23
Tree
a5ea174

1 changed file

Status File + -
A tests/unit/test_paraphrase_miner_prove_value.py 207 0
tests/unit/test_paraphrase_miner_prove_value.py added
@@ -0,0 +1,207 @@
1
+"""F11 prove-the-value: mined paraphrases flip a memorizing adapter's verdict.
2
+
3
+``paraphrase_invariance`` asks "does the adapter lift the gold answer
4
+equally when the prompt is paraphrased?" A **memorizing** adapter
5
+passes cleanly when the hand-written paraphrase list consists of
6
+near-templated rewordings of the seed prompt (the adapter memorized
7
+the seed and generalizes the templated tweaks). The **miner** searches
8
+further out — semantically different rewordings — and surfaces the
9
+paraphrases the adapter *doesn't* lift.
10
+
11
+This test plants exactly that scenario:
12
+
13
+1. **Hand-written paraphrases** are all close-template rewordings.
14
+   The memorizing adapter lifts them ≈ verbatim lift → high
15
+   ``generalization_ratio`` → PASS.
16
+2. **Mined candidates** include semantically distant rewordings the
17
+   memorizing adapter doesn't lift. The miner ranks those first.
18
+3. Substitute the mined paraphrases into the probe's spec and re-run.
19
+   ``generalization_ratio`` collapses → verdict flips to FAIL.
20
+
21
+The F11 claim, reified: the mined list surfaces a concrete gap the
22
+hand-written list missed entirely.
23
+"""
24
+
25
+from __future__ import annotations
26
+
27
+import numpy as np
28
+import pytest
29
+
30
+from dlm_sway.backends.dummy import DummyDifferentialBackend, DummyResponses
31
+from dlm_sway.core.result import Verdict
32
+from dlm_sway.mining.paraphrase_miner import mine_paraphrases
33
+from dlm_sway.probes.base import RunContext, build_probe
34
+
35
+# ---------------------------------------------------------------------
36
+# Scenario constants — a memorizing adapter on one seed case.
37
+# ---------------------------------------------------------------------
38
+
39
SEED_PROMPT: str = "The capital of France is"
GOLD: str = " Paris"

# Paraphrases of the sort a well-meaning user writes by hand: they stay
# close to the seed and are mostly templated rewordings of it.
HAND_WRITTEN: list[str] = [
    "Capital of France:",
    "France capital equals",
]

# What the miner will pull (stubbed nlpaug output): a mix of
# near-templates (easy) and semantically distant rewordings (hard).
MINER_CANDIDATES: list[str] = [
    "What is the capital of France?",  # near
    "Tell me about French capital",  # distant
    "Which city governs France?",  # distant
    "Name the primary city in France",  # distant
]

# Token-lift model for the memorizing adapter. Each pair is the
# per-token logprob on the base model and on the fine-tuned (ft) model.
# Verbatim seed: big lift.
VERBATIM_BASE_LP: float = -3.0
VERBATIM_FT_LP: float = -0.5
# Near-templates: moderate lift (still pattern-matched).
NEAR_BASE_LP: float = -3.0
NEAR_FT_LP: float = -1.0
# Semantically distant rewordings: no lift, the adapter does not
# recognize them.
DISTANT_BASE_LP: float = -3.0
DISTANT_FT_LP: float = -3.0

# Token-count note: the estimate is len(gold) // 4, which is 1 for the
# 6-character " Paris". The probe multiplies the logprob by the token
# count, so with a single token the per-token values above are also the
# totals and the lifts stay directly interpretable.
71
+
72
+
73
def _prompt_lp_base(prompt: str) -> float:
    """Return the canned base-side logprob for ``(prompt, GOLD)``.

    The same value is returned for every prompt; ``prompt`` is accepted
    only so the signature lines up with ``_prompt_lp_ft``.
    """
    del prompt  # the base score does not depend on the prompt
    return VERBATIM_BASE_LP
77
+
78
+
79
def _prompt_lp_ft(prompt: str) -> float:
    """Return the canned ft-side logprob for ``(prompt, GOLD)``.

    The verbatim seed gets the largest value, the near-template
    rewordings get a moderate one, and every other prompt is treated as
    a distant rewording with no lift.
    """
    if prompt == SEED_PROMPT:
        return VERBATIM_FT_LP

    # Prompts the memorizing adapter still pattern-matches.
    near_templates = (
        "Capital of France:",
        "France capital equals",
        "What is the capital of France?",
    )
    return NEAR_FT_LP if prompt in near_templates else DISTANT_FT_LP
89
+
90
+
91
def _memorizing_backend(prompts: list[str]) -> DummyDifferentialBackend:
    """Build a dummy differential backend scripted for ``prompts``.

    Each prompt is paired with ``GOLD`` and given its canned base and ft
    logprob from ``_prompt_lp_base`` / ``_prompt_lp_ft``.
    """
    base_scores: dict[tuple[str, str], float] = {}
    ft_scores: dict[tuple[str, str], float] = {}
    for text in prompts:
        key = (text, GOLD)
        base_scores[key] = _prompt_lp_base(text)
        ft_scores[key] = _prompt_lp_ft(text)

    base_side = DummyResponses(logprobs=base_scores)
    ft_side = DummyResponses(logprobs=ft_scores)
    return DummyDifferentialBackend(base=base_side, ft=ft_side)
98
+
99
+
100
def _stub_embedder(monkeypatch: pytest.MonkeyPatch) -> None:
    """Replace the miner's embedder loader with a one-hot stub.

    The seed prompt and each miner candidate map to a distinct unit
    vector, so all embeddings are mutually orthogonal. The intent is
    that the diversity filter keeps every candidate and the ranker's
    decisions are what the test measures. Texts outside the table raise
    ``KeyError``.
    """
    known_texts = (
        SEED_PROMPT,
        "What is the capital of France?",
        "Tell me about French capital",
        "Which city governs France?",
        "Name the primary city in France",
    )
    # Row i of the identity matrix is the one-hot vector for text i.
    one_hot_rows = np.eye(len(known_texts), dtype=np.float32)
    lookup = dict(zip(known_texts, one_hot_rows))

    def _encode(texts: list[str]) -> np.ndarray:
        vectors = [lookup[text] for text in texts]
        return np.stack(vectors)

    monkeypatch.setattr(
        "dlm_sway.mining.paraphrase_miner._load_embedder",
        lambda _model_id: _encode,  # type: ignore[arg-type]
    )
119
+
120
+
121
def _run_probe(paraphrases: list[str], all_prompts: list[str]) -> tuple[Verdict, float]:
    """Run ``paraphrase_invariance`` on the seed case.

    ``paraphrases`` is the paraphrase list placed in the probe's single
    case; ``all_prompts`` is the set of prompts the dummy backend is
    scripted for. Returns the probe verdict together with the
    ``generalization_ratio`` read from the result's evidence.
    """
    backend = _memorizing_backend(all_prompts)

    seed_case = {
        "prompt": SEED_PROMPT,
        "gold": GOLD,
        "paraphrases": paraphrases,
    }
    probe_config = {
        "name": "pi",
        "kind": "paraphrase_invariance",
        "cases": [seed_case],
        "intent": "generalize",
        # Default threshold is 0.5 — spelled out so the test's
        # assertions can refer to it.
        "min_generalization_ratio": 0.5,
        "min_verbatim_lift": 0.2,
    }
    probe, spec = build_probe(probe_config)

    outcome = probe.run(spec, RunContext(backend=backend))
    generalization_ratio = float(outcome.evidence["generalization_ratio"])
    return outcome.verdict, generalization_ratio
146
+
147
+
148
def test_mined_paraphrases_flip_memorizing_adapter_from_pass_to_fail(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The F11 prove-the-value demonstration in concrete form.

    Runs ``paraphrase_invariance`` twice against the same memorizing
    backend: first with the hand-written paraphrases (expected PASS),
    then with the paraphrases returned by ``mine_paraphrases`` (expected
    FAIL), and checks that the ``generalization_ratio`` drops by more
    than 0.3 between the two runs.
    """
    _stub_embedder(monkeypatch)

    # 1. Hand-written paraphrases — the memorizing adapter passes.
    all_prompts_hand = [SEED_PROMPT, *HAND_WRITTEN]
    hand_verdict, hand_ratio = _run_probe(HAND_WRITTEN, all_prompts_hand)
    assert hand_verdict == Verdict.PASS, (
        f"memorizing adapter should pass on close-template paraphrases; "
        f"got verdict={hand_verdict}, ratio={hand_ratio:.3f}"
    )
    # The ratio must clear the 0.5 threshold configured in _run_probe.
    assert hand_ratio > 0.5, hand_ratio

    # 2. Mine paraphrases — the miner pulls candidates including
    # semantically distant ones and ranks them by gap.
    miner_backend = _memorizing_backend([SEED_PROMPT, *HAND_WRITTEN, *MINER_CANDIDATES])

    def _canned(_prompt: str, *, n: int, seed: int) -> list[str]:
        # Deterministic stand-in for the candidate generator: the
        # requested count and seed are ignored on purpose.
        del n, seed
        return list(MINER_CANDIDATES)

    mined = mine_paraphrases(
        prompt=SEED_PROMPT,
        gold=GOLD,
        backend=miner_backend,
        generate_candidates=_canned,
        n_candidates=4,
        top_k=3,
        seed=0,
    )

    mined_paraphrases = [c.prompt for c in mined.candidates]
    # Fail with a readable message instead of an IndexError below if
    # the miner hands back nothing.
    assert mined_paraphrases, "miner returned no candidates; nothing to substitute into the probe"

    # The mined list starts with the semantically-distant rewordings
    # (the adapter doesn't lift them → largest gap).
    distant_rewordings = {
        "Tell me about French capital",
        "Which city governs France?",
        "Name the primary city in France",
    }
    assert (
        mined_paraphrases[0] in distant_rewordings
    ), f"expected a distant rewording at rank 0; got {mined_paraphrases}"

    # 3. Re-run paraphrase_invariance with the mined paraphrases —
    # verdict must flip to FAIL.
    all_prompts_mined = [SEED_PROMPT, *mined_paraphrases]
    mined_verdict, mined_ratio = _run_probe(mined_paraphrases, all_prompts_mined)
    assert mined_verdict == Verdict.FAIL, (
        f"mined paraphrases should flip the memorizing adapter's verdict; "
        f"got verdict={mined_verdict}, ratio={mined_ratio:.3f}"
    )
    # The generalization_ratio must fall below the 0.5 threshold.
    assert mined_ratio < 0.5, mined_ratio

    # And the ratio gap is meaningful — this is the F11 headline number:
    # mined list surfaces a generalization gap the hand-list missed.
    assert hand_ratio - mined_ratio > 0.3, (
        f"expected >0.3 ratio gap between hand-list and mined-list; "
        f"got hand={hand_ratio:.3f}, mined={mined_ratio:.3f}"
    )