tenseleyflow/sway / cc3896e

Browse files

probes/style_fingerprint: detect zero-fp ft as ERROR; replace cosine with projection (B4)

Authored by espadonne
SHA
cc3896ec6fc4d99d7a92510cb0b7b9b79fd14e7a
Parents
0f66b70
Tree
44a9797

2 changed files

StatusFile+-
M src/dlm_sway/probes/style_fingerprint.py 48 8
M tests/unit/test_probe_style_fingerprint.py 64 0
src/dlm_sway/probes/style_fingerprint.pymodified
@@ -141,9 +141,38 @@ class StyleFingerprintProbe(Probe):
141141
         ft_fp = fingerprint("\n".join(ft_samples))
142142
         doc_fp = fingerprint(doc_text)
143143
 
144
-        shift = _cosine_shift(base_fp, ft_fp, doc_fp)
144
+        # B4 fix: a degenerate ft fingerprint (all-empty generations →
145
+        # zeros) used to coincidentally produce a positive cosine shift
146
+        # because cos(ft-base, doc-base) ≈ cos(-base, doc-base) is often
147
+        # positive. Detect that case and emit ERROR rather than PASS.
148
+        ft_is_zero = bool(np.allclose(ft_fp, 0.0))
149
+        ft_text_is_empty = all(not s.strip() for s in ft_samples)
150
+        if ft_is_zero or ft_text_is_empty:
151
+            return safe_finalize(
152
+                name=spec.name,
153
+                kind=spec.kind,
154
+                verdict=Verdict.ERROR,
155
+                score=None,
156
+                raw=None,
157
+                evidence={
158
+                    "base_fp": base_fp.tolist(),
159
+                    "ft_fp": ft_fp.tolist(),
160
+                    "doc_fp": doc_fp.tolist(),
161
+                    "ft_text_is_empty": ft_text_is_empty,
162
+                    "ft_fp_is_zero": ft_is_zero,
163
+                    "weight": spec.weight,
164
+                },
165
+                message=(
166
+                    "fine-tuned model produced empty / zero-fingerprint output — "
167
+                    "cannot measure style shift on a degenerate ft view"
168
+                ),
169
+            )
170
+
171
+        shift = _projection_shift(base_fp, ft_fp, doc_fp)
145172
         verdict = Verdict.PASS if shift >= spec.assert_shift_gte else Verdict.FAIL
146
-        score = float(np.clip((shift + 1.0) / 2.0, 0.0, 1.0))
173
+        # Score: 0 at no shift, 1 when ft moves a full doc-gap toward
174
+        # doc; clamp to [0, 1].
175
+        score = float(np.clip(shift, 0.0, 1.0))
147176
 
148177
         return safe_finalize(
149178
             name=spec.name,
@@ -166,14 +195,25 @@ class StyleFingerprintProbe(Probe):
166195
         )
167196
 
168197
 
169
-def _cosine_shift(
198
+def _projection_shift(
170199
     base: NDArray[np.float64], ft: NDArray[np.float64], doc: NDArray[np.float64]
171200
 ) -> float:
172
-    """Cosine between (ft - base) and (doc - base) in fingerprint space."""
201
+    """Project (ft - base) onto (doc - base), normalized by ||doc - base||².
202
+
203
+    Returns ``((ft - base) · (doc - base)) / ||doc - base||²``. Properties:
204
+
205
+    - ``ft == base`` → 0 (no shift)
206
+    - ``ft == doc`` → 1 (ft moved a full doc-gap toward doc)
207
+    - ``ft`` moved opposite to doc → negative
208
+    - ``doc == base`` (no doc gap to measure) → 0
209
+
210
+    This replaces the older ``cos(ft-base, doc-base)`` which silently
211
+    treated a zero ft fingerprint as a phantom positive correlation when
212
+    ``-base`` happened to point in roughly the doc direction (B4).
213
+    """
173214
     a = ft - base
174215
     b = doc - base
175
-    na = float(np.linalg.norm(a))
176
-    nb = float(np.linalg.norm(b))
177
-    if na == 0.0 or nb == 0.0:
216
+    nb_sq = float(np.dot(b, b))
217
+    if nb_sq == 0.0:
178218
         return 0.0
179
-    return float(np.dot(a, b) / (na * nb))
219
+    return float(np.dot(a, b) / nb_sq)
tests/unit/test_probe_style_fingerprint.pymodified
@@ -113,3 +113,67 @@ class TestProbe:
113113
         ctx = RunContext(backend=backend)
114114
         result = probe.run(spec, ctx)
115115
         assert result.verdict == Verdict.ERROR
116
+
117
+
118
+class TestB4ZeroFtFingerprint:
119
+    """Pins the B4 fix: a degenerate ft (empty generations / zero
120
+    fingerprint) must NOT pass — must produce ERROR with a clear
121
+    message. The historical bug used cos(ft-base, doc-base); when ft
122
+    was zero, ft-base collapsed to -base, which coincidentally aligned
123
+    with doc-base and produced spurious +0.82 PASS verdicts."""
124
+
125
+    def test_empty_ft_generations_route_to_error(self) -> None:
126
+        base_samples = ["Some real prose. With multiple sentences."] * 2
127
+        ft_samples = ["", ""]  # broken ft model produces no text
128
+        doc = "Wherein clauses conjoin into meandering wholes."
129
+        backend = _backend_with_samples(base_samples, ft_samples)
130
+        probe, spec = build_probe(
131
+            {
132
+                "name": "c1",
133
+                "kind": "style_fingerprint",
134
+                "prompts": ["p0", "p1"],
135
+                "doc_reference": doc,
136
+                "assert_shift_gte": 0.0,
137
+            }
138
+        )
139
+        ctx = RunContext(backend=backend)
140
+        result = probe.run(spec, ctx)
141
+        assert result.verdict == Verdict.ERROR
142
+        assert "empty" in result.message.lower() or "degenerate" in result.message.lower()
143
+        # Evidence preserves the fingerprints for postmortem.
144
+        assert result.evidence["ft_text_is_empty"] is True
145
+
146
+    def test_whitespace_only_ft_generations_route_to_error(self) -> None:
147
+        base_samples = ["Some real prose."] * 2
148
+        ft_samples = ["   ", "\n\n"]
149
+        backend = _backend_with_samples(base_samples, ft_samples)
150
+        probe, spec = build_probe(
151
+            {
152
+                "name": "c1",
153
+                "kind": "style_fingerprint",
154
+                "prompts": ["p0", "p1"],
155
+                "doc_reference": "doc",
156
+            }
157
+        )
158
+        ctx = RunContext(backend=backend)
159
+        result = probe.run(spec, ctx)
160
+        assert result.verdict == Verdict.ERROR
161
+
162
+    def test_projection_shift_zero_when_ft_equals_base(self) -> None:
163
+        """ft == base → 0 shift, regardless of where doc sits."""
164
+        same = "Same prose. Same words."
165
+        backend = _backend_with_samples([same, same], [same, same])
166
+        probe, spec = build_probe(
167
+            {
168
+                "name": "c1",
169
+                "kind": "style_fingerprint",
170
+                "prompts": ["p0", "p1"],
171
+                "doc_reference": "Wholly different doc style with many words.",
172
+                "assert_shift_gte": 0.01,
173
+            }
174
+        )
175
+        ctx = RunContext(backend=backend)
176
+        result = probe.run(spec, ctx)
177
+        # ft fp == base fp → projection is exactly 0.
178
+        assert result.raw == 0.0
179
+        assert result.verdict == Verdict.FAIL  # no shift, gate fails