probes/style_fingerprint: detect zero-fp ft as ERROR; replace cosine with projection (B4)
- SHA: cc3896ec6fc4d99d7a92510cb0b7b9b79fd14e7a (cc3896e)
- Parents: 0f66b70
- Tree: 44a9797

| Status | File | + | - |
|---|---|---|---|
| M | src/dlm_sway/probes/style_fingerprint.py | 48 | 8 |
| M | tests/unit/test_probe_style_fingerprint.py | 64 | 0 |
src/dlm_sway/probes/style_fingerprint.py (modified)
@@ -141,9 +141,38 @@ class StyleFingerprintProbe(Probe): | ||
| 141 | 141 | ft_fp = fingerprint("\n".join(ft_samples)) |
| 142 | 142 | doc_fp = fingerprint(doc_text) |
| 143 | 143 | |
| 144 | - shift = _cosine_shift(base_fp, ft_fp, doc_fp) | |
| 144 | + # B4 fix: a degenerate ft fingerprint (all-empty generations → | |
| 145 | + # zeros) used to coincidentally produce a positive cosine shift | |
| 146 | + # because cos(ft-base, doc-base) ≈ cos(-base, doc-base) is often | |
| 147 | + # positive. Detect that case and emit ERROR rather than PASS. | |
| 148 | + ft_is_zero = bool(np.allclose(ft_fp, 0.0)) | |
| 149 | + ft_text_is_empty = all(not s.strip() for s in ft_samples) | |
| 150 | + if ft_is_zero or ft_text_is_empty: | |
| 151 | + return safe_finalize( | |
| 152 | + name=spec.name, | |
| 153 | + kind=spec.kind, | |
| 154 | + verdict=Verdict.ERROR, | |
| 155 | + score=None, | |
| 156 | + raw=None, | |
| 157 | + evidence={ | |
| 158 | + "base_fp": base_fp.tolist(), | |
| 159 | + "ft_fp": ft_fp.tolist(), | |
| 160 | + "doc_fp": doc_fp.tolist(), | |
| 161 | + "ft_text_is_empty": ft_text_is_empty, | |
| 162 | + "ft_fp_is_zero": ft_is_zero, | |
| 163 | + "weight": spec.weight, | |
| 164 | + }, | |
| 165 | + message=( | |
| 166 | + "fine-tuned model produced empty / zero-fingerprint output — " | |
| 167 | + "cannot measure style shift on a degenerate ft view" | |
| 168 | + ), | |
| 169 | + ) | |
| 170 | + | |
| 171 | + shift = _projection_shift(base_fp, ft_fp, doc_fp) | |
| 145 | 172 | verdict = Verdict.PASS if shift >= spec.assert_shift_gte else Verdict.FAIL |
| 146 | - score = float(np.clip((shift + 1.0) / 2.0, 0.0, 1.0)) | |
| 173 | + # Score: 0 at no shift, 1 when ft moves a full doc-gap toward | |
| 174 | + # doc; clamp to [0, 1]. | |
| 175 | + score = float(np.clip(shift, 0.0, 1.0)) | |
| 147 | 176 | |
| 148 | 177 | return safe_finalize( |
| 149 | 178 | name=spec.name, |
@@ -166,14 +195,25 @@ class StyleFingerprintProbe(Probe): | ||
| 166 | 195 | ) |
| 167 | 196 | |
| 168 | 197 | |
| 169 | -def _cosine_shift( | |
| 198 | +def _projection_shift( | |
| 170 | 199 | base: NDArray[np.float64], ft: NDArray[np.float64], doc: NDArray[np.float64] |
| 171 | 200 | ) -> float: |
| 172 | - """Cosine between (ft - base) and (doc - base) in fingerprint space.""" | |
| 201 | + """Project (ft - base) onto (doc - base), normalized by ||doc - base||². | |
| 202 | + | |
| 203 | + Returns ``((ft - base) · (doc - base)) / ||doc - base||²``. Properties: | |
| 204 | + | |
| 205 | + - ``ft == base`` → 0 (no shift) | |
| 206 | + - ``ft == doc`` → 1 (ft moved a full doc-gap toward doc) | |
| 207 | + - ``ft`` moved opposite to doc → negative | |
| 208 | + - ``doc == base`` (no doc gap to measure) → 0 | |
| 209 | + | |
| 210 | + This replaces the older ``cos(ft-base, doc-base)`` which silently | |
| 211 | + treated a zero ft-shift as a phantom positive correlation when | |
| 212 | + ``-base`` happened to point in roughly the doc direction (B4). | |
| 213 | + """ | |
| 173 | 214 | a = ft - base |
| 174 | 215 | b = doc - base |
| 175 | - na = float(np.linalg.norm(a)) | |
| 176 | - nb = float(np.linalg.norm(b)) | |
| 177 | - if na == 0.0 or nb == 0.0: | |
| 216 | + nb_sq = float(np.dot(b, b)) | |
| 217 | + if nb_sq == 0.0: | |
| 178 | 218 | return 0.0 |
| 179 | - return float(np.dot(a, b) / (na * nb)) | |
| 219 | + return float(np.dot(a, b) / nb_sq) | |
tests/unit/test_probe_style_fingerprint.py (modified)
@@ -113,3 +113,67 @@ class TestProbe: | ||
| 113 | 113 | ctx = RunContext(backend=backend) |
| 114 | 114 | result = probe.run(spec, ctx) |
| 115 | 115 | assert result.verdict == Verdict.ERROR |
| 116 | + | |
| 117 | + | |
| 118 | +class TestB4ZeroFtFingerprint: | |
| 119 | + """Pins the B4 fix: a degenerate ft (empty generations / zero | |
| 120 | + fingerprint) must NOT pass — must produce ERROR with a clear | |
| 121 | + message. The historical bug used cos(ft-base, doc-base) which | |
| 122 | + coincidentally aligned with -base when ft was zero, producing | |
| 123 | + spurious +0.82 PASS verdicts.""" | |
| 124 | + | |
| 125 | + def test_empty_ft_generations_route_to_error(self) -> None: | |
| 126 | + base_samples = ["Some real prose. With multiple sentences."] * 2 | |
| 127 | + ft_samples = ["", ""] # broken ft model produces no text | |
| 128 | + doc = "Wherein clauses conjoin into meandering wholes." | |
| 129 | + backend = _backend_with_samples(base_samples, ft_samples) | |
| 130 | + probe, spec = build_probe( | |
| 131 | + { | |
| 132 | + "name": "c1", | |
| 133 | + "kind": "style_fingerprint", | |
| 134 | + "prompts": ["p0", "p1"], | |
| 135 | + "doc_reference": doc, | |
| 136 | + "assert_shift_gte": 0.0, | |
| 137 | + } | |
| 138 | + ) | |
| 139 | + ctx = RunContext(backend=backend) | |
| 140 | + result = probe.run(spec, ctx) | |
| 141 | + assert result.verdict == Verdict.ERROR | |
| 142 | + assert "empty" in result.message.lower() or "degenerate" in result.message.lower() | |
| 143 | + # Evidence preserves the fingerprints for postmortem. | |
| 144 | + assert result.evidence["ft_text_is_empty"] is True | |
| 145 | + | |
| 146 | + def test_whitespace_only_ft_generations_route_to_error(self) -> None: | |
| 147 | + base_samples = ["Some real prose."] * 2 | |
| 148 | + ft_samples = [" ", "\n\n"] | |
| 149 | + backend = _backend_with_samples(base_samples, ft_samples) | |
| 150 | + probe, spec = build_probe( | |
| 151 | + { | |
| 152 | + "name": "c1", | |
| 153 | + "kind": "style_fingerprint", | |
| 154 | + "prompts": ["p0", "p1"], | |
| 155 | + "doc_reference": "doc", | |
| 156 | + } | |
| 157 | + ) | |
| 158 | + ctx = RunContext(backend=backend) | |
| 159 | + result = probe.run(spec, ctx) | |
| 160 | + assert result.verdict == Verdict.ERROR | |
| 161 | + | |
| 162 | + def test_projection_shift_zero_when_ft_equals_base(self) -> None: | |
| 163 | + """ft == base → 0 shift, regardless of where doc sits.""" | |
| 164 | + same = "Same prose. Same words." | |
| 165 | + backend = _backend_with_samples([same, same], [same, same]) | |
| 166 | + probe, spec = build_probe( | |
| 167 | + { | |
| 168 | + "name": "c1", | |
| 169 | + "kind": "style_fingerprint", | |
| 170 | + "prompts": ["p0", "p1"], | |
| 171 | + "doc_reference": "Wholly different doc style with many words.", | |
| 172 | + "assert_shift_gte": 0.01, | |
| 173 | + } | |
| 174 | + ) | |
| 175 | + ctx = RunContext(backend=backend) | |
| 176 | + result = probe.run(spec, ctx) | |
| 177 | + # ft fp == base fp → projection is exactly 0. | |
| 178 | + assert result.raw == 0.0 | |
| 179 | + assert result.verdict == Verdict.FAIL # no shift, gate fails | |