@@ -0,0 +1,179 @@
+"""C1 StyleFingerprint: does fine-tuned (ft) prose *read* like the doc?
+
+Generates base and ft completions from a set of stylistic prompts,
+extracts a 6-dimensional fingerprint from each, and measures whether the
+ft fingerprint has shifted **toward** the training document's own
+fingerprint relative to the base.
+
+The fingerprint uses numpy-only features so the probe works out of the
+box without spaCy/textstat. The optional ``style`` extra upgrades the
+fingerprint with passive-voice rate and POS entropy in a later
+milestone; the numeric contract (a non-negative vector per text) is
+stable across that upgrade.
+
+Signal: ``style_shift = cos(ft_fp - base_fp, doc_fp - base_fp)`` in
+fingerprint space. Positive values mean the ft model has moved *toward*
+the doc's style; negative values mean it has moved *away* (a bad sign);
+near-zero means the shift is negligible or orthogonal to the doc
+direction, i.e. no detectable pull toward the doc's style.
+"""
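+
+# A minimal sketch of the signal with toy vectors (illustrative numbers, not
+# real fingerprints); ``_cosine_shift`` is defined at the bottom of this file:
+#
+#     >>> import numpy as np
+#     >>> base = np.array([0.5, 0.2, 0.6, 0.4, 0.3, 0.1])
+#     >>> doc = np.array([0.9, 0.4, 0.5, 0.5, 0.6, 0.2])
+#     >>> ft = base + 0.5 * (doc - base)  # ft drifted halfway toward the doc
+#     >>> round(_cosine_shift(base, ft, doc), 2)
+#     1.0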
+
+from __future__ import annotations
+
+import re
+import statistics
+from typing import Literal
+
+import numpy as np
+from numpy.typing import NDArray
+from pydantic import Field
+
+from dlm_sway.core.result import ProbeResult, Verdict
+from dlm_sway.probes.base import Probe, ProbeSpec, RunContext
+
+_SENTENCE_SPLIT = re.compile(r"(?<=[.!?])\s+")
+_PARAGRAPH_SPLIT = re.compile(r"\n\s*\n")
+_WORD_RE = re.compile(r"\b[A-Za-z][A-Za-z'-]*\b")
+_PUNCTS = set(".,:;!?-—()[]\"'/")
+
+
+def fingerprint(text: str) -> NDArray[np.float64]:
+    """Return a 6-dim stylistic fingerprint for ``text``.
+
+    Dimensions (all numeric, scaled to order-1):
+    0. mean sentence length (words) / 30.0
+    1. std sentence length (words) / 30.0
+    2. type-token ratio (already in [0,1])
+    3. avg word length (chars) / 10.0
+    4. punctuation density per char * 10.0
+    5. paragraph density (1 / avg paragraph length in words) * 30.0
+    """
+    if not text.strip():
+        return np.zeros(6, dtype=np.float64)
+
+    sentences = [s for s in _SENTENCE_SPLIT.split(text) if s.strip()]
+    paragraphs = [p for p in _PARAGRAPH_SPLIT.split(text) if p.strip()]
+    words = _WORD_RE.findall(text)
+    if not words:
+        return np.zeros(6, dtype=np.float64)
+
+    sentence_word_counts = [len(_WORD_RE.findall(s)) for s in sentences]
+    sentence_word_counts = [c for c in sentence_word_counts if c > 0]
+    if not sentence_word_counts:
+        sentence_word_counts = [len(words)]
+
+    mean_sent = statistics.fmean(sentence_word_counts)
+    std_sent = statistics.pstdev(sentence_word_counts) if len(sentence_word_counts) > 1 else 0.0
+    ttr = len({w.lower() for w in words}) / len(words)
+    avg_word_len = statistics.fmean(len(w) for w in words)
+    punct_count = sum(ch in _PUNCTS for ch in text)
+    punct_density = punct_count / max(len(text), 1)
+    avg_paragraph_len = (
+        statistics.fmean(len(_WORD_RE.findall(p)) for p in paragraphs) if paragraphs else len(words)
+    )
+    paragraph_density = 1.0 / max(avg_paragraph_len, 1.0)
+
+    return np.asarray(
+        [
+            mean_sent / 30.0,
+            std_sent / 30.0,
+            ttr,
+            avg_word_len / 10.0,
+            punct_density * 10.0,
+            paragraph_density * 30.0,
+        ],
+        dtype=np.float64,
+    )
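+
+# Example usage (hypothetical snippet of prose; the exact feature values
+# depend on the text, so none are asserted here):
+#
+#     fp = fingerprint("Short sentences. Then a much longer, meandering sentence follows.")
+#     fp.shape  # -> (6,)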
+
+
+class StyleFingerprintSpec(ProbeSpec):
+    kind: Literal["style_fingerprint"] = "style_fingerprint"
+    prompts: list[str] = Field(default_factory=list)
+    """Prompts used to elicit a stylistic sample from each model."""
+    doc_reference: str = ""
+    """Concatenated reference text representing the adapter's intended
+    style. Typically the document itself; the .dlm bridge supplies this
+    from ``ctx.doc_text`` when left empty."""
+    max_new_tokens: int = 128
+    assert_shift_gte: float = 0.25
+    """Minimum cosine shift for PASS. ``0.25`` is a deliberately
+    permissive default — stylistic shift is a weaker signal than
+    perplexity lift."""
+
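+# An illustrative spec (values are made up; ``name`` comes from the shared
+# ``ProbeSpec`` base class):
+#
+#     StyleFingerprintSpec(
+#         name="style-drift",
+#         prompts=["Draft a short status update in the document's usual voice."],
+#         assert_shift_gte=0.2,
+#     )
+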
+
+class StyleFingerprintProbe(Probe):
+    kind = "style_fingerprint"
+    spec_cls = StyleFingerprintSpec
+    category = "calibration"
+
+    def run(self, spec: ProbeSpec, ctx: RunContext) -> ProbeResult:
+        assert isinstance(spec, StyleFingerprintSpec)
+        if not spec.prompts:
+            return ProbeResult(
+                name=spec.name,
+                kind=spec.kind,
+                verdict=Verdict.ERROR,
+                score=None,
+                message="no prompts provided",
+            )
+        doc_text = spec.doc_reference or (ctx.doc_text or "")
+        if not doc_text.strip():
+            return ProbeResult(
+                name=spec.name,
+                kind=spec.kind,
+                verdict=Verdict.SKIP,
+                score=None,
+                message="no doc_reference (inline or from ctx.doc_text)",
+            )
+
+        base_samples: list[str] = []
+        ft_samples: list[str] = []
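+        # Both models see the same prompt and seed, so any fingerprint
+        # difference reflects the adapter rather than decoding randomness.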
+        for prompt in spec.prompts:
+            with ctx.backend.as_base() as b:
+                base_samples.append(
+                    b.generate(prompt, max_new_tokens=spec.max_new_tokens, seed=ctx.seed)
+                )
+            with ctx.backend.as_finetuned() as f:
+                ft_samples.append(
+                    f.generate(prompt, max_new_tokens=spec.max_new_tokens, seed=ctx.seed)
+                )
+
+        base_fp = fingerprint("\n".join(base_samples))
+        ft_fp = fingerprint("\n".join(ft_samples))
+        doc_fp = fingerprint(doc_text)
+
+        shift = _cosine_shift(base_fp, ft_fp, doc_fp)
+        verdict = Verdict.PASS if shift >= spec.assert_shift_gte else Verdict.FAIL
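+        # Map the cosine shift from [-1, 1] onto a [0, 1] score (clipped for safety).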
+        score = float(np.clip((shift + 1.0) / 2.0, 0.0, 1.0))
+
+        return ProbeResult(
+            name=spec.name,
+            kind=spec.kind,
+            verdict=verdict,
+            score=score,
+            raw=shift,
+            evidence={
+                "base_fp": base_fp.tolist(),
+                "ft_fp": ft_fp.tolist(),
+                "doc_fp": doc_fp.tolist(),
+                "style_shift": shift,
+                "weight": spec.weight,
+            },
+            message=(
+                f"style_shift={shift:+.2f} "
+                f"({'toward' if shift > 0 else 'away from'} doc, "
+                f"threshold={spec.assert_shift_gte})"
+            ),
+        )
+
+
+def _cosine_shift(
+    base: NDArray[np.float64], ft: NDArray[np.float64], doc: NDArray[np.float64]
+) -> float:
+    """Cosine between (ft - base) and (doc - base) in fingerprint space."""
+    a = ft - base
+    b = doc - base
+    na = float(np.linalg.norm(a))
+    nb = float(np.linalg.norm(b))
+    if na == 0.0 or nb == 0.0:
+        return 0.0
+    return float(np.dot(a, b) / (na * nb))