`c666cbf`

sway(probes): real NullAdapterProbe calibration — 3 seeds, JS divergence

Authored by

espadonne 3 weeks ago

SHA: c666cbfdb1109527c06897f5f52708ece6aba816
Parents: 93c3098
Tree: 8fcd8b2

3 changed files

Status	File	+	-
M	`src/dlm_sway/probes/null_adapter.py`	92	36
A	`tests/unit/test_null_calibration.py`	123	0
M	`tests/unit/test_suite_runner.py`	6	5

src/dlm_sway/probes/null_adapter.py — modified

  Every numeric primitive reports its raw metric *and* a z-score against a
  null-adapter distribution. This probe is the runtime engine that
--establishes that distribution — running each configured primitive
++establishes that distribution — it builds random-init "null" adapters
--against a series of random-init-style "null" adapters (structurally
++(structurally identical to the real adapter but with weights drawn from
--identical to the real adapter but with weights indistinguishable from
++a Gaussian) and measures how much signal they produce.
--noise) and caching the resulting ``(mean, std, n)`` per primitive kind.
++
--
++The resulting ``(mean, std, n)`` per kind is attached to this probe's
--The heavy lifting — materializing random-init LoRAs on the loaded model
++``evidence["null_stats"]``. The runner picks it up and threads it into
--and running probes with them — lives in the HF backend (later
++:attr:`RunContext.null_stats`, where every downstream probe can read it
--milestone). For now this module ships the spec + the lookup API that
++and turn a raw metric into a z-score.
--probes will use to z-score their results once stats are populated.
++
++Backends that don't implement :class:`~dlm_sway.core.scoring.NullCalibratedBackend`
++cause this probe to :attr:`Verdict.SKIP` — downstream probes fall back
++to their fixed thresholds in that case.
  """
  from __future__ import annotations
++import statistics
  from typing import Literal
  from pydantic import Field
  from dlm_sway.core.result import ProbeResult, Verdict
++from dlm_sway.core.scoring import NullCalibratedBackend
++from dlm_sway.probes._divergence import divergence
  from dlm_sway.probes.base import Probe, ProbeSpec, RunContext
  class NullAdapterSpec(ProbeSpec):
      """Spec for ``kind: null_adapter``.
--    This is a meta-probe: it doesn't test the adapter, it calibrates
++    Authors place this probe **first** in the suite so its output
--    *other* probes. Place it first in the suite so its output is in
++    populates :attr:`RunContext.null_stats` before subsequent probes
--    :attr:`~dlm_sway.probes.base.RunContext.null_stats` when later
++    consult it.
--    probes run.
      """
      kind: Literal["null_adapter"] = "null_adapter"
      runs: int = Field(default=3, ge=1, le=10)
      """Number of independent null adapters to evaluate. Three is the
--    smallest that gives a usable std estimate; more is better but quickly
++    smallest that yields a usable std; more is better but quickly
      dominates suite runtime."""
--    rank: int | None = None
++    prompts: list[str] = Field(default_factory=list)
--    """LoRA rank for the null adapter. ``None`` → match the real adapter."""
++    """Prompt set for null calibration. Keep small — calibration runs
--    alpha: int | None = None
++    ``runs × len(prompts)`` forward passes. 4–8 prompts is typical.
--    """LoRA alpha. ``None`` → match the real adapter."""
++    If empty, a minimal built-in prompt set is used so the probe
++    always produces stats."""
      init_scale: float = 0.02
--    """Standard deviation of the zero-mean Gaussian used to init
++    """Stddev of the zero-mean Gaussian used to fill lora_A/lora_B."""
--    lora_A/lora_B. Matches typical post-init scale."""
++    seed_base: int = 1000
++    """First seed; successive runs use ``seed_base + run_idx``."""
++
++
++_DEFAULT_PROMPTS: tuple[str, ...] = (
++    "The quick brown fox",
++    "Once upon a time",
++    "In this document we explain",
++    "The key takeaway is",
++    "An important point to remember",
++)
  class NullAdapterProbe(Probe):
--    """Populate ``ctx.null_stats``; report a :attr:`Verdict.SKIP` verdict itself.
++    """Populate ``ctx.null_stats``; report a :attr:`Verdict.PASS` verdict itself.
--    The probe never fails on its own terms — its *job* is calibration,
++    The probe never fails on its own terms — its *job* is calibration.
--    not judgment. Downstream probes consult
++    Downstream probes pick up :attr:`RunContext.null_stats` keyed by
--    :meth:`get_null_stats` to turn their raw metric into a z-score.
++    probe kind (``delta_kl``, ``adapter_ablation`` …) and use the
++    populated mean/std to z-score their own raw metrics.
      """
      kind = "null_adapter"
      category = "baseline"
      def run(self, spec: ProbeSpec, ctx: RunContext) -> ProbeResult:
--        # Concrete null-adapter materialization is backend-specific. For
--        # the HF backend it will build random-init LoRAs with matched
--        # rank/alpha. That path is wired in a later milestone; this probe
--        # currently reports SKIP so suite composition stays stable.
--        del ctx  # unused until HF-level materialization lands
          assert isinstance(spec, NullAdapterSpec)
++        if not isinstance(ctx.backend, NullCalibratedBackend):
++            return ProbeResult(
++                name=spec.name,
++                kind=spec.kind,
++                verdict=Verdict.SKIP,
++                score=None,
++                message=(
++                    "backend does not implement NullCalibratedBackend — "
++                    "numeric probes will fall back to fixed thresholds"
++                ),
++            )
++        prompts = list(spec.prompts) or list(_DEFAULT_PROMPTS)
++
++        per_seed_means: list[float] = []
++        for run_idx in range(spec.runs):
++            seed = spec.seed_base + run_idx
++            per_prompt: list[float] = []
++            for prompt in prompts:
++                with ctx.backend.as_base() as base_view:
++                    base_dist = base_view.next_token_dist(prompt, top_k=ctx.top_k)
++                with ctx.backend.as_null_adapter(seed, init_scale=spec.init_scale) as null_view:
++                    null_dist = null_view.next_token_dist(prompt, top_k=ctx.top_k)
++                per_prompt.append(divergence(base_dist, null_dist, kind="js"))
++            per_seed_means.append(statistics.fmean(per_prompt) if per_prompt else 0.0)
++
++        mean = statistics.fmean(per_seed_means)
++        std = statistics.pstdev(per_seed_means) if len(per_seed_means) > 1 else 0.0
++
++        # Publish per-kind stats. delta_kl is the primary kind; other
++        # divergence-based probes (adapter_ablation) share this scale.
++        null_stats = {
++            "delta_kl": {"mean": mean, "std": max(std, 1e-6), "n": float(spec.runs)},
++            "adapter_ablation": {"mean": mean, "std": max(std, 1e-6), "n": float(spec.runs)},
++        }
++
          return ProbeResult(
              name=spec.name,
              kind=spec.kind,
--            verdict=Verdict.SKIP,
++            verdict=Verdict.PASS,
--            score=None,
++            score=1.0,
++            raw=mean,
++            evidence={
++                "null_stats": null_stats,
++                "per_seed_mean_js": per_seed_means,
++                "init_scale": spec.init_scale,
++                "runs": spec.runs,
++                "num_prompts": len(prompts),
++                "weight": spec.weight,
++            },
              message=(
--                "null-adapter calibration pending — downstream probes will fall back to "
++                f"null JS divergence μ={mean:.4f} ± {std:.4f} "
--                "fixed thresholds until the backend-level materialization lands"
++                f"(over {spec.runs} seeds × {len(prompts)} prompts) — "
++                f"downstream probes will z-score against this baseline"
              ),
--            evidence={"runs": spec.runs, "rank": spec.rank, "alpha": spec.alpha},
          )
      """Look up null-adapter stats for ``probe_kind``.
      Returns ``{"mean": …, "std": …, "n": …}`` when calibration ran for
--    this kind, else ``None``. Probes should treat ``None`` as "fall back
++    this kind, else ``None``. Probes treat ``None`` as "fall back to the
--    to the fixed threshold from your spec."
++    fixed threshold from your spec."
      """
      return ctx.null_stats.get(probe_kind)

tests/unit/test_null_calibration.py — added

++"""Tests for null-adapter calibration.
++
++Covers: dummy backend ``as_null_adapter`` yields a plausibly noisy
++view; ``NullAdapterProbe`` populates ``ctx.null_stats`` in a way
++downstream probes pick up end-to-end; missing-capability SKIP path.
++"""
++
++from __future__ import annotations
++
++import numpy as np
++
++from dlm_sway.backends.dummy import DummyDifferentialBackend, DummyResponses
++from dlm_sway.core.result import Verdict
++from dlm_sway.core.scoring import NullCalibratedBackend
++from dlm_sway.probes.base import RunContext, build_probe
++from dlm_sway.suite.runner import run as run_suite
++from dlm_sway.suite.spec import SwaySpec
++
++
++def _diverging_backend() -> DummyDifferentialBackend:
++    base = DummyResponses()
++    ft = DummyResponses()
++    return DummyDifferentialBackend(base=base, ft=ft)
++
++
++class TestProtocolConformance:
++    def test_dummy_is_null_calibrated(self) -> None:
++        assert isinstance(_diverging_backend(), NullCalibratedBackend)
++
++
++class TestAsNullAdapter:
++    def test_yields_perturbed_view(self) -> None:
++        backend = _diverging_backend()
++        with backend.as_base() as base:
++            base_dist = base.next_token_dist("hello")
++        with backend.as_null_adapter(seed=0) as null:
++            null_dist = null.next_token_dist("hello")
++        # Some perturbation, but bounded.
++        assert not np.allclose(base_dist.logprobs, null_dist.logprobs)
++
++    def test_different_seeds_yield_different_views(self) -> None:
++        backend = _diverging_backend()
++        with backend.as_null_adapter(seed=1) as v1:
++            d1 = v1.next_token_dist("hello")
++        with backend.as_null_adapter(seed=2) as v2:
++            d2 = v2.next_token_dist("hello")
++        assert not np.allclose(d1.logprobs, d2.logprobs)
++
++    def test_view_exclusion_enforced(self) -> None:
++        import pytest
++
++        backend = _diverging_backend()
++        with backend.as_null_adapter(seed=0), pytest.raises(RuntimeError):
++            with backend.as_base():
++                pass
++
++
++class TestProbe:
++    def test_populates_null_stats(self) -> None:
++        backend = _diverging_backend()
++        probe, spec = build_probe(
++            {
++                "name": "null",
++                "kind": "null_adapter",
++                "runs": 3,
++                "prompts": ["q1", "q2"],
++            }
++        )
++        ctx = RunContext(backend=backend)
++        result = probe.run(spec, ctx)
++        assert result.verdict == Verdict.PASS
++        stats = result.evidence["null_stats"]
++        assert "delta_kl" in stats
++        assert stats["delta_kl"]["n"] == 3.0
++        assert stats["delta_kl"]["std"] > 0.0  # seeded perturbations produce variance
++
++    def test_runner_threads_null_stats_to_subsequent_probes(self) -> None:
++        """End-to-end: null_adapter first → delta_kl picks up z-score path."""
++        backend = _diverging_backend()
++        raw_spec = SwaySpec.model_validate(
++            {
++                "version": 1,
++                "models": {"base": {"base": "b"}, "ft": {"base": "b", "adapter": "/tmp/a"}},
++                "suite": [
++                    {
++                        "name": "null",
++                        "kind": "null_adapter",
++                        "runs": 3,
++                        "prompts": ["p1", "p2"],
++                    },
++                    {
++                        "name": "dk",
++                        "kind": "delta_kl",
++                        "prompts": ["p1", "p2"],
++                        "assert_z_gte": -10.0,  # permissive so we pass regardless
++                    },
++                ],
++            }
++        )
++        result = run_suite(raw_spec, backend)
++        assert len(result.probes) == 2
++        null_result = result.probes[0]
++        dk_result = result.probes[1]
++        assert null_result.verdict == Verdict.PASS
++        # The delta_kl probe should have computed a z_score because null_stats was present.
++        assert dk_result.z_score is not None, (
++            "delta_kl should have z-scored against null baseline, got "
++            f"evidence={dk_result.evidence}, message={dk_result.message}"
++        )
++
++    def test_skip_when_backend_not_null_calibrated(self) -> None:
++        class _Bare:
++            def as_base(self):  # noqa: ANN202
++                raise NotImplementedError
++
++            def as_finetuned(self):  # noqa: ANN202
++                raise NotImplementedError
++
++        probe, spec = build_probe({"name": "null", "kind": "null_adapter"})
++        ctx = RunContext(backend=_Bare())  # type: ignore[arg-type]
++        result = probe.run(spec, ctx)
++        assert result.verdict == Verdict.SKIP
++        assert "NullCalibratedBackend" in result.message

tests/unit/test_suite_runner.py — modified

          assert result.wall_seconds >= 0
          assert result.probes[0].duration_s >= 0
--    def test_null_adapter_skipped_stable_suite_shape(
++    def test_null_adapter_passes_on_null_calibrated_backend(
          self, backend: DummyDifferentialBackend
      ) -> None:
--        spec = _spec({"name": "null", "kind": "null_adapter", "runs": 3})
++        # Dummy backend implements NullCalibratedBackend, so calibration runs.
++        spec = _spec({"name": "null", "kind": "null_adapter", "runs": 2, "prompts": ["q1"]})
          result = run(spec, backend)
--        # Until the HF-level implementation lands, null_adapter reports SKIP
--        # but must not crash the suite.
          assert result.probes[0].kind == "null_adapter"
--        assert result.probes[0].verdict == Verdict.SKIP
++        assert result.probes[0].verdict == Verdict.PASS
++        # And the suite's null_stats bubbles up onto the result.
++        assert "delta_kl" in result.null_stats