`1e28540`

probes/null_adapter: per-kind calibration matrix (fixes P02, B2, C9)

Authored by

espadonne 3 weeks ago

SHA: 1e28540187e96e9aae50146a8d6faf2f8306d475
Parents: fc49f7e
Tree: 640aab0

3 changed files

Status	File	+	-
M	`src/dlm_sway/probes/null_adapter.py`	160	82
M	`tests/unit/test_null_calibration.py`	61	2
M	`tests/unit/test_suite_runner.py`	9	1

`src/dlm_sway/probes/null_adapter.py` — modified

--"""Null-adapter baseline probe.
++"""Null-adapter baseline probe — per-kind calibration matrix (S02).
--
++
--Every numeric primitive reports its raw metric *and* a z-score against a
++Every numeric primitive reports its raw metric *and* a z-score against
--null-adapter distribution. This probe is the runtime engine that
++a null-adapter distribution. This probe is the runtime engine that
--establishes that distribution — it builds random-init "null" adapters
++establishes that distribution — for **every** numeric probe kind the
--(structurally identical to the real adapter but with weights drawn from
++user has downstream in the suite, not just one.
--a Gaussian) and measures how much signal they produce.
++
--
++How it works:
--The resulting ``(mean, std, n)`` per kind is attached to this probe's
++
--``evidence["null_stats"]``. The runner picks it up and threads it into
++1. The runner populates ``ctx.downstream_kinds`` with every probe kind
--:attr:`RunContext.null_stats`, where every downstream probe can read it
++   that appears after this one in the suite.
--and turn a raw metric into a z-score.
++2. For each target kind, we ask its probe class for a
--
++   :meth:`~dlm_sway.probes.base.Probe.calibrate_spec` — a small spec
--Backends that don't implement :class:`~dlm_sway.core.scoring.NullCalibratedBackend`
++   suitable for null calibration. A probe that returns ``None`` opts
--cause this probe to :attr:`Verdict.SKIP` — downstream probes fall back
++   out (typically because its inputs can't be synthesized, e.g.
--to their fixed thresholds in that case.
++   ``adapter_revert`` without an embedder, or ``adapter_ablation``
++   which needs ``as_scaled_adapter`` that the proxy doesn't expose).
++3. For each calibrating kind × seed, we run the probe through a
++   :class:`~dlm_sway.probes._null_proxy.NullCalibrationBackendProxy`
++   which makes ``as_finetuned()`` yield ``as_null_adapter(seed)`` —
++   so the probe's own math is computing "what does my metric look
++   like when the fine-tune is structural noise?".
++4. We harvest each run's ``raw`` value, aggregate to ``(mean, std, n)``
++   per kind, and publish under ``evidence["null_stats"]``.
++5. The runner threads ``null_stats`` into ``RunContext`` for every
++   subsequent probe, which then prefers the z-score path over the
++   fixed-threshold path (see :mod:`dlm_sway.probes._zscore`).
++
++Backends that don't implement
++:class:`~dlm_sway.core.scoring.NullCalibratedBackend` cause this probe
++to ``Verdict.SKIP``; every downstream probe falls back to fixed
++thresholds and surfaces ``(no calibration)`` in the report.
  """
  from __future__ import annotations
++import math
  import statistics
--from typing import Literal
++from typing import Any, Literal
  from pydantic import Field
--from dlm_sway.core.result import ProbeResult, Verdict
++from dlm_sway.core.result import ProbeResult, Verdict, safe_finalize
  from dlm_sway.core.scoring import NullCalibratedBackend
--from dlm_sway.probes._divergence import divergence
++from dlm_sway.probes._null_proxy import NullCalibrationBackendProxy
--from dlm_sway.probes.base import Probe, ProbeSpec, RunContext
++from dlm_sway.probes.base import Probe, ProbeSpec, RunContext, registry
  class NullAdapterSpec(ProbeSpec):
      """Spec for ``kind: null_adapter``.
--    Authors place this probe **first** in the suite so its output
++    Place this probe **first** in the suite so its output populates
--    populates :attr:`RunContext.null_stats` before subsequent probes
++    :attr:`RunContext.null_stats` before subsequent probes consult it.
--    consult it.
      """
      kind: Literal["null_adapter"] = "null_adapter"
      """Number of independent null adapters to evaluate. Three is the
      smallest that yields a usable std; more is better but quickly
      dominates suite runtime."""
--    prompts: list[str] = Field(default_factory=list)
--    """Prompt set for null calibration. Keep small — calibration runs
--    ``runs × len(prompts)`` forward passes. 4–8 prompts is typical.
--    If empty, a minimal built-in prompt set is used so the probe
--    always produces stats."""
      init_scale: float = 0.02
      """Stddev of the zero-mean Gaussian used to fill lora_A/lora_B."""
      seed_base: int = 1000
      """First seed; successive runs use ``seed_base + run_idx``."""
--
++    calibrate_kinds: list[str] = Field(default_factory=list)
--
++    """Which probe kinds to calibrate. Empty = auto-populate from
--_DEFAULT_PROMPTS: tuple[str, ...] = (
++    ``ctx.downstream_kinds`` (the kinds that appear after this probe
--    "The quick brown fox",
++    in the suite). Set explicitly to force calibration of specific
--    "Once upon a time",
++    kinds regardless of suite order."""
--    "In this document we explain",
--    "The key takeaway is",
--    "An important point to remember",
--)
  class NullAdapterProbe(Probe):
--    """Populate ``ctx.null_stats``; report a :attr:`Verdict.PASS` verdict itself.
++    """Populate ``ctx.null_stats`` with per-kind null distributions.
--    The probe never fails on its own terms — its *job* is calibration.
++    The probe itself reports ``Verdict.PASS`` on success — its job is
--    Downstream probes pick up :attr:`RunContext.null_stats` keyed by
++    calibration, not judgment. If the backend can't support null-view
--    probe kind (``delta_kl``, ``adapter_ablation`` …) and use the
++    substitution, reports ``Verdict.SKIP`` with a clear message; every
--    populated mean/std to z-score their own raw metrics.
++    downstream numeric probe then falls back to fixed thresholds.
      """
      kind = "null_adapter"
                      "numeric probes will fall back to fixed thresholds"
                  ),
+             )
--        prompts = list(spec.prompts) or list(_DEFAULT_PROMPTS)
++
--
++        registered = registry()
--        per_seed_means: list[float] = []
++
--        for run_idx in range(spec.runs):
++        # Decide which kinds to calibrate. Explicit spec field wins;
--            seed = spec.seed_base + run_idx
++        # otherwise auto-populate from downstream_kinds.
--            per_prompt: list[float] = []
++        target_kinds: list[str] = list(spec.calibrate_kinds)
--            for prompt in prompts:
++        if not target_kinds:
--                with ctx.backend.as_base() as base_view:
++            target_kinds = [k for k in ctx.downstream_kinds if k and k != spec.kind]
--                    base_dist = base_view.next_token_dist(prompt, top_k=ctx.top_k)
++        # De-dupe while preserving order; drop self and unregistered.
--                with ctx.backend.as_null_adapter(seed, init_scale=spec.init_scale) as null_view:
++        seen: set[str] = set()
--                    null_dist = null_view.next_token_dist(prompt, top_k=ctx.top_k)
++        filtered: list[str] = []
--                per_prompt.append(divergence(base_dist, null_dist, kind="js"))
++        for k in target_kinds:
--            per_seed_means.append(statistics.fmean(per_prompt) if per_prompt else 0.0)
++            if k == spec.kind or k in seen or k not in registered:
--
++                continue
--        mean = statistics.fmean(per_seed_means)
++            seen.add(k)
--        std = statistics.pstdev(per_seed_means) if len(per_seed_means) > 1 else 0.0
++            filtered.append(k)
--
++        target_kinds = filtered
--        # Publish per-kind stats. delta_kl is the primary kind; other
++
--        # divergence-based probes (adapter_ablation) share this scale.
++        per_kind_stats: dict[str, dict[str, float]] = {}
--        null_stats = {
++        per_kind_samples: dict[str, list[float]] = {}
--            "delta_kl": {"mean": mean, "std": max(std, 1e-6), "n": float(spec.runs)},
++        skipped_kinds: list[dict[str, str]] = []
--            "adapter_ablation": {"mean": mean, "std": max(std, 1e-6), "n": float(spec.runs)},
++
++        for kind in target_kinds:
++            probe_cls = registered[kind]
++            try:
++                cal_spec = probe_cls.calibrate_spec(ctx)
++            except Exception as exc:  # noqa: BLE001 — defensive
++                skipped_kinds.append(
++                    {"kind": kind, "reason": f"calibrate_spec raised: {exc}"}
++                )
++                continue
++            if cal_spec is None:
++                skipped_kinds.append(
++                    {
++                        "kind": kind,
++                        "reason": "probe opted out (calibrate_spec returned None)",
++                    }
++                )
++                continue
++
++            probe = probe_cls()
++            raws: list[float] = []
++            errors: list[str] = []
++            for run_idx in range(spec.runs):
++                seed = spec.seed_base + run_idx
++                proxy = NullCalibrationBackendProxy(
++                    ctx.backend, seed=seed, init_scale=spec.init_scale
++                )
++                cal_ctx = RunContext(
++                    backend=proxy,
++                    seed=seed,
++                    top_k=ctx.top_k,
++                    sections=ctx.sections,
++                    doc_text=ctx.doc_text,
++                    null_stats={},  # calibration uses fixed thresholds — no recursion
++                    downstream_kinds=(),
++                )
++                try:
++                    cal_result = probe.run(cal_spec, cal_ctx)
++                except Exception as exc:  # noqa: BLE001
++                    errors.append(f"seed={seed}: {type(exc).__name__}: {exc}")
++                    continue
++                raw = cal_result.raw
++                if raw is not None and math.isfinite(raw):
++                    raws.append(float(raw))
++                elif cal_result.verdict == Verdict.ERROR:
++                    errors.append(
++                        f"seed={seed}: probe ERROR — {cal_result.message}"
++                    )
++
++            if raws:
++                mean = statistics.fmean(raws)
++                std = statistics.pstdev(raws) if len(raws) > 1 else 0.0
++                per_kind_stats[kind] = {
++                    "mean": mean,
++                    # C9: clamp the std floor so the downstream z-score
++                    # path doesn't blow up when every seed produces
++                    # identical raws.
++                    "std": max(std, 1e-6),
++                    "n": float(len(raws)),
++                }
++                per_kind_samples[kind] = raws
++            else:
++                reason = "no finite raws across all seeds"
++                if errors:
++                    reason += f" ({errors[0]})"
++                skipped_kinds.append({"kind": kind, "reason": reason})
++
++        evidence: dict[str, Any] = {
++            "null_stats": per_kind_stats,
++            "per_kind_raw_samples": per_kind_samples,
++            "skipped_kinds": skipped_kinds,
++            "calibrated_kinds": list(per_kind_stats.keys()),
++            "runs": spec.runs,
++            "init_scale": spec.init_scale,
++            "seed_base": spec.seed_base,
++            "weight": spec.weight,
+         }
--        return ProbeResult(
++        message = (
++            f"null calibration: {len(per_kind_stats)} kinds calibrated "
++            f"over {spec.runs} seeds"
++        )
++        if skipped_kinds:
++            message += f" ({len(skipped_kinds)} opted out)"
++
++        return safe_finalize(
              name=spec.name,
              kind=spec.kind,
              verdict=Verdict.PASS,
              score=1.0,
--            raw=mean,
++            evidence=evidence,
--            evidence={
++            message=message,
--                "null_stats": null_stats,
--                "per_seed_mean_js": per_seed_means,
--                "init_scale": spec.init_scale,
--                "runs": spec.runs,
--                "num_prompts": len(prompts),
--                "weight": spec.weight,
--            },
--            message=(
--                f"null JS divergence μ={mean:.4f} ± {std:.4f} "
--                f"(over {spec.runs} seeds × {len(prompts)} prompts) — "
--                f"downstream probes will z-score against this baseline"
--            ),
+         )
  def get_null_stats(ctx: RunContext, probe_kind: str) -> dict[str, float] | None:
--    """Look up null-adapter stats for ``probe_kind``.
++    """Look up null-adapter stats for ``probe_kind`` in the run context.
      Returns ``{"mean": …, "std": …, "n": …}`` when calibration ran for
--    this kind, else ``None``. Probes treat ``None`` as "fall back to the
++    this kind, else ``None``. Probes treat ``None`` as "fall back to
--    fixed threshold from your spec."
++    the fixed threshold from your spec" and surface ``(no calibration)``
++    in the report.
      """
      return ctx.null_stats.get(probe_kind)

`tests/unit/test_null_calibration.py` — modified

  class TestProbe:
      def test_populates_null_stats(self) -> None:
++        """Explicit `calibrate_kinds` calibrates regardless of suite order."""
          backend = _diverging_backend()
          probe, spec = build_probe(
+             {
                  "name": "null",
                  "kind": "null_adapter",
                  "runs": 3,
--                "prompts": ["q1", "q2"],
++                "calibrate_kinds": ["delta_kl"],
+             }
+         )
          ctx = RunContext(backend=backend)
          assert stats["delta_kl"]["n"] == 3.0
          assert stats["delta_kl"]["std"] > 0.0  # seeded perturbations produce variance
++    def test_auto_populates_from_downstream_kinds(self) -> None:
++        """When `calibrate_kinds` is empty, falls back to `ctx.downstream_kinds`."""
++        backend = _diverging_backend()
++        probe, spec = build_probe({"name": "null", "kind": "null_adapter", "runs": 2})
++        ctx = RunContext(
++            backend=backend,
++            downstream_kinds=("delta_kl", "prompt_collapse"),
++        )
++        result = probe.run(spec, ctx)
++        assert result.verdict == Verdict.PASS
++        stats = result.evidence["null_stats"]
++        # Every downstream numeric kind that opts in gets stats.
++        assert "delta_kl" in stats
++        assert "prompt_collapse" in stats
++
++    def test_empty_calibrate_kinds_with_no_downstream_is_noop(self) -> None:
++        """No kinds, no calibration — probe still PASSes with empty stats."""
++        backend = _diverging_backend()
++        probe, spec = build_probe({"name": "null", "kind": "null_adapter", "runs": 2})
++        ctx = RunContext(backend=backend)  # no downstream_kinds
++        result = probe.run(spec, ctx)
++        assert result.verdict == Verdict.PASS
++        assert result.evidence["null_stats"] == {}
++        assert result.evidence["calibrated_kinds"] == []
++
++    def test_unregistered_kind_is_silently_skipped(self) -> None:
++        backend = _diverging_backend()
++        probe, spec = build_probe(
++            {
++                "name": "null",
++                "kind": "null_adapter",
++                "runs": 2,
++                "calibrate_kinds": ["delta_kl", "nonexistent_kind"],
++            }
++        )
++        ctx = RunContext(backend=backend)
++        result = probe.run(spec, ctx)
++        assert "delta_kl" in result.evidence["null_stats"]
++        assert "nonexistent_kind" not in result.evidence["null_stats"]
++
++    def test_opt_out_probe_is_reported_as_skipped(self) -> None:
++        """A kind whose calibrate_spec returns None surfaces in skipped_kinds."""
++        backend = _diverging_backend()
++        probe, spec = build_probe(
++            {
++                "name": "null",
++                "kind": "null_adapter",
++                "runs": 2,
++                # adapter_revert.calibrate_spec returns None by default
++                # (inherits from base), so we expect it to opt out.
++                "calibrate_kinds": ["adapter_revert", "delta_kl"],
++            }
++        )
++        ctx = RunContext(backend=backend)
++        result = probe.run(spec, ctx)
++        assert "delta_kl" in result.evidence["null_stats"]
++        skipped = [s["kind"] for s in result.evidence["skipped_kinds"]]
++        assert "adapter_revert" in skipped
++
      def test_runner_threads_null_stats_to_subsequent_probes(self) -> None:
          """End-to-end: null_adapter first → delta_kl picks up z-score path."""
          backend = _diverging_backend()
                          "name": "null",
                          "kind": "null_adapter",
                          "runs": 3,
--                        "prompts": ["p1", "p2"],
                      },
+                     {
                          "name": "dk",

`tests/unit/test_suite_runner.py` — modified

          self, backend: DummyDifferentialBackend
      ) -> None:
          # Dummy backend implements NullCalibratedBackend, so calibration runs.
--        spec = _spec({"name": "null", "kind": "null_adapter", "runs": 2, "prompts": ["q1"]})
++        # Explicit calibrate_kinds so it runs even without downstream probes.
++        spec = _spec(
++            {
++                "name": "null",
++                "kind": "null_adapter",
++                "runs": 2,
++                "calibrate_kinds": ["delta_kl"],
++            }
++        )
          result = run(spec, backend)
          assert result.probes[0].kind == "null_adapter"
          assert result.probes[0].verdict == Verdict.PASS