tenseleyflow/sway / 74d94e2

probes: RunContext.backend optional + ctx.backend.as_* → ctx.require_backend.as_* sweep (S25 P4 prep)

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA: 74d94e2d0ce18025ec24be6cd483e4cab092c0d0
Parents: 3021f24
Tree: 3218183

13 changed files

Status  File                                             +   -
M       src/dlm_sway/probes/adapter_ablation.py         7   3
M       src/dlm_sway/probes/adapter_revert.py           2   2
M       src/dlm_sway/probes/base.py                     32   1
M       src/dlm_sway/probes/calibration_drift.py        2   2
M       src/dlm_sway/probes/cluster_kl.py               2   2
M       src/dlm_sway/probes/delta_kl.py                 2   2
M       src/dlm_sway/probes/external_perplexity.py      2   2
M       src/dlm_sway/probes/leakage.py                  1   1
M       src/dlm_sway/probes/paraphrase_invariance.py    2   2
M       src/dlm_sway/probes/preference_flip.py          2   2
M       src/dlm_sway/probes/prompt_collapse.py          2   2
M       src/dlm_sway/probes/section_internalization.py  2   2
M       src/dlm_sway/probes/style_fingerprint.py        2   2
src/dlm_sway/probes/adapter_ablation.py (modified)

@@ -89,7 +89,11 @@ class AdapterAblationProbe(Probe):
                 score=None,
                 message="no prompts provided",
             )
-        if not isinstance(ctx.backend, ScalableDifferentialBackend):
+        # Local binding so mypy keeps the ScalableDifferentialBackend
+        # narrowing across the loop below (require_backend's return
+        # type is the base DifferentialBackend; we narrow once here).
+        scalable = ctx.backend
+        if not isinstance(scalable, ScalableDifferentialBackend):
             return ProbeResult(
                 name=spec.name,
                 kind=spec.kind,
@@ -109,9 +113,9 @@ class AdapterAblationProbe(Probe):
         for lam in spec.lambdas:
             divs_for_lam: list[float] = []
             for prompt in spec.prompts:
-                with ctx.backend.as_scaled_adapter(lam_zero) as ref:
+                with scalable.as_scaled_adapter(lam_zero) as ref:
                     ref_dist = ref.next_token_dist(prompt, top_k=top_k)
-                with ctx.backend.as_scaled_adapter(lam) as scaled:
+                with scalable.as_scaled_adapter(lam) as scaled:
                     scaled_dist = scaled.next_token_dist(prompt, top_k=top_k)
                 divs_for_lam.append(divergence(ref_dist, scaled_dist, kind=spec.divergence))
             per_lambda.append(float(np.mean(divs_for_lam)))
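Why the local binding: mypy will not keep an isinstance() narrowing attached to a property like require_backend, since each access could in principle return a different object, so the probe binds the backend to a local once and narrows that. A minimal standalone sketch of the pattern (the class names here are placeholders, not the real backend types):

class Backend: ...

class ScalableBackend(Backend):
    def as_scaled_adapter(self, lam: float) -> None: ...

def run(backend: Backend | None) -> None:
    scalable = backend  # bind once so the narrowing below sticks
    if not isinstance(scalable, ScalableBackend):
        return
    # mypy now treats `scalable` as ScalableBackend for the rest of
    # the function body, including loops and nested `with` blocks.
    # Note the isinstance check also rules out None.
    scalable.as_scaled_adapter(0.0)
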
src/dlm_sway/probes/adapter_revert.py (modified)

@@ -104,9 +104,9 @@ class AdapterRevertProbe(Probe):
         for case in spec.cases:
             gold_vec = embed([case.gold])[0]
             for pp in case.paraphrases:
-                with ctx.backend.as_base() as bv:
+                with ctx.require_backend.as_base() as bv:
                     base_gen = bv.generate(pp, max_new_tokens=spec.max_new_tokens, seed=ctx.seed)
-                with ctx.backend.as_finetuned() as fv:
+                with ctx.require_backend.as_finetuned() as fv:
                     ft_gen = fv.generate(pp, max_new_tokens=spec.max_new_tokens, seed=ctx.seed)
                 vecs = embed([base_gen, ft_gen])
                 base_vec, ft_vec = vecs[0], vecs[1]
src/dlm_sway/probes/base.py (modified)

@@ -90,7 +90,18 @@ class RunContext:
         to calibrate per-kind null stats for.
     """
 
-    backend: DifferentialBackend
+    backend: DifferentialBackend | None = None
+    """The model-scoring backend. Required for every probe with
+    ``needs_backend=True`` (the default). Pre-run probes
+    (``needs_backend=False``, e.g. S25 ``gradient_ghost``) tolerate
+    ``None`` here so the runner can skip backend construction
+    entirely when only pre-flight probes are scheduled.
+
+    Existing probes access ``self.require_backend`` instead of
+    ``backend`` directly — the property narrows the type for mypy
+    and gives a clear runtime error if the runner ever passes
+    ``None`` to a probe that needs the backend.
+    """
     seed: int = 0
     top_k: int = 256
     sections: tuple[Section, ...] | None = None
@@ -101,6 +112,26 @@ class RunContext:
     )
     downstream_kinds: tuple[str, ...] = field(default_factory=tuple)
 
+    @property
+    def require_backend(self) -> DifferentialBackend:
+        """Return :attr:`backend`, asserting non-None.
+
+        Probes with ``needs_backend=True`` (default) call this to
+        narrow the type from ``DifferentialBackend | None`` to
+        ``DifferentialBackend``. The runner contract guarantees
+        non-None when scheduling backend-dependent probes; this
+        accessor turns a runner bug into a clear error rather than
+        a confusing AttributeError on ``None.as_base()``.
+        """
+        if self.backend is None:
+            raise RuntimeError(
+                "RunContext.backend is None — probe requires a backend "
+                "(needs_backend=True) but the runner did not provide one. "
+                "If this is a pre-run probe, set needs_backend=False on "
+                "the Probe subclass."
+            )
+        return self.backend
+
 
 _REGISTRY: dict[str, type[Probe]] = {}

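For reference, a minimal standalone sketch of the contract these docstrings describe. The GradientGhostProbe stub and the runner loop below are illustrative assumptions; only needs_backend, RunContext, and require_backend are the real hooks from this diff:

from dataclasses import dataclass


class DifferentialBackend:  # stand-in for the real backend type
    ...


@dataclass
class RunContext:
    backend: DifferentialBackend | None = None

    @property
    def require_backend(self) -> DifferentialBackend:
        if self.backend is None:
            raise RuntimeError("probe needs a backend, but the runner passed None")
        return self.backend


class Probe:
    needs_backend: bool = True  # default: the probe scores the model


class GradientGhostProbe(Probe):  # hypothetical pre-run probe
    needs_backend = False  # tolerates backend=None


# Runner side: build the backend only when a scheduled probe needs it.
scheduled = [GradientGhostProbe()]
backend = DifferentialBackend() if any(p.needs_backend for p in scheduled) else None
ctx = RunContext(backend=backend)  # backend is None here, and that is fine

A probe that forgets to set needs_backend=False but runs without a backend now hits the RuntimeError with the remediation message, rather than an AttributeError deep inside None.as_base().
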
src/dlm_sway/probes/calibration_drift.py (modified)

@@ -109,9 +109,9 @@ class CalibrationDriftProbe(Probe):
 
         for prompt, gold in items:
             tokens = max(_token_estimate(gold), 1)
-            with ctx.backend.as_base() as b:
+            with ctx.require_backend.as_base() as b:
                 lp_base = b.logprob_of(prompt, gold) / tokens
-            with ctx.backend.as_finetuned() as f:
+            with ctx.require_backend.as_finetuned() as f:
                 lp_ft = f.logprob_of(prompt, gold) / tokens
             delta = lp_ft - lp_base
             deltas.append(delta)
src/dlm_sway/probes/cluster_kl.py (modified)

@@ -192,9 +192,9 @@ class ClusterKLProbe(Probe):
         # S23 — per-prompt divergences, now via one batched forward
         # per view (same math as ``delta_kl``).
         top_k = spec.top_k if spec.top_k is not None else ctx.top_k
-        with ctx.backend.as_base() as base_view:
+        with ctx.require_backend.as_base() as base_view:
             base_dists = base_view.next_token_dist_batch(list(spec.prompts), top_k=top_k)
-        with ctx.backend.as_finetuned() as ft_view:
+        with ctx.require_backend.as_finetuned() as ft_view:
             ft_dists = ft_view.next_token_dist_batch(list(spec.prompts), top_k=top_k)
         divergences: list[float] = [
             divergence(b, f, kind=spec.divergence)
src/dlm_sway/probes/delta_kl.py (modified)

@@ -93,9 +93,9 @@ class DeltaKLProbe(Probe):
         # manager entries. next_token_dist_batch falls back to a
         # per-prompt loop on backends without real batching so the
         # result list stays identical to the pre-S23 path.
-        with ctx.backend.as_base() as base_view:
+        with ctx.require_backend.as_base() as base_view:
             base_dists = base_view.next_token_dist_batch(list(spec.prompts), top_k=top_k)
-        with ctx.backend.as_finetuned() as ft_view:
+        with ctx.require_backend.as_finetuned() as ft_view:
             ft_dists = ft_view.next_token_dist_batch(list(spec.prompts), top_k=top_k)
         divergences: list[float] = [
             divergence(b, f, kind=spec.divergence)
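The fallback that comment mentions is easy to picture; here is a sketch of what such a base-class default could look like (an assumption about the backend view, not the actual dlm_sway implementation; only the method names come from the diff):

class View:  # stand-in for a backend view (as_base() / as_finetuned() result)
    def next_token_dist(self, prompt: str, top_k: int) -> dict[int, float]:
        raise NotImplementedError

    def next_token_dist_batch(
        self, prompts: list[str], top_k: int
    ) -> list[dict[int, float]]:
        # Default: a plain per-prompt loop, so backends without real
        # batching return exactly the same list, in the same order,
        # as the pre-S23 one-call-per-prompt path.
        return [self.next_token_dist(p, top_k=top_k) for p in prompts]
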
src/dlm_sway/probes/external_perplexity.py (modified)

@@ -147,9 +147,9 @@ class ExternalPerplexityProbe(Probe):
         total_base_lp = 0.0
         total_ft_lp = 0.0
         for chunk in chunks:
-            with ctx.backend.as_base() as b:
+            with ctx.require_backend.as_base() as b:
                 base_rl = b.rolling_logprob(chunk)
-            with ctx.backend.as_finetuned() as f:
+            with ctx.require_backend.as_finetuned() as f:
                 ft_rl = f.rolling_logprob(chunk)
             # Per-token mean logprob for this chunk. ``logprobs.size``
             # is ``num_tokens - 1`` by the RollingLogprob contract.
src/dlm_sway/probes/leakage.py (modified)

@@ -120,7 +120,7 @@ class LeakageSusceptibilityProbe(Probe):
         perturbed_recalls: list[float] = []
         per_section: list[dict[str, float | str]] = []
 
-        with ctx.backend.as_finetuned() as ft:
+        with ctx.require_backend.as_finetuned() as ft:
             for s in prose:
                 prefix = s.content[: spec.prefix_chars]
                 target = s.content[spec.prefix_chars : spec.prefix_chars + spec.continuation_chars]
src/dlm_sway/probes/paraphrase_invariance.py (modified)

@@ -115,10 +115,10 @@ class ParaphraseInvarianceProbe(Probe):
 
         for case in spec.cases:
             tokens = max(_token_estimate(case.gold), 1)
-            with ctx.backend.as_base() as b:
+            with ctx.require_backend.as_base() as b:
                 lp_base_verb = b.logprob_of(case.prompt, case.gold) / tokens
                 lp_base_par = [b.logprob_of(p, case.gold) / tokens for p in case.paraphrases]
-            with ctx.backend.as_finetuned() as f:
+            with ctx.require_backend.as_finetuned() as f:
                 lp_ft_verb = f.logprob_of(case.prompt, case.gold) / tokens
                 lp_ft_par = [f.logprob_of(p, case.gold) / tokens for p in case.paraphrases]
 
src/dlm_sway/probes/preference_flip.py (modified)

@@ -103,11 +103,11 @@ class PreferenceFlipProbe(Probe):
             # batch down. Fence per triple so probes degrade gracefully:
             # drop the offending triple, count it, surface in evidence.
             try:
-                with ctx.backend.as_base() as b:
+                with ctx.require_backend.as_base() as b:
                     base_margin = b.logprob_of(t.prompt, t.chosen) - b.logprob_of(
                         t.prompt, t.rejected
                     )
-                with ctx.backend.as_finetuned() as f:
+                with ctx.require_backend.as_finetuned() as f:
                     ft_margin = f.logprob_of(t.prompt, t.chosen) - f.logprob_of(
                         t.prompt, t.rejected
                     )
src/dlm_sway/probes/prompt_collapse.py (modified)

@@ -100,9 +100,9 @@ class PromptCollapseProbe(Probe):
             divs: list[float] = []
             for prompt in spec.prompts:
                 full_prompt = prefix + prompt
-                with ctx.backend.as_base() as bv:
+                with ctx.require_backend.as_base() as bv:
                     base_dist = bv.next_token_dist(full_prompt, top_k=top_k)
-                with ctx.backend.as_finetuned() as fv:
+                with ctx.require_backend.as_finetuned() as fv:
                     ft_dist = fv.next_token_dist(full_prompt, top_k=top_k)
                 divs.append(divergence(base_dist, ft_dist, kind=spec.divergence))
             mean_divs.append(float(np.mean(divs)))
src/dlm_sway/probes/section_internalization.py (modified)

@@ -111,10 +111,10 @@ class SectionInternalizationProbe(Probe):
         # re-running the forward pass for leak-checks.
         base_nll: dict[str, float] = {}
         ft_nll: dict[str, float] = {}
-        with ctx.backend.as_base() as base_view:
+        with ctx.require_backend.as_base() as base_view:
             for s in eligible:
                 base_nll[s.id] = _section_nll(s, base_view, spec.max_prose_chars)
-        with ctx.backend.as_finetuned() as ft_view:
+        with ctx.require_backend.as_finetuned() as ft_view:
             for s in eligible:
                 ft_nll[s.id] = _section_nll(s, ft_view, spec.max_prose_chars)
 
src/dlm_sway/probes/style_fingerprint.py (modified)

@@ -300,11 +300,11 @@ class StyleFingerprintProbe(Probe):
         base_samples: list[str] = []
         ft_samples: list[str] = []
         for prompt in spec.prompts:
-            with ctx.backend.as_base() as b:
+            with ctx.require_backend.as_base() as b:
                 base_samples.append(
                     b.generate(prompt, max_new_tokens=spec.max_new_tokens, seed=ctx.seed)
                 )
-            with ctx.backend.as_finetuned() as f:
+            with ctx.require_backend.as_finetuned() as f:
                 ft_samples.append(
                     f.generate(prompt, max_new_tokens=spec.max_new_tokens, seed=ctx.seed)
                 )