sway(backends): as_null_adapter with in-place LoRA weight randomization
- SHA: 93c3098bfa4d7d0ea9c768f6c77f421b2c6c3943
- Parents: fadb0ae
- Tree: 4d5b21e
| Status | File | + | - |
|---|---|---|---|
| M | src/dlm_sway/backends/dummy.py | 39 | 0 |
| M | src/dlm_sway/backends/hf.py | 37 | 0 |
**src/dlm_sway/backends/dummy.py** (modified)

```diff
@@ -117,6 +117,37 @@ class _DummyView:
         )
 
 
+class _NullView(_DummyView):
+    """A dummy view that perturbs the base distribution with seeded noise.
+
+    Used by :meth:`DummyDifferentialBackend.as_null_adapter`. The
+    perturbation is small (matches an ``init_scale=0.02`` adapter), so
+    the null-vs-base divergence stays well below real-adapter territory
+    in probe tests.
+    """
+
+    def __init__(self, base_responses: DummyResponses, seed: int, init_scale: float) -> None:
+        super().__init__("base", base_responses)
+        self._seed = seed
+        self._init_scale = init_scale
+
+    def next_token_dist(self, prompt: str, *, top_k: int = 256) -> TokenDist:
+        base_dist = super().next_token_dist(prompt, top_k=top_k)
+        rng = np.random.default_rng(self._seed + hash(prompt) % 1_000_003)
+        noise = rng.normal(0.0, self._init_scale, size=base_dist.logprobs.shape).astype(np.float32)
+        new_lp = base_dist.logprobs + noise
+        # Re-normalize (within the top-k slice) so a valid distribution comes back.
+        max_lp = new_lp.max()
+        new_probs = np.exp(new_lp - max_lp)
+        new_probs /= new_probs.sum()
+        return TokenDist(
+            token_ids=base_dist.token_ids,
+            logprobs=np.log(new_probs).astype(np.float32),
+            vocab_size=base_dist.vocab_size,
+            tail_logprob=base_dist.tail_logprob,
+        )
+
+
 class _InterpolatedView(_DummyView):
     """A dummy view where logits/dists are a lam-blend of base and ft.
 
@@ -206,6 +237,14 @@ class DummyDifferentialBackend:
         finally:
             self._exit()
 
+    @contextmanager
+    def as_null_adapter(self, seed: int, *, init_scale: float = 0.02) -> Iterator[_DummyView]:
+        self._enter(f"null({seed})")
+        try:
+            yield _NullView(self._base_r, seed=seed, init_scale=init_scale)
+        finally:
+            self._exit()
+
     def _enter(self, mode: str) -> None:
         if self._active is not None:
             raise RuntimeError(
```
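For readers skimming the diff, the core of `_NullView.next_token_dist` is just a perturb-and-renormalize over the top-k logprob slice. Here is a minimal standalone NumPy sketch of that step; the three probabilities are invented for illustration:

```python
import numpy as np

# Standalone demo of the _NullView perturbation: add small seeded Gaussian
# noise to a top-k logprob slice, then renormalize within the slice.
seed, init_scale = 0, 0.02
logprobs = np.log(np.array([0.6, 0.3, 0.1], dtype=np.float32))

rng = np.random.default_rng(seed)
noisy = logprobs + rng.normal(0.0, init_scale, size=logprobs.shape).astype(np.float32)

# Subtracting the max before exp is the usual numerical-stability trick;
# it cancels in the division, so this is a softmax over the noisy logprobs.
probs = np.exp(noisy - noisy.max())
probs /= probs.sum()

print(probs, probs.sum())  # a small perturbation of [0.6, 0.3, 0.1]; sums to 1.0
```

Because the noise scale matches a freshly initialized `init_scale=0.02` adapter, the resulting distribution stays close to the base one, which is exactly the property the probe tests rely on.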
**src/dlm_sway/backends/hf.py** (modified)

```diff
@@ -301,6 +301,43 @@ class HuggingFaceDifferentialBackend:
                 module.scaling[key] = original  # type: ignore[attr-defined]
             self._exit()
 
+    @contextmanager
+    def as_null_adapter(self, seed: int, *, init_scale: float = 0.02) -> Iterator[_HFView]:
+        """Temporarily replace every LoRA ``A``/``B`` tensor with random noise.
+
+        Same rank, alpha, and target modules as the real adapter; only
+        the weights differ. This is the denominator in every z-score
+        path: "how much signal does structural noise produce?"
+
+        The implementation walks the PEFT module tree for ``lora_A``/``lora_B``
+        parameters, saves a clone of each current value, overwrites in
+        place with a zero-mean Gaussian at ``init_scale``, and restores
+        on exit (including on exception).
+        """
+        import torch
+
+        self._enter(f"null({seed})")
+        gen = torch.Generator(device="cpu").manual_seed(int(seed))
+        saved: list[tuple[torch.nn.Parameter, torch.Tensor]] = []
+        try:
+            for pname, param in self._peft_model.named_parameters():
+                if not any(key in pname for key in ("lora_A", "lora_B")):
+                    continue
+                saved.append((param, param.detach().clone()))
+                with torch.no_grad():
+                    noise = torch.randn(
+                        *param.shape,
+                        generator=gen,
+                        dtype=torch.float32,
+                    ).to(dtype=param.dtype, device=param.device)
+                    param.copy_(noise * init_scale)
+            yield self._make_view(f"null_{seed}")
+        finally:
+            with torch.no_grad():
+                for param, original in saved:
+                    param.copy_(original)
+            self._exit()
+
     def close(self) -> None:
         """Release GPU memory. Safe to call more than once."""
         if getattr(self, "_peft_model", None) is not None:
```
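The hf.py side is a clone/overwrite/restore pattern around the PEFT parameters. Below is a minimal, self-contained sketch of the same pattern on a toy `torch.nn.Linear`; the `randomize_params` helper and its `match` filter are hypothetical names introduced here for illustration, not part of this commit:

```python
from contextlib import contextmanager

import torch


@contextmanager
def randomize_params(module: torch.nn.Module, match: tuple[str, ...], seed: int, scale: float = 0.02):
    """Toy version of the clone/overwrite/restore pattern used by as_null_adapter."""
    gen = torch.Generator(device="cpu").manual_seed(seed)
    saved = []
    try:
        for name, param in module.named_parameters():
            if not any(key in name for key in match):
                continue
            saved.append((param, param.detach().clone()))  # clone BEFORE overwriting
            with torch.no_grad():
                noise = torch.randn(*param.shape, generator=gen, dtype=torch.float32)
                param.copy_(noise.to(param.dtype) * scale)
        yield module
    finally:
        # Restore runs even if the body raises, so the module is bit-identical afterwards.
        with torch.no_grad():
            for param, original in saved:
                param.copy_(original)


layer = torch.nn.Linear(4, 4)
before = layer.weight.detach().clone()
with randomize_params(layer, match=("weight",), seed=0):
    assert not torch.equal(layer.weight, before)  # weights are noise in here
assert torch.equal(layer.weight, before)  # and restored on exit
```

Cloning before the overwrite and restoring inside `finally` is what backs the docstring's promise of restoration "on exit (including on exception)".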