"""Integration test: ``HF.as_scaled_adapter`` and the response-curve invariants. The adapter-ablation probe (the sway signature primitive) leans on ``as_scaled_adapter(lam)`` to walk a λ sweep. Two things must hold: 1. **Monotonicity of the *signal* across λ**: divergence at λ=1.25 should be strictly larger than divergence at λ=0 (which is base). We don't claim a smooth curve here — the unit tests on the probe itself cover curve shape — only that the scaling actually scales. 2. **State restoration on exit**: every ``LoraLayer.scaling[adapter_name]`` value the context manager touched must be back to its original number after the ``with`` block. Anything else corrupts subsequent probes. Marked ``slow+online`` to share the tiny-model fixture with the rest of the integration suite. """ from __future__ import annotations from pathlib import Path from typing import Any import numpy as np import pytest from dlm_sway.backends.hf import HuggingFaceDifferentialBackend from dlm_sway.core.model import ModelSpec from dlm_sway.probes._divergence import divergence pytestmark = [pytest.mark.slow, pytest.mark.online] def _build_random_lora_adapter(base_dir: Path, out_dir: Path) -> None: """Same shape as the toggle-test adapter — a small but non-zero LoRA.""" import torch from peft import LoraConfig, get_peft_model from transformers import AutoModelForCausalLM, AutoTokenizer torch.manual_seed(0) tokenizer = AutoTokenizer.from_pretrained(str(base_dir)) if tokenizer.pad_token_id is None: tokenizer.pad_token = tokenizer.eos_token base = AutoModelForCausalLM.from_pretrained(str(base_dir), torch_dtype=torch.float32) cfg = LoraConfig( r=8, lora_alpha=16, target_modules=["q_proj", "v_proj"], lora_dropout=0.0, bias="none", task_type="CAUSAL_LM", ) peft_model = get_peft_model(base, cfg) with torch.no_grad(): for name, param in peft_model.named_parameters(): if "lora_B" in name: param.copy_(torch.randn_like(param) * 0.05) peft_model.save_pretrained(str(out_dir)) tokenizer.save_pretrained(str(out_dir)) @pytest.fixture(scope="module") def random_adapter(tiny_model_dir: Path, tmp_path_factory: pytest.TempPathFactory) -> Path: adapter_dir = tmp_path_factory.mktemp("scaled-random-adapter") _build_random_lora_adapter(tiny_model_dir, adapter_dir) return adapter_dir @pytest.fixture(scope="module") def hf_backend(tiny_model_dir: Path, random_adapter: Path) -> HuggingFaceDifferentialBackend: backend = HuggingFaceDifferentialBackend( base_spec=ModelSpec(base=str(tiny_model_dir), kind="hf", dtype="fp32", device="cpu"), adapter_path=random_adapter, ) yield backend backend.close() def _captured_scalings(backend: HuggingFaceDifferentialBackend) -> dict[tuple[int, str], float]: """Snapshot every ``LoraLayer.scaling[key]`` keyed by (id, key).""" import peft lora_cls: Any = peft.tuners.lora.LoraLayer out: dict[tuple[int, str], float] = {} for module in backend._peft_model.modules(): # type: ignore[attr-defined] if not isinstance(module, lora_cls): continue scaling = getattr(module, "scaling", None) if not isinstance(scaling, dict): continue for key, value in scaling.items(): out[(id(module), key)] = float(value) return out def test_lambda_sweep_monotonic_in_signal(hf_backend: HuggingFaceDifferentialBackend) -> None: """Divergence(@λ=0, @λ=1.25) > divergence(@λ=0, @λ=0) (which is 0).""" prompt = "The quick brown fox" with hf_backend.as_scaled_adapter(0.0) as v0: d0 = v0.next_token_dist(prompt, top_k=64) with hf_backend.as_scaled_adapter(1.25) as v_over: d_over = v_over.next_token_dist(prompt, top_k=64) div_at_zero = divergence(d0, d0, kind="js") div_at_overshoot = divergence(d0, d_over, kind="js") assert div_at_zero == pytest.approx(0.0, abs=1e-9), ( f"self-divergence at λ=0 should be ~0; got {div_at_zero}" ) assert div_at_overshoot > 1e-6, f"λ=1.25 should drift far from λ=0; got {div_at_overshoot}" def test_lambda_one_matches_finetuned_within_tolerance( hf_backend: HuggingFaceDifferentialBackend, ) -> None: """``as_scaled_adapter(1.0)`` should be functionally identical to ``as_finetuned()``.""" prompt = "hello" with hf_backend.as_finetuned() as ft: d_ft = ft.next_token_dist(prompt, top_k=32) with hf_backend.as_scaled_adapter(1.0) as v1: d1 = v1.next_token_dist(prompt, top_k=32) np.testing.assert_allclose(d_ft.logprobs, d1.logprobs, rtol=1e-5, atol=1e-6) def test_scaling_restored_on_clean_exit(hf_backend: HuggingFaceDifferentialBackend) -> None: """Every LoraLayer.scaling[key] is back to its original value after exit.""" before = _captured_scalings(hf_backend) with hf_backend.as_scaled_adapter(0.42) as v: v.next_token_dist("anything", top_k=8) after = _captured_scalings(hf_backend) assert before == after, "scaling table not restored after as_scaled_adapter context" def test_scaling_restored_on_exception(hf_backend: HuggingFaceDifferentialBackend) -> None: """Same restoration invariant when the body raises.""" before = _captured_scalings(hf_backend) with pytest.raises(RuntimeError, match="boom"): with hf_backend.as_scaled_adapter(0.7): raise RuntimeError("boom") after = _captured_scalings(hf_backend) assert before == after, "scaling table not restored after exception inside the context"