"""S24 — end-to-end PEFT → MLX adapter conversion (darwin-arm64-only). Closes the F01 audit gap: ``dlm train`` writes a PEFT-shaped adapter, ``MLXDifferentialBackend`` is pointed at it, the backend auto-converts into the user's cache, ``mlx_lm.load`` consumes the result, scoring returns finite logprobs. This is the **prove-the-value** test the sprint file calls out — every other layer of testing (synthetic-input unit tests, CLI smoke tests) is upstream of this. If this passes locally on darwin-arm64, the headline ``.dlm → MLX`` flow works. Skips cleanly on: - non-darwin (mlx is Apple Silicon only) - non-arm64 - ``mlx_lm`` not installed (the ``[mlx]`` extra is optional) - ``peft`` / ``transformers`` not installed (the ``[hf]`` extra needed to *build* the source PEFT adapter) """ from __future__ import annotations import math import platform import sys from pathlib import Path import numpy as np import pytest pytestmark = [pytest.mark.slow, pytest.mark.online] # Default to the unquantized MLX repo because the 4-bit variant has # slipped into a gated/auth state on HF Hub. Either repo's adapter # slot works for the converter — the test only cares that mlx-lm # loads our converted ``adapters.safetensors``. 
_MODEL_ID = "mlx-community/SmolLM2-135M-Instruct"


def _platform_supports_mlx() -> bool:
    """Return True only when running on macOS (``darwin``) with an ``arm64`` CPU."""
    return sys.platform == "darwin" and platform.machine() == "arm64"


def _build_random_peft_lora(base_dir: Path, out_dir: Path) -> None:
    """Write a deterministic, randomly-initialised PEFT LoRA adapter to ``out_dir``.

    Same deterministic LoRA the HF integration tests use, shipped here
    because we don't want to import from another test file.

    Args:
        base_dir: Directory holding the base model and tokenizer, loaded
            with ``from_pretrained``.
        out_dir: Directory that receives the saved adapter and tokenizer.
    """
    # Heavy optional dependencies are imported lazily so that collecting this
    # module does not require the [hf] extra.
    import torch
    from peft import LoraConfig, get_peft_model
    from transformers import AutoModelForCausalLM, AutoTokenizer

    # Fixed seed so the random lora_B values are reproducible across runs.
    torch.manual_seed(0)

    tokenizer = AutoTokenizer.from_pretrained(str(base_dir))
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token = tokenizer.eos_token

    base = AutoModelForCausalLM.from_pretrained(str(base_dir), torch_dtype=torch.float32)
    cfg = LoraConfig(
        r=8,
        lora_alpha=16,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=0.0,
        bias="none",
        task_type="CAUSAL_LM",
    )
    peft_model = get_peft_model(base, cfg)

    # Overwrite every lora_B tensor with small random values.
    # NOTE(review): PEFT's default init zeroes lora_B, which would make the
    # adapter a no-op; presumably that is why it is randomised here.
    with torch.no_grad():
        for name, param in peft_model.named_parameters():
            if "lora_B" in name:
                param.copy_(torch.randn_like(param) * 0.05)

    peft_model.save_pretrained(str(out_dir))
    tokenizer.save_pretrained(str(out_dir))


@pytest.fixture(scope="module")
def peft_adapter(tiny_model_dir: Path, tmp_path_factory: pytest.TempPathFactory) -> Path:
    """Build the source PEFT adapter once per module and return its directory.

    Skips when the platform is not darwin-arm64 or when ``peft`` is not
    importable.
    """
    if not _platform_supports_mlx():
        pytest.skip("MLX requires darwin-arm64")
    pytest.importorskip("peft", reason="needs the [hf] extra to build a PEFT adapter")

    out = tmp_path_factory.mktemp("peft-for-mlx-convert")
    _build_random_peft_lora(tiny_model_dir, out)
    return out


@pytest.fixture(scope="module")
def mlx_backend(peft_adapter: Path, tmp_path_factory: pytest.TempPathFactory):
    """Point the MLX backend at a PEFT-shaped adapter dir.

    The backend auto-converts into a tmp cache (XDG_CACHE_HOME redirected so
    we don't pollute the user's real cache).

    Yields:
        A ``(backend, cache_root)`` tuple: the constructed
        ``MLXDifferentialBackend`` and the temporary directory that
        ``XDG_CACHE_HOME`` points at while the fixture is active.
    """
    pytest.importorskip("mlx_lm", reason="install the [mlx] extra to run MLX tests")

    # Redirect the cache so this test doesn't write to the user's
    # ~/.cache/dlm-sway/. Each fixture invocation gets a fresh dir.
    cache_root = tmp_path_factory.mktemp("mlx-convert-cache")

    # MonkeyPatch.context() restores the previous XDG_CACHE_HOME value (or
    # removes the variable) on exit, even if backend construction raises.
    with pytest.MonkeyPatch.context() as env:
        env.setenv("XDG_CACHE_HOME", str(cache_root))

        # Imported only after the redirect is in place, matching the original
        # ordering in case the package resolves its cache path at import time.
        from dlm_sway.backends.mlx import MLXDifferentialBackend
        from dlm_sway.core.model import ModelSpec

        backend = MLXDifferentialBackend(
            base_spec=ModelSpec(base=_MODEL_ID, kind="mlx"),
            adapter_path=peft_adapter,
        )
        try:
            yield backend, cache_root
        finally:
            # Close before the environment is restored, as before.
            backend.close()


def test_auto_conversion_writes_to_xdg_cache(mlx_backend) -> None:
    """The backend's __init__ must have populated the cache dir with an
    MLX-format adapter — proves the auto-convert path fired."""
    _backend, cache_root = mlx_backend
    converted = list((cache_root / "dlm-sway" / "mlx-converted").glob("*"))
    assert len(converted) == 1, f"expected exactly one cached MLX adapter dir, got {converted}"

    cache_dir = converted[0]
    assert (cache_dir / "adapters.safetensors").exists()
    assert (cache_dir / "adapter_config.json").exists()


def test_next_token_dist_returns_finite_topk_via_converted_adapter(mlx_backend) -> None:
    """The converted adapter, loaded via mlx_lm + scored via the MLX backend,
    must produce finite, well-ordered top-k logprobs."""
    backend, _ = mlx_backend
    with backend.as_finetuned() as ft:
        d = ft.next_token_dist("The capital of France is", top_k=32)
        assert d.token_ids.shape == (32,)
        assert d.logprobs.shape == (32,)
        assert np.all(np.isfinite(d.logprobs))
        # Descending order, with a small tolerance for float noise between ties.
        assert np.all(np.diff(d.logprobs) <= 1e-7)


def test_logprob_of_finite_via_converted_adapter(mlx_backend) -> None:
    """Scoring a continuation through the converted adapter must return a
    finite, strictly negative log-probability."""
    backend, _ = mlx_backend
    with backend.as_finetuned() as ft:
        lp = ft.logprob_of("The capital of France is", " Paris")
    assert math.isfinite(lp)
    assert lp < 0.0


def test_repeat_load_skips_reconvert(
    peft_adapter: Path, tmp_path_factory: pytest.TempPathFactory
) -> None:
    """Second backend instance against the same PEFT adapter must
    short-circuit on the cache and NOT rewrite the converted file."""
    pytest.importorskip("mlx_lm", reason="install the [mlx] extra to run MLX tests")

    cache_root = tmp_path_factory.mktemp("mlx-convert-cache-2")

    # Scoped environment override; the previous value is restored on exit.
    with pytest.MonkeyPatch.context() as env:
        env.setenv("XDG_CACHE_HOME", str(cache_root))

        from dlm_sway.backends.mlx import MLXDifferentialBackend
        from dlm_sway.core.model import ModelSpec

        b1 = MLXDifferentialBackend(
            base_spec=ModelSpec(base=_MODEL_ID, kind="mlx"),
            adapter_path=peft_adapter,
        )
        try:
            converted = list((cache_root / "dlm-sway" / "mlx-converted").glob("*"))
            # Fail with a readable message rather than a bare StopIteration
            # when the first init produced no cache entry at all.
            assert converted, f"first backend init wrote nothing under {cache_root}"
            weights = converted[0] / "adapters.safetensors"
            first_mtime = weights.stat().st_mtime_ns
        finally:
            # Release the backend even when the cache inspection fails.
            b1.close()

        b2 = MLXDifferentialBackend(
            base_spec=ModelSpec(base=_MODEL_ID, kind="mlx"),
            adapter_path=peft_adapter,
        )
        try:
            second_mtime = weights.stat().st_mtime_ns
        finally:
            b2.close()

        # An unchanged nanosecond mtime means the second init did not rewrite
        # the converted weights file.
        assert second_mtime == first_mtime, (
            "second backend init re-wrote the cached MLX adapter — cache short-circuit is broken"
        )