tenseleyflow/sway / 468a436


tests/integration: end-to-end PEFT→MLX convert+load+score (S24 prove-the-value)

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA: 468a436ecfaf567a0a6db765a32c1a91306ea908
Parents: 7390b0a
Tree: b960d56

1 changed file

A  tests/integration/test_mlx_converter_e2e.py  (+184 −0)

tests/integration/test_mlx_converter_e2e.py (added)
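
The headline flow the new test exercises, compressed to its API surface. This is a sketch assembled from the imports and calls in the test itself; the adapter path is an illustrative placeholder for whatever ``dlm train`` produced.

```python
# Sketch of the flow under test, using only names the test itself imports.
from pathlib import Path

from dlm_sway.backends.mlx import MLXDifferentialBackend
from dlm_sway.core.model import ModelSpec

backend = MLXDifferentialBackend(
    base_spec=ModelSpec(base="mlx-community/SmolLM2-135M-Instruct", kind="mlx"),
    adapter_path=Path("out/my-peft-adapter"),  # PEFT-shaped; auto-converted on init
)
with backend.as_finetuned() as ft:
    print(ft.logprob_of("The capital of France is", " Paris"))
backend.close()
```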
@@ -0,0 +1,184 @@
+"""S24 — end-to-end PEFT → MLX adapter conversion (darwin-arm64-only).
+
+Closes the F01 audit gap: ``dlm train`` writes a PEFT-shaped adapter,
+``MLXDifferentialBackend`` is pointed at it, the backend auto-converts
+it into the user's cache, ``mlx_lm.load`` consumes the result, and
+scoring returns finite logprobs.
+
+This is the **prove-the-value** test the sprint file calls out — every
+other layer of testing (synthetic-input unit tests, CLI smoke tests)
+is upstream of this. If this passes locally on darwin-arm64, the
+headline ``.dlm → MLX`` flow works.
+
+Skips cleanly on:
+- non-darwin (mlx is Apple Silicon only)
+- non-arm64
+- ``mlx_lm`` not installed (the ``[mlx]`` extra is optional)
+- ``peft`` / ``transformers`` not installed (the ``[hf]`` extra needed
+  to *build* the source PEFT adapter)
+"""
+
+from __future__ import annotations
+
+import math
+import platform
+import sys
+from pathlib import Path
+
+import numpy as np
+import pytest
+
+pytestmark = [pytest.mark.slow, pytest.mark.online]
+
+
+# Default to the unquantized MLX repo because the 4-bit variant has
+# slipped into a gated/auth state on HF Hub. Either repo's adapter
+# slot works for the converter — the test only cares that mlx-lm
+# loads our converted ``adapters.safetensors``.
+_MODEL_ID = "mlx-community/SmolLM2-135M-Instruct"
+
+
+def _platform_supports_mlx() -> bool:
+    return sys.platform == "darwin" and platform.machine() == "arm64"
+
+
+def _build_random_peft_lora(base_dir: Path, out_dir: Path) -> None:
+    """Build the same deterministic LoRA the HF integration tests use,
+    duplicated here so we don't import from another test file."""
+    import torch
+    from peft import LoraConfig, get_peft_model
+    from transformers import AutoModelForCausalLM, AutoTokenizer
+
+    torch.manual_seed(0)
+    tokenizer = AutoTokenizer.from_pretrained(str(base_dir))
+    if tokenizer.pad_token_id is None:
+        tokenizer.pad_token = tokenizer.eos_token
+    base = AutoModelForCausalLM.from_pretrained(str(base_dir), torch_dtype=torch.float32)
+    cfg = LoraConfig(
+        r=8,
+        lora_alpha=16,
+        target_modules=["q_proj", "v_proj"],
+        lora_dropout=0.0,
+        bias="none",
+        task_type="CAUSAL_LM",
+    )
+    peft_model = get_peft_model(base, cfg)
+    # PEFT zero-initializes lora_B, so randomize it (seeded above) to give
+    # the adapter a non-trivial, deterministic effect on the logits.
+    with torch.no_grad():
+        for name, param in peft_model.named_parameters():
+            if "lora_B" in name:
+                param.copy_(torch.randn_like(param) * 0.05)
+    peft_model.save_pretrained(str(out_dir))
+    tokenizer.save_pretrained(str(out_dir))
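
The helper's output is PEFT's standard on-disk adapter layout. A minimal sanity check of that layout might look like the sketch below; the file names are PEFT's ``save_pretrained`` defaults, nothing dlm_sway-specific, and ``_assert_peft_layout`` is a hypothetical helper, not part of the test suite.

```python
# Layout check for the helper's output; adapter_config.json and
# adapter_model.safetensors are what peft's save_pretrained writes by default.
from pathlib import Path

def _assert_peft_layout(adapter_dir: Path) -> None:
    assert (adapter_dir / "adapter_config.json").exists()
    assert (adapter_dir / "adapter_model.safetensors").exists()
```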
+
+
+@pytest.fixture(scope="module")
+def peft_adapter(tiny_model_dir: Path, tmp_path_factory: pytest.TempPathFactory) -> Path:
+    if not _platform_supports_mlx():
+        pytest.skip("MLX requires darwin-arm64")
+    pytest.importorskip("peft", reason="needs the [hf] extra to build a PEFT adapter")
+    out = tmp_path_factory.mktemp("peft-for-mlx-convert")
+    _build_random_peft_lora(tiny_model_dir, out)
+    return out
+
+
+@pytest.fixture(scope="module")
+def mlx_backend(peft_adapter: Path, tmp_path_factory: pytest.TempPathFactory):
+    """Point the MLX backend at a PEFT-shaped adapter dir; the backend
+    auto-converts into a tmp cache (XDG_CACHE_HOME redirected so we
+    don't pollute the user's real cache)."""
+    pytest.importorskip("mlx_lm", reason="install the [mlx] extra to run MLX tests")
+
+    # Redirect the cache so this test doesn't write to the user's
+    # ~/.cache/dlm-sway/. Each fixture invocation gets a fresh dir.
+    import os
+
+    cache_root = tmp_path_factory.mktemp("mlx-convert-cache")
+    prev = os.environ.get("XDG_CACHE_HOME")
+    os.environ["XDG_CACHE_HOME"] = str(cache_root)
+    try:
+        from dlm_sway.backends.mlx import MLXDifferentialBackend
+        from dlm_sway.core.model import ModelSpec
+
+        backend = MLXDifferentialBackend(
+            base_spec=ModelSpec(base=_MODEL_ID, kind="mlx"),
+            adapter_path=peft_adapter,
+        )
+        try:
+            yield backend, cache_root
+        finally:
+            # Runs even when a test using the fixture fails, so the
+            # backend is never leaked.
+            backend.close()
+    finally:
+        if prev is None:
+            os.environ.pop("XDG_CACHE_HOME", None)
+        else:
+            os.environ["XDG_CACHE_HOME"] = prev
+
+
+def test_auto_conversion_writes_to_xdg_cache(mlx_backend) -> None:
+    """The backend's __init__ must have populated the cache dir with
+    an MLX-format adapter — proves the auto-convert path fired."""
+    _backend, cache_root = mlx_backend
+    converted = list((cache_root / "dlm-sway" / "mlx-converted").glob("*"))
+    assert len(converted) == 1, f"expected exactly one cached MLX adapter dir, got {converted}"
+    cache_dir = converted[0]
+    assert (cache_dir / "adapters.safetensors").exists()
+    assert (cache_dir / "adapter_config.json").exists()
+
+
+def test_next_token_dist_returns_finite_topk_via_converted_adapter(mlx_backend) -> None:
+    """The converted adapter, loaded via mlx_lm and scored via the MLX
+    backend, must produce finite, well-ordered top-k logprobs."""
+    backend, _ = mlx_backend
+    with backend.as_finetuned() as ft:
+        d = ft.next_token_dist("The capital of France is", top_k=32)
+    assert d.token_ids.shape == (32,)
+    assert d.logprobs.shape == (32,)
+    assert np.all(np.isfinite(d.logprobs))
+    assert np.all(np.diff(d.logprobs) <= 1e-7)  # non-increasing, i.e. sorted descending
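
The ``np.diff`` assertion is the compact way to pin sort order: every consecutive difference in a non-increasing vector is <= 0, and the 1e-7 slack only absorbs float ties. A standalone illustration:

```python
# Standalone illustration of the sort-order check used above.
import numpy as np

lp = np.array([-0.8, -1.5, -1.5, -2.2], dtype=np.float32)  # descending, with a tie
assert np.all(np.diff(lp) <= 1e-7)            # passes: all differences <= 0
assert not np.all(np.diff(lp[::-1]) <= 1e-7)  # ascending order fails the check
```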
+
+
+def test_logprob_of_finite_via_converted_adapter(mlx_backend) -> None:
+    backend, _ = mlx_backend
+    with backend.as_finetuned() as ft:
+        lp = ft.logprob_of("The capital of France is", " Paris")
+    assert math.isfinite(lp)
+    assert lp < 0.0  # a softmax log-probability is log(p) with p < 1
+
+
+def test_repeat_load_skips_reconvert(
+    peft_adapter: Path, tmp_path_factory: pytest.TempPathFactory
+) -> None:
+    """A second backend instance against the same PEFT adapter must
+    short-circuit on the cache and NOT rewrite the converted file."""
+    pytest.importorskip("mlx_lm", reason="install the [mlx] extra to run MLX tests")
+
+    import os
+
+    cache_root = tmp_path_factory.mktemp("mlx-convert-cache-2")
+    prev = os.environ.get("XDG_CACHE_HOME")
+    os.environ["XDG_CACHE_HOME"] = str(cache_root)
+    try:
+        from dlm_sway.backends.mlx import MLXDifferentialBackend
+        from dlm_sway.core.model import ModelSpec
+
+        b1 = MLXDifferentialBackend(
+            base_spec=ModelSpec(base=_MODEL_ID, kind="mlx"),
+            adapter_path=peft_adapter,
+        )
+        cache_dir = next((cache_root / "dlm-sway" / "mlx-converted").glob("*"))
+        first_mtime = (cache_dir / "adapters.safetensors").stat().st_mtime_ns
+        b1.close()
+
+        b2 = MLXDifferentialBackend(
+            base_spec=ModelSpec(base=_MODEL_ID, kind="mlx"),
+            adapter_path=peft_adapter,
+        )
+        second_mtime = (cache_dir / "adapters.safetensors").stat().st_mtime_ns
+        b2.close()
+
+        assert second_mtime == first_mtime, (
+            "second backend init re-wrote the cached MLX adapter — cache short-circuit is broken"
+        )
+    finally:
+        if prev is None:
+            os.environ.pop("XDG_CACHE_HOME", None)
+        else:
+            os.environ["XDG_CACHE_HOME"] = prev
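
The mtime comparison pins down a cache short-circuit whose assumed shape is sketched below. This is an illustration of the behaviour under test, not the converter's actual code, and ``_needs_reconvert`` is a hypothetical name.

```python
# Assumed shape of the short-circuit the test above pins down; the real
# check lives in dlm_sway.backends.mlx.
from pathlib import Path

def _needs_reconvert(src_adapter: Path, cached: Path) -> bool:
    if not cached.exists():
        return True  # nothing cached yet: convert
    # Cached copy exists: only reconvert if the source adapter is newer.
    return src_adapter.stat().st_mtime_ns > cached.stat().st_mtime_ns
```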