tests/mlx_smoke: darwin-arm64 MLX backend smoke + reproducible adapter fixture builder (C5)

- SHA: 8917f1fcdef4ed155e01fe204c8adb512c979c26
- Parents: c6f6de7
- Tree: ee47ced

| Status | File | + | - |
|---|---|---|---|
| A | tests/fixtures/build_mlx_adapter.py | 84 | 0 |
| A | tests/integration/test_mlx_smoke.py | 93 | 0 |

tests/fixtures/build_mlx_adapter.py (added)

```python
| 1 | +"""Reproducible MLX adapter fixture for the smoke test. | |
| 2 | + | |
| 3 | +Run this once on darwin-arm64 with the ``[mlx]`` extra installed to | |
| 4 | +generate a tiny LoRA adapter under | |
| 5 | +``tests/fixtures/mlx_adapter_smollm2_135m/``. The integration test | |
| 6 | +at ``tests/integration/test_mlx_smoke.py`` uses the directory if it | |
| 7 | +exists, else skips with a pointer here. | |
| 8 | + | |
| 9 | +This script is deliberately *not* vendored as a binary in the repo — | |
| 10 | +regenerating it from scratch stays reproducible, keeps the repo small, | |
| 11 | +and lets us bump the base model without re-checking binaries in. | |
| 12 | + | |
| 13 | +Usage: | |
| 14 | + | |
| 15 | + # Prerequisites: darwin-arm64 + ``uv pip install -e ".[mlx]"`` | |
| 16 | + python tests/fixtures/build_mlx_adapter.py | |
| 17 | +""" | |
from __future__ import annotations

import sys
from pathlib import Path


def main() -> int:
    if sys.platform != "darwin":
        print("build_mlx_adapter.py requires darwin-arm64", file=sys.stderr)
        return 1

    try:
        import mlx.core as mx
        from mlx_lm import load
        from mlx_lm.tuner.utils import linear_to_lora_layers
    except ImportError as exc:
        print(
            f"mlx / mlx_lm not importable: {exc}\n"
            "Install the [mlx] extra: uv pip install -e '.[mlx]'",
            file=sys.stderr,
        )
        return 1

    model_id = "mlx-community/SmolLM2-135M-Instruct-4bit"
    out_dir = Path(__file__).parent / "mlx_adapter_smollm2_135m"
    out_dir.mkdir(parents=True, exist_ok=True)

    print(f"Loading {model_id} (will download on first run)…")
    model, _tokenizer = load(model_id)

    # Apply a tiny LoRA shim over the attention projections and
    # randomize the A/B weights deterministically. Freeze the base
    # model first so only the LoRA weights are trainable (and saved).
    model.freeze()
    lora_config = {
        "rank": 4,
        "alpha": 8,
        "dropout": 0.0,
        "scale": 10.0,
        # linear_to_lora_layers matches the dotted module path within
        # each transformer block, not the bare attribute name.
        "keys": ["self_attn.q_proj", "self_attn.v_proj"],
    }
    linear_to_lora_layers(model, num_layers=2, config=lora_config)
    # Seed-scale lora_B so the adapter actually changes outputs.
    # ``trainable_parameters()`` returns a *nested* dict, so flatten it
    # to dotted names first; otherwise the "lora_b" match never fires.
    from mlx.utils import tree_flatten

    mx.random.seed(0)
    params = dict(tree_flatten(model.trainable_parameters()))
    for name, arr in params.items():
        if "lora_b" in name.lower():
            params[name] = mx.random.normal(arr.shape) * 0.05

    # Save adapters to the mlx_lm convention: <dir>/adapters.safetensors
    # plus an adapter_config.json stub mlx_lm.load can find. Use
    # mx.save_safetensors here; safetensors' numpy writer cannot
    # serialize mx.array values directly.
    import json

    mx.save_safetensors(
        str(out_dir / "adapters.safetensors"),
        {k: v.astype(mx.float16) for k, v in params.items()},
    )
    (out_dir / "adapter_config.json").write_text(json.dumps(lora_config, indent=2))
    print(f"Wrote adapter fixture to {out_dir}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
```
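
A successful run leaves `adapters.safetensors` and `adapter_config.json` under `tests/fixtures/mlx_adapter_smollm2_135m/`. As a quick sanity check (not part of this commit, and assuming only the public `mlx_lm` Python API, where `load` accepts an `adapter_path` and `generate` takes a prompt plus `max_tokens`), the fixture can be exercised directly:

```python
# Illustrative sanity check for the generated fixture (not in the commit):
# reload the base model with the freshly built adapter and confirm the
# forward pass runs end to end.
from mlx_lm import generate, load

model, tokenizer = load(
    "mlx-community/SmolLM2-135M-Instruct-4bit",
    adapter_path="tests/fixtures/mlx_adapter_smollm2_135m",
)
# Any non-empty string means the adapter weights loaded and generation works.
print(generate(model, tokenizer, prompt="The capital of France is", max_tokens=8))
```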

tests/integration/test_mlx_smoke.py (added)

```python
"""C5: MLX backend smoke test (darwin-arm64-only).

Exercises ``MLXDifferentialBackend.next_token_dist`` and ``generate``
on a real mlx_lm-loaded model with a small LoRA adapter attached.
The adapter is built once via ``tests/fixtures/build_mlx_adapter.py``
(see that script's docstring); this test skips when the fixture
directory is absent rather than building it on the fly, since fixture
builds take ~30s and re-running the test should be cheap.

Skips on:
- non-darwin platforms (mlx is Apple Silicon only)
- non-arm64 architectures
- missing ``mlx_lm`` import
- missing fixture directory
"""

from __future__ import annotations

import math
import platform
import sys
from pathlib import Path

import numpy as np
import pytest

pytestmark = [pytest.mark.slow, pytest.mark.online]

_FIXTURE_DIR = Path(__file__).parent.parent / "fixtures" / "mlx_adapter_smollm2_135m"
_MODEL_ID = "mlx-community/SmolLM2-135M-Instruct-4bit"


def _platform_supports_mlx() -> bool:
    return sys.platform == "darwin" and platform.machine() == "arm64"


@pytest.fixture(scope="module")
def mlx_backend():
    if not _platform_supports_mlx():
        pytest.skip("MLX requires darwin-arm64")
    pytest.importorskip("mlx_lm", reason="install the [mlx] extra to run MLX tests")

    if not _FIXTURE_DIR.exists():
        pytest.skip(
            f"missing MLX adapter fixture at {_FIXTURE_DIR} — generate it via "
            f"`python tests/fixtures/build_mlx_adapter.py`"
        )

    from dlm_sway.backends.mlx import MLXDifferentialBackend
    from dlm_sway.core.model import ModelSpec

    backend = MLXDifferentialBackend(
        base_spec=ModelSpec(base=_MODEL_ID, kind="mlx"),
        adapter_path=_FIXTURE_DIR,
    )
    yield backend
    backend.close()


def test_next_token_dist_returns_finite_topk(mlx_backend) -> None:
    with mlx_backend.as_base() as b:
        d = b.next_token_dist("The capital of France is", top_k=32)
    assert d.token_ids.shape == (32,)
    assert d.logprobs.shape == (32,)
    assert np.all(np.isfinite(d.logprobs))
    # Top-k must be sorted in descending probability order.
    assert np.all(np.diff(d.logprobs) <= 1e-7)


def test_adapter_changes_distribution(mlx_backend) -> None:
    """The whole point of the differential backend: base ≠ ft."""
    prompt = "The adapter does"
    with mlx_backend.as_base() as b:
        base_dist = b.next_token_dist(prompt, top_k=32)
    with mlx_backend.as_finetuned() as f:
        ft_dist = f.next_token_dist(prompt, top_k=32)
    same_ids = np.array_equal(base_dist.token_ids, ft_dist.token_ids)
    if same_ids:
        assert not np.allclose(base_dist.logprobs, ft_dist.logprobs, atol=1e-5)


def test_logprob_of_finite(mlx_backend) -> None:
    with mlx_backend.as_base() as b:
        lp = b.logprob_of("The capital of France is", " Paris")
    assert math.isfinite(lp)
    assert lp < 0.0


def test_generate_returns_nonempty_string(mlx_backend) -> None:
    with mlx_backend.as_base() as b:
        out = b.generate("Hello", max_new_tokens=8, seed=0)
    assert isinstance(out, str)
    assert len(out) > 0
```
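
Putting the two files together, the intended workflow on an Apple Silicon machine follows the docstrings: install the extra, build the fixture once, then run the smoke test. (If the repo's pytest config deselects the `slow`/`online` marks by default, which this diff does not show, an explicit `-m` override would be needed.)

```sh
uv pip install -e ".[mlx]"
python tests/fixtures/build_mlx_adapter.py
pytest tests/integration/test_mlx_smoke.py -v
```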