`5bec484`

backends/mlx: auto-convert PEFT adapter on load + content-hash cache (F01)

Authored by mfwolffe <wolffemf@dukes.jmu.edu> 2 weeks ago

SHA: 5bec484b49e09aa88d6865fa413583a0e17d7292
Parents: 922ce3e
Tree: bee5aa6

2 changed files

Status	File	+	-
M	`src/dlm_sway/backends/mlx.py`	67	1
M	`tests/unit/test_mlx_convert.py`	68	0

src/dlm_sway/backends/mlx.py (modified)

          mx, mlx_lm = _require_mlx()
          self._mx = mx
          self._spec = base_spec
 -        self._adapter_path = Path(adapter_path).expanduser().resolve()
 +        raw_path = Path(adapter_path).expanduser().resolve()
 +        # S24: when the user points us at a PEFT adapter (typical
 +        # `dlm export` output), auto-convert into the user's cache
 +        # so the headline `.dlm → sway` flow on MLX just works.
 +        # Cached by content hash so repeated runs skip the convert.
 +        self._adapter_path = _ensure_mlx_adapter(raw_path)
          # Load bare base (no adapter).
          self._base_model, self._tokenizer = mlx_lm.load(base_spec.base)
          self._active = None
 +def _ensure_mlx_adapter(adapter_path: Path) -> Path:
 +    """Auto-convert PEFT adapters to MLX-LM format on first load (S24).
++
 +    Detection is structural: if ``adapter_path/adapter_model.safetensors``
 +    exists, we treat it as PEFT and run the converter. If it already
 +    contains ``adapters.safetensors`` (mlx-lm's filename), we leave it
 +    alone — assumes the user converted manually or the dir is already
 +    MLX-shaped. The MLX check runs first, so a directory holding both
 +    files is passed through without conversion.
++
 +    Cached at ``${XDG_CACHE_HOME:-$HOME/.cache}/dlm-sway/mlx-converted/<sha>/``
 +    keyed on a hash of the source ``adapter_model.safetensors`` bytes
 +    (``<sha>`` is a 16-byte blake2b digest rendered as hex, not a SHA).
 +    Repeated runs on the same adapter version skip conversion entirely
 +    (~10 ms hash + dir lookup).
++
 +    Args:
 +        adapter_path: Directory the caller pointed at; expected to hold
 +            either an MLX-shaped or a PEFT-shaped adapter.
++
 +    Returns:
 +        ``adapter_path`` itself when it is already MLX-shaped or has
 +        neither recognized file; otherwise the cache directory that
 +        holds the converted adapter.
 +    """
 +    if (adapter_path / "adapters.safetensors").exists():
 +        # Already in MLX format — pass through unchanged.
 +        return adapter_path
 +    if not (adapter_path / "adapter_model.safetensors").exists():
 +        # Neither MLX nor PEFT shape; let mlx_lm.load surface its own error.
 +        return adapter_path
++
 +    # Compute a content hash of the source PEFT safetensors. blake2b
 +    # in 16-byte digest mode is overkill on file IO but unambiguous —
 +    # a collision between different adapter versions is not a
 +    # practical concern at 128 bits.
 +    # NOTE(review): only the weights file feeds the key. If the
 +    # converter's output also depends on the source
 +    # ``adapter_config.json``, a config-only change would reuse a stale
 +    # cache entry — confirm against ``convert_peft_to_mlx``.
 +    import hashlib
++
 +    src_st = adapter_path / "adapter_model.safetensors"
 +    h = hashlib.blake2b(digest_size=16)
 +    with src_st.open("rb") as fh:
 +        # Stream in 1 MiB chunks so the file is never held in memory whole.
 +        for chunk in iter(lambda: fh.read(1024 * 1024), b""):
 +            h.update(chunk)
 +    sha = h.hexdigest()
++
 +    # Cache hit requires both converted artifacts to be present.
 +    cache_root = _mlx_cache_root() / sha
 +    if (cache_root / "adapters.safetensors").exists() and (
 +        cache_root / "adapter_config.json"
 +    ).exists():
 +        return cache_root
++
 +    # First-run conversion. Import here to keep the cycle off the
 +    # import path of users who never touch MLX.
 +    from dlm_sway.backends._mlx_convert import convert_peft_to_mlx
++
 +    # NOTE(review): the converter writes straight into the final cache
 +    # dir and the hit check above only tests existence, so an
 +    # interrupted run that leaves both files behind would presumably be
 +    # served as a hit — confirm whether the converter writes atomically.
 +    cache_root.mkdir(parents=True, exist_ok=True)
 +    convert_peft_to_mlx(adapter_path, cache_root, overwrite=True)
 +    return cache_root
++
++
 +def _mlx_cache_root() -> Path:
 +    """``$XDG_CACHE_HOME/dlm-sway/mlx-converted/`` (or ``~/.cache/...``).
++
 +    Honors XDG so Linux users get their conventional cache location;
 +    macOS users get ``~/.cache/...`` (XDG isn't standard on darwin
 +    but uv + many Python tools follow this convention there too).
++
 +    An unset *or empty* ``XDG_CACHE_HOME`` falls back to
 +    ``~/.cache``. The directory is only computed here, not created.
++
 +    Returns:
 +        The root directory under which converted adapters are cached.
 +    """
 +    import os
++
 +    # ``or`` (rather than a ``get`` default) so an empty string also falls back.
 +    base = os.environ.get("XDG_CACHE_HOME") or str(Path.home() / ".cache")
 +    return Path(base) / "dlm-sway" / "mlx-converted"
++
++
  def _log_softmax(x: np.ndarray, *, axis: int) -> np.ndarray:
      x_max = np.max(x, axis=axis, keepdims=True)
      y = x - x_max

tests/unit/test_mlx_convert.py (modified)

              convert_peft_to_mlx(src, tmp_path / "mlx")
 +class TestEnsureMlxAdapterAutoConvert:
 +    """``MLXDifferentialBackend.__init__`` calls ``_ensure_mlx_adapter``
 +    to upgrade PEFT-shaped adapter dirs to MLX format on the fly. The
 +    function lives in ``backends/mlx.py`` so it doesn't pull mlx-lm
 +    when the path is already MLX-shaped."""
++
 +    def test_passes_through_when_dir_is_already_mlx_shape(self, tmp_path: Path) -> None:
 +        """Existing ``adapters.safetensors`` → no conversion, return
 +        the same path unchanged. (Manual conversions / pre-built MLX
 +        adapters from other tools must not be re-converted.)"""
 +        from dlm_sway.backends.mlx import _ensure_mlx_adapter
++
 +        mlx_dir = tmp_path / "mlx"
 +        mlx_dir.mkdir()
 +        save_file({}, str(mlx_dir / "adapters.safetensors"))
 +        (mlx_dir / "adapter_config.json").write_text('{"fine_tune_type":"lora"}')
 +        out = _ensure_mlx_adapter(mlx_dir)
 +        assert out == mlx_dir
++
 +    def test_auto_converts_peft_dir_into_cache(
 +        self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
 +    ) -> None:
 +        """A PEFT-shaped dir gets converted into XDG_CACHE_HOME on
 +        first call; the returned path is the cache dir, not the source."""
 +        from dlm_sway.backends.mlx import _ensure_mlx_adapter
++
 +        cache_root = tmp_path / "cache"
 +        monkeypatch.setenv("XDG_CACHE_HOME", str(cache_root))
++
 +        peft_dir = tmp_path / "peft"
 +        _write_synthetic_peft_adapter(peft_dir)
 +        out = _ensure_mlx_adapter(peft_dir)
++
 +        assert out != peft_dir
 +        assert (out / "adapters.safetensors").exists()
 +        assert (out / "adapter_config.json").exists()
 +        assert str(out).startswith(str(cache_root))
++
 +    def test_repeated_calls_short_circuit_on_cache_hit(
 +        self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
 +    ) -> None:
 +        """Same PEFT bytes → same cache hash → second call returns the
 +        cached dir without re-converting (detected by comparing the
 +        converted file's mtime before and after the second call)."""
 +        from dlm_sway.backends.mlx import _ensure_mlx_adapter
++
 +        monkeypatch.setenv("XDG_CACHE_HOME", str(tmp_path / "cache"))
 +        peft_dir = tmp_path / "peft"
 +        _write_synthetic_peft_adapter(peft_dir)
++
 +        first = _ensure_mlx_adapter(peft_dir)
 +        first_mtime = (first / "adapters.safetensors").stat().st_mtime_ns
++
 +        # Second call — should NOT rewrite the file.
 +        second = _ensure_mlx_adapter(peft_dir)
 +        assert second == first
 +        assert (second / "adapters.safetensors").stat().st_mtime_ns == first_mtime
++
 +    def test_passes_through_unrecognized_dir(self, tmp_path: Path) -> None:
 +        """A directory with neither shape — let mlx_lm.load surface
 +        its own error rather than this helper second-guessing."""
 +        from dlm_sway.backends.mlx import _ensure_mlx_adapter
++
 +        empty = tmp_path / "empty"
 +        empty.mkdir()
 +        out = _ensure_mlx_adapter(empty)
 +        assert out == empty
++
++
  class TestModulesToSave:
      """``modules_to_save`` (e.g. embed_tokens, lm_head) must be skipped
      cleanly with a report entry, not crash the converter."""