@@ -212,7 +212,12 @@ class MLXDifferentialBackend: |
| 212 | 212 | mx, mlx_lm = _require_mlx() |
| 213 | 213 | self._mx = mx |
| 214 | 214 | self._spec = base_spec |
| 215 | | - self._adapter_path = Path(adapter_path).expanduser().resolve() |
| 215 | + raw_path = Path(adapter_path).expanduser().resolve() |
| 216 | + # S24: when the user points us at a PEFT adapter (typical |
| 217 | + # `dlm export` output), auto-convert into the user's cache |
| 218 | + # so the headline `.dlm → sway` flow on MLX just works. |
| 219 | + # Cached by content hash so repeated runs skip the convert. |
| 220 | + self._adapter_path = _ensure_mlx_adapter(raw_path) |
| 216 | 221 | |
| 217 | 222 | # Load bare base (no adapter). |
| 218 | 223 | self._base_model, self._tokenizer = mlx_lm.load(base_spec.base) |
@@ -267,6 +272,67 @@ class MLXDifferentialBackend: |
| 267 | 272 | self._active = None |
| 268 | 273 | |
| 269 | 274 | |
def _ensure_mlx_adapter(adapter_path: Path) -> Path:
    """Return an MLX-LM-format adapter directory for ``adapter_path`` (S24).

    Detection is structural:

    * If ``adapter_path/adapters.safetensors`` (mlx-lm's filename) exists,
      the directory is returned unchanged — the user converted manually
      or the dir is already MLX-shaped.
    * Otherwise, if ``adapter_path/adapter_model.safetensors`` exists, the
      directory is treated as PEFT and run through the converter.
    * If neither file exists, the directory is returned unchanged so the
      downstream loader can surface its own error.

    Converted output is cached under ``_mlx_cache_root() / <digest>``,
    where ``<digest>`` is the hex blake2b (16-byte) digest of the source
    ``adapter_model.safetensors`` bytes. Repeated runs on the same weights
    only pay for hashing the file plus a directory lookup.

    The conversion is staged in a sibling temporary directory and renamed
    into place only after the converter returns. An interrupted or
    concurrent first run therefore cannot leave a half-written directory
    that a later run would mistake for a valid cache entry.

    Args:
        adapter_path: Directory holding either a PEFT or an MLX adapter.

    Returns:
        ``adapter_path`` itself when no conversion applies, otherwise the
        cache directory containing the converted adapter.

    Raises:
        OSError: If the source file cannot be read or the cache cannot be
            written.
        Exception: Whatever ``convert_peft_to_mlx`` raises is propagated;
            the staging directory is removed first.
    """
    import hashlib
    import shutil
    import tempfile

    def _is_converted(directory: Path) -> bool:
        # A cache entry counts as usable only when both converter outputs
        # are present.
        return (directory / "adapters.safetensors").exists() and (
            directory / "adapter_config.json"
        ).exists()

    if (adapter_path / "adapters.safetensors").exists():
        # Already in MLX format — pass through unchanged.
        return adapter_path

    src_st = adapter_path / "adapter_model.safetensors"
    if not src_st.exists():
        # Neither MLX nor PEFT shape; let mlx_lm.load surface its own error.
        return adapter_path

    # Content hash of the source PEFT weights, streamed in 1 MiB chunks so
    # large adapters are never held in memory at once. A 128-bit digest
    # makes accidental collisions between adapter versions negligible.
    h = hashlib.blake2b(digest_size=16)
    with src_st.open("rb") as fh:
        for chunk in iter(lambda: fh.read(1024 * 1024), b""):
            h.update(chunk)
    sha = h.hexdigest()

    cache_root = _mlx_cache_root() / sha
    if _is_converted(cache_root):
        return cache_root

    # First-run conversion. Import here to keep the cycle off the
    # import path of users who never touch MLX.
    from dlm_sway.backends._mlx_convert import convert_peft_to_mlx

    cache_parent = cache_root.parent
    cache_parent.mkdir(parents=True, exist_ok=True)
    # Stage next to the final location so the rename below stays on one
    # filesystem. mkdtemp creates the directory private to the current
    # user, which is acceptable for a per-user cache.
    staging = Path(tempfile.mkdtemp(prefix=f".{sha}-", dir=cache_parent))
    try:
        convert_peft_to_mlx(adapter_path, staging, overwrite=True)
        if not _is_converted(cache_root):
            # Clear any partial leftover from an interrupted earlier run
            # before publishing the finished directory.
            shutil.rmtree(cache_root, ignore_errors=True)
            try:
                staging.replace(cache_root)
            except OSError:
                # A concurrent process may have published the same entry
                # between the check and the rename; that result is
                # equivalent, so only fail when no usable entry exists.
                if not _is_converted(cache_root):
                    raise
    finally:
        # No-op after a successful rename; removes the staging dir on
        # failure or when another process won the race.
        shutil.rmtree(staging, ignore_errors=True)
    return cache_root
| 321 | + |
| 322 | + |
def _mlx_cache_root() -> Path:
    """Return ``$XDG_CACHE_HOME/dlm-sway/mlx-converted`` (or ``~/.cache/...``).

    Honors XDG so Linux users get their conventional cache location;
    macOS users get ``~/.cache/...`` (XDG isn't standard on darwin
    but uv + many Python tools follow this convention there too).

    ``XDG_CACHE_HOME`` is used only when it is set, non-empty and an
    absolute path. The XDG Base Directory specification declares relative
    values invalid, and honoring one would make the cache location depend
    on the current working directory, so those fall back to ``~/.cache``.

    Returns:
        The directory under which converted adapters are cached. The
        directory is not created here.
    """
    import os

    xdg_cache_home = os.environ.get("XDG_CACHE_HOME")
    if xdg_cache_home and os.path.isabs(xdg_cache_home):
        base = Path(xdg_cache_home)
    else:
        base = Path.home() / ".cache"
    return base / "dlm-sway" / "mlx-converted"
| 334 | + |
| 335 | + |
| 270 | 336 | def _log_softmax(x: np.ndarray, *, axis: int) -> np.ndarray: |
| 271 | 337 | x_max = np.max(x, axis=axis, keepdims=True) |
| 272 | 338 | y = x - x_max |