tenseleyflow/documentlanguagemodel / eb05e73

Browse files

feat(hardware): report MLX inference availability in doctor + pyproject mlx extra

Authored by espadonne
SHA
eb05e739a7e8c59dd7da7e01fcaff0a98cce52f1
Parents
fc20ed1
Tree
45ade83

5 changed files

Status | File | Lines added | Lines removed
M docs/cli/reference.md 1 0
M pyproject.toml 7 0
M src/dlm/hardware/capabilities.py 15 0
M src/dlm/hardware/render.py 1 0
M tests/unit/hardware/test_capabilities.py 49 0
docs/cli/reference.mdmodified
@@ -80,6 +80,7 @@ dlm prompt <path> [query] [--max-tokens N] [--temp F] [--top-p F]
8080
 | `--temp F` | 0.7 | Temperature. `0.0` = greedy decoding (deterministic). |
8181
 | `--top-p F` | None | Top-p sampling. |
8282
 | `--adapter NAME` | None | Select a named adapter from `training.adapters`. Required on multi-adapter documents; rejected on single-adapter ones. |
83
+| `--backend {auto,pytorch,mlx}` | `auto` | Inference backend. `auto` picks MLX on Apple Silicon (when `uv sync --extra mlx` is installed), else PyTorch. |
8384
 | `--verbose` | false | Print resolved `InferencePlan` on stderr. |
8485
 
8586
 Query is the CLI positional argument. Omit to read from stdin.
pyproject.tomlmodified
@@ -48,6 +48,13 @@ dependencies = [
4848
 cuda = [
4949
     "bitsandbytes>=0.43",
5050
 ]
51
+# Apple Silicon only (Sprint 21). `mlx` + `mlx-lm` wheels are darwin-arm64
52
+# exclusives; env markers keep `uv sync --extra mlx` a no-op on non-Apple
53
+# hosts so wheel resolution doesn't fail for Linux/CUDA contributors.
54
+mlx = [
55
+    "mlx>=0.18; sys_platform == 'darwin' and platform_machine == 'arm64'",
56
+    "mlx-lm>=0.19; sys_platform == 'darwin' and platform_machine == 'arm64'",
57
+]
5158
 
5259
 [project.scripts]
5360
 dlm = "dlm.cli.app:main"
src/dlm/hardware/capabilities.pymodified
@@ -54,6 +54,7 @@ class Capabilities:
5454
     has_xformers: bool
5555
     has_bitsandbytes: bool
5656
     has_triton: bool
57
+    has_mlx: bool
5758
     torch_version: str
5859
     cuda_version: str | None
5960
     rocm_version: str | None
@@ -92,6 +93,7 @@ def probe() -> Capabilities:
9293
         has_xformers=_module_available("xformers"),
9394
         has_bitsandbytes=_module_available("bitsandbytes") and backend == Backend.CUDA,
9495
         has_triton=_module_available("triton"),
96
+        has_mlx=_has_mlx_inference(backend),
9597
         torch_version=str(torch.__version__),
9698
         cuda_version=_cuda_version(backend, torch),
9799
         rocm_version=_rocm_version(torch),
@@ -182,6 +184,19 @@ def _module_available(name: str) -> bool:
182184
     return importlib.util.find_spec(name) is not None
183185
 
184186
 
187
def _has_mlx_inference(backend: Backend) -> bool:
    """Return True when MLX inference is actually runnable on this host.

    Sprint 21: MLX ships darwin-arm64 wheels only, so availability is
    gated on the active backend being MPS rather than on installed
    distributions alone.  This keeps a misconfigured CUDA/CPU box that
    happens to have an ``mlx`` dist lying around from reporting True.
    Both ``mlx`` and ``mlx_lm`` must be importable for inference.
    """
    on_apple_backend = backend == Backend.MPS
    return (
        on_apple_backend
        and _module_available("mlx")
        and _module_available("mlx_lm")
    )
198
+
199
+
185200
 def _cuda_version(backend: Backend, torch: object) -> str | None:
186201
     if backend != Backend.CUDA:
187202
         return None
src/dlm/hardware/render.pymodified
@@ -25,6 +25,7 @@ def render_text(result: DoctorResult) -> str:
2525
     lines.append(f"FlashAttention: {_bool(caps.has_flash_attention)}")
2626
     lines.append(f"xFormers:       {_bool(caps.has_xformers)}")
2727
     lines.append(f"Triton:         {_bool(caps.has_triton)}")
28
+    lines.append(f"MLX inference:  {_bool(caps.has_mlx)}")
2829
     lines.append(f"CPU cores:      {caps.cpu_cores}")
2930
     lines.append(f"RAM:            {caps.ram_gb:.1f} GB")
3031
     lines.append(f"Determinism:    {caps.determinism_class}")
tests/unit/hardware/test_capabilities.pymodified
@@ -55,6 +55,55 @@ class TestProbeMps:
5555
         assert caps.has_flash_attention is False
5656
 
5757
 
58
class TestMlxAvailability:
    """Sprint 21: `has_mlx` is reported only on MPS with both MLX modules."""

    def test_non_mps_never_reports_mlx(self) -> None:
        # Off-Apple hosts report has_mlx False regardless of dist
        # metadata: the probe short-circuits before consulting
        # importlib on CUDA/CPU backends.
        with force_cuda(sm=(8, 0)):
            assert probe().has_mlx is False
        with force_cpu():
            assert probe().has_mlx is False

    def test_mps_reports_mlx_when_both_modules_installed(
        self, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        # Pretend both mlx and mlx_lm are importable on an MPS host.
        from dlm.hardware import capabilities as caps_mod

        original = caps_mod._module_available
        monkeypatch.setattr(
            caps_mod,
            "_module_available",
            lambda name: True if name in {"mlx", "mlx_lm"} else original(name),
        )
        with force_mps():
            assert probe().has_mlx is True

    def test_mps_reports_no_mlx_when_mlx_lm_missing(
        self, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        # mlx alone is not enough: inference also requires mlx_lm.
        from dlm.hardware import capabilities as caps_mod

        original = caps_mod._module_available
        overrides = {"mlx": True, "mlx_lm": False}

        def fake_available(name: str) -> bool:
            return overrides.get(name, original(name))

        monkeypatch.setattr(caps_mod, "_module_available", fake_available)
        with force_mps():
            assert probe().has_mlx is False
105
+
106
+
58107
 class TestProbeCpu:
59108
     def test_cpu_advisory_determinism(self) -> None:
60109
         with force_cpu():