tenseleyflow/documentlanguagemodel / fc1bd69

Browse files

Add mlx-serve export target

Authored by espadonne
SHA
fc1bd697666b2f6d760ea21062e9b46ad2bac3ae
Parents
787628d
Tree
cb96c6e

13 changed files

StatusFile+-
M README.md 18 8
M docs/cli/reference.md 1 1
A docs/cookbook/multi-target-export.md 175 0
A docs/format/export-manifest.md 95 0
M docs/getting-started/first-export.md 17 0
M docs/index.md 3 3
M mkdocs.yml 2 0
M src/dlm/cli/commands.py 79 1
M src/dlm/export/targets/__init__.py 9 0
A src/dlm/export/targets/mlx_serve.py 272 0
M tests/unit/cli/test_export_target_flag.py 20 0
A tests/unit/export/targets/test_mlx_serve_argv.py 173 0
M tests/unit/export/targets/test_registry.py 3 2
README.mdmodified
@@ -15,12 +15,12 @@ A `.dlm` can be:
15
 
15
 
16
 DLM trains LoRA / QLoRA / DoRA adapters on real pretrained bases, keeps a replay
16
 DLM trains LoRA / QLoRA / DoRA adapters on real pretrained bases, keeps a replay
17
 history so retrains do not silently forget, and exports local runtimes such as
17
 history so retrains do not silently forget, and exports local runtimes such as
18
-Ollama and `llama-server`.
18
+Ollama, `llama-server`, `vllm`, and `mlx-serve`.
19
 
19
 
20
 **Status:** pre-v1.0, but far beyond the original MVP framing. The core
20
 **Status:** pre-v1.0, but far beyond the original MVP framing. The core
21
 author/train/prompt/export/pack/share loop is real, and newer runtime-target
21
 author/train/prompt/export/pack/share loop is real, and newer runtime-target
22
 work is landing incrementally. Current export targets are `ollama`,
22
 work is landing incrementally. Current export targets are `ollama`,
23
-`llama-server`, and `vllm`.
23
+`llama-server`, `vllm`, and `mlx-serve`.
24
 
24
 
25
 ## What A `.dlm` Actually Is
25
 ## What A `.dlm` Actually Is
26
 
26
 
@@ -79,8 +79,8 @@ DLM sits in the gap:
79
   `dlm train --watch`, `dlm metrics`, and `dlm doctor` are all part of the
79
   `dlm train --watch`, `dlm metrics`, and `dlm doctor` are all part of the
80
   normal workflow now.
80
   normal workflow now.
81
 - **Export beyond the original Ollama-only story.** DLM still does explicit
81
 - **Export beyond the original Ollama-only story.** DLM still does explicit
82
-  Ollama exports with pinned templates, and now also emits `llama-server`
82
+  Ollama exports with pinned templates, and now also emits `llama-server`,
83
-  launch artifacts against the same GGUF path.
83
+  `vllm`, and `mlx-serve` launch artifacts for local runtime targets.
84
 - **Close the eval loop.** `dlm harvest` can pull failing `sway`-style probe
84
 - **Close the eval loop.** `dlm harvest` can pull failing `sway`-style probe
85
   reports back into the document as new training examples.
85
   reports back into the document as new training examples.
86
 - **Pack and share reproducibly.** `.dlm.pack`, verification, push/pull, and
86
 - **Pack and share reproducibly.** `.dlm.pack`, verification, push/pull, and
@@ -90,10 +90,10 @@ DLM sits in the gap:
90
 
90
 
91
 | Tier | Training | Inference / export |
91
 | Tier | Training | Inference / export |
92
 |---|---|---|
92
 |---|---|---|
93
-| NVIDIA CUDA (SM ≥ 8.0) | bf16 + QLoRA 4-bit + FlashAttention | Ollama, GGUF export, `llama-server` launch artifacts |
93
+| NVIDIA CUDA (SM ≥ 8.0) | bf16 + QLoRA 4-bit + FlashAttention | Ollama, GGUF export, `llama-server`, `vllm` |
94
-| NVIDIA CUDA (SM < 8.0) | fp16 LoRA | Ollama, GGUF export, `llama-server` launch artifacts |
94
+| NVIDIA CUDA (SM < 8.0) | fp16 LoRA | Ollama, GGUF export, `llama-server`, `vllm` |
95
-| Apple Silicon (MPS) | fp16 or fp32 LoRA depending on doctor plan | Ollama, selected MLX inference paths, GGUF export |
95
+| Apple Silicon (MPS) | fp16 or fp32 LoRA depending on doctor plan | Ollama, selected MLX inference paths, GGUF export, `vllm` (conservative Metal defaults), `mlx-serve` |
96
-| CPU | inference-first; training refused above small bases unless forced | GGUF export, Ollama, `llama-server` launch artifacts |
96
+| CPU | inference-first; training refused above small bases unless forced | GGUF export, Ollama, `llama-server` |
97
 | AMD ROCm | experimental | ROCm-oriented llama.cpp flows |
97
 | AMD ROCm | experimental | ROCm-oriented llama.cpp flows |
98
 
98
 
99
 See [docs/hardware](./docs/hardware/memory-estimates.md) and
99
 See [docs/hardware](./docs/hardware/memory-estimates.md) and
@@ -133,6 +133,13 @@ scripts/bump-llama-cpp.sh build
133
 # If you want the llama.cpp HTTP target too:
133
 # If you want the llama.cpp HTTP target too:
134
 scripts/bump-llama-cpp.sh build --with-server
134
 scripts/bump-llama-cpp.sh build --with-server
135
 
135
 
136
+# If you want the Apple Silicon MLX HTTP target:
137
+uv sync --extra mlx
138
+
139
+# If you want the vLLM HTTP target:
140
+# install a compatible vllm runtime separately; DLM writes launch artifacts
141
+# but does not bundle the server runtime itself.
142
+
136
 uv run dlm --help
143
 uv run dlm --help
137
 ```
144
 ```
138
 
145
 
@@ -276,6 +283,8 @@ uv run dlm metrics mydoc.dlm
276
 ```sh
283
 ```sh
277
 uv run dlm export mydoc.dlm --target ollama --name mydoc
284
 uv run dlm export mydoc.dlm --target ollama --name mydoc
278
 uv run dlm export mydoc.dlm --target llama-server --no-smoke
285
 uv run dlm export mydoc.dlm --target llama-server --no-smoke
286
+uv run dlm export mydoc.dlm --target vllm --no-smoke
287
+uv run dlm export mydoc.dlm --target mlx-serve --no-smoke
279
 uv run dlm pack mydoc.dlm --include-exports
288
 uv run dlm pack mydoc.dlm --include-exports
280
 uv run dlm verify mydoc.dlm.pack
289
 uv run dlm verify mydoc.dlm.pack
281
 ```
290
 ```
@@ -319,6 +328,7 @@ See the [CLI reference](./docs/cli/reference.md) for the full flag surface.
319
 - [Multimodal training](./docs/cookbook/multimodal-training.md)
328
 - [Multimodal training](./docs/cookbook/multimodal-training.md)
320
 - [Audio training](./docs/cookbook/audio-training.md)
329
 - [Audio training](./docs/cookbook/audio-training.md)
321
 - [Probe-driven training / sway harvest](./docs/cookbook/probe-driven-training.md)
330
 - [Probe-driven training / sway harvest](./docs/cookbook/probe-driven-training.md)
331
+- [Multi-target export](./docs/cookbook/multi-target-export.md)
322
 - [CLI reference](./docs/cli/reference.md)
332
 - [CLI reference](./docs/cli/reference.md)
323
 - [Architecture](./docs/architecture.md)
333
 - [Architecture](./docs/architecture.md)
324
 - [Determinism](./docs/determinism.md)
334
 - [Determinism](./docs/determinism.md)
docs/cli/reference.mdmodified
@@ -203,7 +203,7 @@ dlm export <path> [--target NAME] [--quant Q] [--merged [--dequantize]]
203
 
203
 
204
 | Option | Default | Notes |
204
 | Option | Default | Notes |
205
 |---|---|---|
205
 |---|---|---|
206
-| `--target NAME` | `ollama` | Export destination. Sprint 41 currently supports `ollama`, `llama-server`, and `vllm`. The `llama-server` path writes launch artifacts against the existing GGUF export and uses the shared OpenAI-compatible HTTP smoke harness; the `vllm` path writes `vllm_launch.sh` + `vllm_config.json` against the local adapter layout and ignores GGUF-only flags. On Apple Silicon, the generated `vllm` launch path forces the documented low-risk `vllm-metal` settings (`VLLM_METAL_USE_PAGED_ATTENTION=0`, `VLLM_METAL_MEMORY_FRACTION=auto`) and caps `--max-model-len` to the document's `training.sequence_len`. |
206
+| `--target NAME` | `ollama` | Export destination. Sprint 41 currently supports `ollama`, `llama-server`, `vllm`, and `mlx-serve`. The `llama-server` path writes launch artifacts against the existing GGUF export and uses the shared OpenAI-compatible HTTP smoke harness. The `vllm` path writes `vllm_launch.sh` + `vllm_config.json` against the local adapter layout and ignores GGUF-only flags. On Apple Silicon, the generated `vllm` launch path forces the documented low-risk `vllm-metal` settings (`VLLM_METAL_USE_PAGED_ATTENTION=0`, `VLLM_METAL_MEMORY_FRACTION=auto`) and caps `--max-model-len` to the document's `training.sequence_len`. The `mlx-serve` path is Apple Silicon only, writes `mlx_serve_launch.sh` plus a staged MLX adapter directory, and currently supports text bases only. |
207
 | `--quant Q` | frontmatter.export.default_quant | `Q4_K_M` / `Q5_K_M` / `Q6_K` / `Q8_0` / `F16`. |
207
 | `--quant Q` | frontmatter.export.default_quant | `Q4_K_M` / `Q5_K_M` / `Q6_K` / `Q8_0` / `F16`. |
208
 | `--merged` | false | Merge LoRA into base before quantizing. |
208
 | `--merged` | false | Merge LoRA into base before quantizing. |
209
 | `--dequantize` | false | Required with `--merged` on a QLoRA adapter (pitfall #3). |
209
 | `--dequantize` | false | Required with `--merged` on a QLoRA adapter (pitfall #3). |
docs/cookbook/multi-target-export.mdadded
@@ -0,0 +1,175 @@
1
+# Multi-target export
2
+
3
+`dlm export` is no longer just an Ollama registration path. The same
4
+trained store can now emit local runtime artifacts for four targets:
5
+
6
+- `ollama` for managed local registration plus the existing Modelfile flow
7
+- `llama-server` for GGUF-backed OpenAI-compatible HTTP serving via vendored
8
+  `llama.cpp`
9
+- `vllm` for HF-snapshot plus LoRA-module serving on machines that can run
10
+  `vllm`
11
+- `mlx-serve` for Apple Silicon text serving through `mlx_lm.server`
12
+
13
+Use this when you want one training loop but different local runtimes for
14
+prompting, evaluation harnesses, agents, or deployment experiments.
15
+
16
+## Quick map
17
+
18
+| Target | Best for | Artifact shape | Smoke path |
19
+|---|---|---|---|
20
+| `ollama` | Easiest local chat loop | GGUF + `Modelfile` + local registration | existing Ollama smoke |
21
+| `llama-server` | GGUF-backed OpenAI-compatible server | `base.<quant>.gguf` + `adapter.gguf` + `chat-template.jinja` + `llama-server_launch.sh` | shared HTTP smoke |
22
+| `vllm` | HF-snapshot + LoRA serving on supported hosts | `vllm_launch.sh` + `vllm_config.json` + staged adapters | shared HTTP smoke |
23
+| `mlx-serve` | Apple Silicon text serving without GGUF conversion | `mlx_serve_launch.sh` + staged MLX adapter dir | shared HTTP smoke |
24
+
25
+## Prerequisites
26
+
27
+### Ollama
28
+
29
+```sh
30
+brew install ollama
31
+```
32
+
33
+### llama-server
34
+
35
+```sh
36
+scripts/bump-llama-cpp.sh build --with-server
37
+```
38
+
39
+That compiles the vendored `llama-server` binary alongside the GGUF tooling.
40
+
41
+### vLLM
42
+
43
+Install a compatible `vllm` runtime in the environment you plan to launch
44
+from. DLM writes the launch/config artifacts, but it does not bundle the
45
+server runtime.
46
+
47
+On Apple Silicon, the generated `vllm` launch path is deliberately cautious:
48
+
49
+- `VLLM_METAL_USE_PAGED_ATTENTION=0`
50
+- `VLLM_METAL_MEMORY_FRACTION=auto`
51
+- `--max-model-len` capped to the document's `training.sequence_len`
52
+
53
+Those defaults exist to avoid the Metal OOM / hang pattern that shows up when
54
+`vllm-metal` blindly asks for the base model's full context window.
55
+
56
+### MLX-serve
57
+
58
+```sh
59
+uv sync --extra mlx
60
+```
61
+
62
+`mlx-serve` is Apple Silicon only. DLM refuses it on CUDA, ROCm, and CPU-only
63
+hosts, and this Sprint 41 slice only supports text bases on that target.
64
+
65
+## Common exports
66
+
67
+### Ollama
68
+
69
+```sh
70
+uv run dlm export tutor.dlm --target ollama --name my-tutor
71
+```
72
+
73
+This is the classic DLM path: GGUF conversion, explicit Go-template
74
+`Modelfile`, optional registration, and an Ollama smoke prompt.
75
+
76
+### llama-server
77
+
78
+```sh
79
+uv run dlm export tutor.dlm --target llama-server
80
+bash ~/.dlm/store/<dlm_id>/exports/Q4_K_M/llama-server_launch.sh
81
+```
82
+
83
+This reuses the GGUF export artifacts and adds:
84
+
85
+- `chat-template.jinja`
86
+- `llama-server_launch.sh`
87
+- `target: "llama-server"` in `export_manifest.json`
88
+
89
+The launch script binds `127.0.0.1` and speaks `/v1/chat/completions`.
90
+
91
+### vLLM
92
+
93
+```sh
94
+uv run dlm export tutor.dlm --target vllm
95
+bash ~/.dlm/store/<dlm_id>/exports/vllm/vllm_launch.sh
96
+```
97
+
98
+This path stages local LoRA modules and writes:
99
+
100
+- `vllm_launch.sh`
101
+- `vllm_config.json`
102
+- `exports/vllm/adapters/...`
103
+
104
+Flags that only matter to GGUF or Ollama are ignored with a banner:
105
+`--quant`, `--merged`, `--dequantize`, `--no-template`, `--skip-ollama`,
106
+`--no-imatrix`, `--draft`, `--no-draft`.
107
+
108
+### MLX-serve
109
+
110
+```sh
111
+uv run dlm export tutor.dlm --target mlx-serve
112
+bash ~/.dlm/store/<dlm_id>/exports/mlx-serve/mlx_serve_launch.sh
113
+```
114
+
115
+This path stages an MLX-loadable adapter directory and writes:
116
+
117
+- `mlx_serve_launch.sh`
118
+- `exports/mlx-serve/adapter/` or one named adapter directory
119
+- `target: "mlx-serve"` in `export_manifest.json`
120
+
121
+`mlx-serve` also ignores the GGUF/Ollama-only flags above, plus `--name`.
122
+
123
+## Multi-adapter behavior
124
+
125
+The runtime targets split into two families:
126
+
127
+- `ollama` and `llama-server` can reuse the GGUF weighted-merge path for
128
+  `--adapter-mix`
129
+- `vllm` and `mlx-serve` work from local adapter directories
130
+
131
+For `vllm`:
132
+
133
+- single-adapter docs export one staged module
134
+- multi-adapter docs without `--adapter` export every named adapter as a
135
+  `--lora-modules` list
136
+- `--adapter-mix` exports the staged composite adapter instead
137
+
138
+For `mlx-serve`:
139
+
140
+- single-adapter docs export the current flat adapter
141
+- multi-adapter docs must choose one adapter with `--adapter`, or pass
142
+  `--adapter-mix` to export the staged composite adapter
143
+
144
+That "one adapter at a time" rule is intentional: this target is a simple
145
+local-serving path, not a dynamic multi-LoRA router.
146
+
147
+## Smoke behavior
148
+
149
+The three OpenAI-compatible HTTP targets — `llama-server`, `vllm`, and `mlx-serve` — all use the shared smoke harness:
150
+
151
+1. reserve a loopback port
152
+2. launch the target-specific server command
153
+3. poll `/v1/models`
154
+4. POST `/v1/chat/completions`
155
+5. record the first non-empty line in the store manifest
156
+
157
+Skip it with `--no-smoke` when the runtime is not installed or you want the
158
+artifacts only.
159
+
160
+## Inspecting what got written
161
+
162
+Every export writes `export_manifest.json` under its target directory. The
163
+important fields are:
164
+
165
+- `target`
166
+- `quant`
167
+- `artifacts`
168
+- `adapter_version`
169
+- `base_model_hf_id`
170
+- `base_model_revision`
171
+
172
+The per-store `manifest.json` also gets a new row appended to `exports[]` with the
173
+same `target` plus the smoke first line when a smoke test ran.
174
+
175
+See [Export manifest](../format/export-manifest.md) for the exact schema.
docs/format/export-manifest.mdadded
@@ -0,0 +1,95 @@
1
+# Export manifest
2
+
3
+Every `dlm export` writes an `export_manifest.json` inside the export directory.
4
+It is the target-local record of what DLM emitted, separate from the broader
5
+per-store `manifest.json`.
6
+
7
+Examples:
8
+
9
+- `~/.dlm/store/<dlm_id>/exports/Q4_K_M/export_manifest.json`
10
+- `~/.dlm/store/<dlm_id>/exports/vllm/export_manifest.json`
11
+- `~/.dlm/store/<dlm_id>/exports/mlx-serve/export_manifest.json`
12
+
13
+## What it records
14
+
15
+The manifest captures:
16
+
17
+- `target`: which runtime this export was prepared for
18
+- `quant`: the export family (`Q4_K_M`, `Q8_0`, `hf`, ...)
19
+- `merged` / `dequantized`: whether LoRA weights were merged into the base
20
+- `created_at` and `created_by`
21
+- `llama_cpp_tag` when the target depends on vendored `llama.cpp`
22
+- `base_model_hf_id` and `base_model_revision`
23
+- `adapter_version`
24
+- `artifacts`: every emitted file with relative path, sha256, and size
25
+
26
+The schema is strict and round-trips through the Pydantic model in
27
+`src/dlm/export/manifest.py`.
28
+
29
+## Example
30
+
31
+```json
32
+{
33
+  "target": "llama-server",
34
+  "quant": "Q4_K_M",
35
+  "merged": false,
36
+  "dequantized": false,
37
+  "ollama_name": null,
38
+  "created_at": "2026-04-23T18:42:00",
39
+  "created_by": "dlm-0.1.0",
40
+  "llama_cpp_tag": "b4281",
41
+  "base_model_hf_id": "HuggingFaceTB/SmolLM2-135M-Instruct",
42
+  "base_model_revision": "4c0d2...",
43
+  "adapter_version": 3,
44
+  "artifacts": [
45
+    {
46
+      "path": "base.Q4_K_M.gguf",
47
+      "sha256": "…",
48
+      "size_bytes": 47211904
49
+    },
50
+    {
51
+      "path": "adapter.gguf",
52
+      "sha256": "…",
53
+      "size_bytes": 3145728
54
+    },
55
+    {
56
+      "path": "llama-server_launch.sh",
57
+      "sha256": "…",
58
+      "size_bytes": 312
59
+    }
60
+  ]
61
+}
62
+```
63
+
64
+## `target`
65
+
66
+`target` is now the load-bearing field for Sprint 41’s runtime split.
67
+
68
+Current values:
69
+
70
+- `ollama`
71
+- `llama-server`
72
+- `vllm`
73
+- `mlx-serve`
74
+
75
+That lets downstream tooling distinguish:
76
+
77
+- a GGUF + Modelfile export meant for Ollama
78
+- a GGUF-backed OpenAI-compatible launch artifact set
79
+- an HF-snapshot + LoRA-module export for `vllm`
80
+- an MLX adapter export for Apple Silicon serving
81
+
82
+## Relationship to the store manifest
83
+
84
+`export_manifest.json` is per-export and artifact-focused.
85
+
86
+The store-level `manifest.json` keeps the running narrative in `exports[]`:
87
+
88
+- when the export happened
89
+- which `target` it used
90
+- GGUF checksums when present
91
+- `ollama_name` when relevant
92
+- the first smoke output line when a smoke test ran
93
+
94
+Use `export_manifest.json` when you need exact artifact provenance for one
95
+export directory. Use `manifest.json` when you want the store’s full history.
docs/getting-started/first-export.mdmodified
@@ -4,6 +4,11 @@
4
 Modelfile with an explicit Go `text/template` (no fuzzy matching),
4
 Modelfile with an explicit Go `text/template` (no fuzzy matching),
5
 registers the model with `ollama create`, and runs a smoke prompt.
5
 registers the model with `ollama create`, and runs a smoke prompt.
6
 
6
 
7
+That is still the default path, but it is no longer the only one. Sprint 41
8
+also adds local runtime targets such as `llama-server`, `vllm`, and
9
+`mlx-serve`; see the [multi-target export cookbook](../cookbook/multi-target-export.md)
10
+once you want an OpenAI-compatible local server instead of an Ollama model.
11
+
7
 ## Prerequisites
12
 ## Prerequisites
8
 
13
 
9
 - `vendor/llama.cpp` submodule is built:
14
 - `vendor/llama.cpp` submodule is built:
@@ -80,6 +85,18 @@ $ uv run dlm export tutor.dlm --quant Q4_K_M --skip-ollama
80
 Useful on CI runners without the Ollama daemon installed. The GGUFs
85
 Useful on CI runners without the Ollama daemon installed. The GGUFs
81
 land in `exports/Q4_K_M/`; wire them into your own runtime.
86
 land in `exports/Q4_K_M/`; wire them into your own runtime.
82
 
87
 
88
+## Other runtime targets
89
+
90
+Once the basic GGUF/Ollama flow is familiar, the same store can export to:
91
+
92
+- `--target llama-server` for a vendored `llama.cpp` HTTP server
93
+- `--target vllm` for HF-snapshot + LoRA-module serving
94
+- `--target mlx-serve` for Apple Silicon text serving through `mlx_lm.server`
95
+
96
+Those targets have different prerequisites and artifact layouts, so they live
97
+in the [multi-target export cookbook](../cookbook/multi-target-export.md)
98
+instead of this first-run page.
99
+
83
 ## Next
100
 ## Next
84
 
101
 
85
 Want to send the whole training history to a friend? The
102
 Want to send the whole training history to a friend? The
docs/index.mdmodified
@@ -10,7 +10,7 @@ A `.dlm` can be a hand-authored training doc, a directive-driven entrypoint
10
 into a codebase, a multi-adapter project with learned routing, or a selected
10
 into a codebase, a multi-adapter project with learned routing, or a selected
11
 multimodal / audio-language document. DLM trains LoRA / QLoRA / DoRA adapters
11
 multimodal / audio-language document. DLM trains LoRA / QLoRA / DoRA adapters
12
 on real pretrained bases, keeps replay history, and exports local runtimes such
12
 on real pretrained bases, keeps replay history, and exports local runtimes such
13
-as Ollama and `llama-server`.
13
+as Ollama, `llama-server`, `vllm`, and `mlx-serve`.
14
 
14
 
15
 ## What DLM Ships Today
15
 ## What DLM Ships Today
16
 
16
 
@@ -27,7 +27,7 @@ as Ollama and `llama-server`.
27
   persona lanes inside one project
27
   persona lanes inside one project
28
 - **Local iteration UX** with `prompt`, `repl`, `train --watch`, `metrics`,
28
 - **Local iteration UX** with `prompt`, `repl`, `train --watch`, `metrics`,
29
   and `doctor`
29
   and `doctor`
30
-- **Runtime export** to `ollama` and `llama-server`
30
+- **Runtime export** to `ollama`, `llama-server`, `vllm`, and `mlx-serve`
31
 - **Probe-driven improvement** through `sway`-style harvest flows
31
 - **Probe-driven improvement** through `sway`-style harvest flows
32
 
32
 
33
 ## 30-Second Demo
33
 ## 30-Second Demo
@@ -49,7 +49,7 @@ $ uv run dlm export tutor.dlm --target ollama --name my-tutor
49
 | Train across a real repo | [Training across codebases](cookbook/training-across-codebases.md) |
49
 | Train across a real repo | [Training across codebases](cookbook/training-across-codebases.md) |
50
 | Use named adapters and routing | [Multi-adapter](cookbook/multi-adapter.md) and [Learned adapter gate](cookbook/learned-adapter-gate.md) |
50
 | Use named adapters and routing | [Multi-adapter](cookbook/multi-adapter.md) and [Learned adapter gate](cookbook/learned-adapter-gate.md) |
51
 | Work with images or audio | [Multimodal training](cookbook/multimodal-training.md) and [Audio training](cookbook/audio-training.md) |
51
 | Work with images or audio | [Multimodal training](cookbook/multimodal-training.md) and [Audio training](cookbook/audio-training.md) |
52
-| Export or ship a model | [CLI reference](cli/reference.md) and [Determinism](determinism.md) |
52
+| Export or ship a model | [Multi-target export](cookbook/multi-target-export.md), [CLI reference](cli/reference.md), and [Determinism](determinism.md) |
53
 | Pull eval failures back into training | [Probe-driven training](cookbook/probe-driven-training.md) |
53
 | Pull eval failures back into training | [Probe-driven training](cookbook/probe-driven-training.md) |
54
 
54
 
55
 ## Status
55
 ## Status
mkdocs.ymlmodified
@@ -58,6 +58,7 @@ nav:
58
   - The .dlm format:
58
   - The .dlm format:
59
       - Frontmatter: format/frontmatter.md
59
       - Frontmatter: format/frontmatter.md
60
       - Sections: format/sections.md
60
       - Sections: format/sections.md
61
+      - Export manifest: format/export-manifest.md
61
       - .dlm/training.yaml: format/dlm-training-yaml.md
62
       - .dlm/training.yaml: format/dlm-training-yaml.md
62
       - .dlm/ignore: format/dlm-ignore.md
63
       - .dlm/ignore: format/dlm-ignore.md
63
   - CLI reference: cli/reference.md
64
   - CLI reference: cli/reference.md
@@ -77,6 +78,7 @@ nav:
77
       - Template gallery: cookbook/template-gallery.md
78
       - Template gallery: cookbook/template-gallery.md
78
       - Sharing adapters: cookbook/sharing.md
79
       - Sharing adapters: cookbook/sharing.md
79
       - Multi-source training: cookbook/multi-source-training.md
80
       - Multi-source training: cookbook/multi-source-training.md
81
+      - Multi-target export: cookbook/multi-target-export.md
80
       - Train from a folder: cookbook/train-from-folder.md
82
       - Train from a folder: cookbook/train-from-folder.md
81
       - Training across codebases: cookbook/training-across-codebases.md
83
       - Training across codebases: cookbook/training-across-codebases.md
82
       - Tokenized-section cache: cookbook/directive-cache.md
84
       - Tokenized-section cache: cookbook/directive-cache.md
src/dlm/cli/commands.pymodified
@@ -1551,7 +1551,7 @@ def export_cmd(
1551
         str,
1551
         str,
1552
         typer.Option(
1552
         typer.Option(
1553
             "--target",
1553
             "--target",
1554
-            help="Export destination. Currently supported: ollama, llama-server, vllm.",
1554
+            help="Export destination. Currently supported: ollama, llama-server, vllm, mlx-serve.",
1555
         ),
1555
         ),
1556
     ] = "ollama",
1556
     ] = "ollama",
1557
     quant: Annotated[
1557
     quant: Annotated[
@@ -1679,8 +1679,10 @@ def export_cmd(
1679
     )
1679
     )
1680
     from dlm.export.quantize import run_checked
1680
     from dlm.export.quantize import run_checked
1681
     from dlm.export.targets import (
1681
     from dlm.export.targets import (
1682
+        finalize_mlx_serve_export,
1682
         finalize_vllm_export,
1683
         finalize_vllm_export,
1683
         prepare_llama_server_export,
1684
         prepare_llama_server_export,
1685
+        prepare_mlx_serve_export,
1684
         prepare_vllm_export,
1686
         prepare_vllm_export,
1685
         resolve_target,
1687
         resolve_target,
1686
     )
1688
     )
@@ -1785,6 +1787,12 @@ def export_cmd(
1785
             "documents yet; this Sprint 41 slice only supports text bases."
1787
             "documents yet; this Sprint 41 slice only supports text bases."
1786
         )
1788
         )
1787
         raise typer.Exit(code=2)
1789
         raise typer.Exit(code=2)
1790
+    if resolved_target.name == "mlx-serve" and export_dispatch.accepts_audio:
1791
+        console.print(
1792
+            "[red]export:[/red] --target mlx-serve is not wired for audio-language "
1793
+            "documents yet; this Sprint 41 slice only supports text bases."
1794
+        )
1795
+        raise typer.Exit(code=2)
1788
     if export_dispatch.accepts_audio:
1796
     if export_dispatch.accepts_audio:
1789
         try:
1797
         try:
1790
             dispatch_result = export_dispatch.dispatch_export(
1798
             dispatch_result = export_dispatch.dispatch_export(
@@ -1830,6 +1838,12 @@ def export_cmd(
1830
             "documents yet; this Sprint 41 slice only supports text bases."
1838
             "documents yet; this Sprint 41 slice only supports text bases."
1831
         )
1839
         )
1832
         raise typer.Exit(code=2)
1840
         raise typer.Exit(code=2)
1841
+    if resolved_target.name == "mlx-serve" and export_dispatch.accepts_images:
1842
+        console.print(
1843
+            "[red]export:[/red] --target mlx-serve is not wired for vision-language "
1844
+            "documents yet; this Sprint 41 slice only supports text bases."
1845
+        )
1846
+        raise typer.Exit(code=2)
1833
     if export_dispatch.accepts_images:
1847
     if export_dispatch.accepts_images:
1834
         gguf_emission_context = None
1848
         gguf_emission_context = None
1835
         try:
1849
         try:
@@ -1957,6 +1971,70 @@ def export_cmd(
1957
             console.print(f"smoke:   {vllm_smoke.detail}")
1971
             console.print(f"smoke:   {vllm_smoke.detail}")
1958
         return
1972
         return
1959
 
1973
 
1974
+    if resolved_target.name == "mlx-serve":
1975
+        mlx_ignored_flags: list[str] = []
1976
+        if quant is not None:
1977
+            mlx_ignored_flags.append("--quant")
1978
+        if merged:
1979
+            mlx_ignored_flags.append("--merged")
1980
+        if dequantize:
1981
+            mlx_ignored_flags.append("--dequantize")
1982
+        if name is not None:
1983
+            mlx_ignored_flags.append("--name")
1984
+        if no_template:
1985
+            mlx_ignored_flags.append("--no-template")
1986
+        if skip_ollama:
1987
+            mlx_ignored_flags.append("--skip-ollama")
1988
+        if no_imatrix:
1989
+            mlx_ignored_flags.append("--no-imatrix")
1990
+        if draft is not None:
1991
+            mlx_ignored_flags.append("--draft")
1992
+        if no_draft:
1993
+            mlx_ignored_flags.append("--no-draft")
1994
+        if mlx_ignored_flags:
1995
+            console.print(
1996
+                "[yellow]export:[/yellow] ignoring flags not applicable to "
1997
+                f"`--target mlx-serve`: {', '.join(mlx_ignored_flags)}"
1998
+            )
1999
+
2000
+        declared_adapter_names = tuple(adapters_declared.keys()) if adapters_declared else None
2001
+        try:
2002
+            mlx_serve_result = prepare_mlx_serve_export(
2003
+                store=store,
2004
+                spec=spec,
2005
+                adapter_name=adapter,
2006
+                adapter_path_override=adapter_path_override,
2007
+                declared_adapter_names=declared_adapter_names,
2008
+            )
2009
+        except ExportError as exc:
2010
+            console.print(f"[red]export:[/red] {exc}")
2011
+            raise typer.Exit(code=1) from exc
2012
+
2013
+        mlx_serve_smoke = None if no_smoke else resolved_target.smoke_test(mlx_serve_result)
2014
+        if mlx_serve_smoke is not None and not mlx_serve_smoke.ok:
2015
+            console.print(
2016
+                f"[red]smoke:[/red] {mlx_serve_smoke.detail}\n"
2017
+                "  re-run with `--no-smoke` to skip the smoke test."
2018
+            )
2019
+            raise typer.Exit(code=1)
2020
+
2021
+        manifest_path = finalize_mlx_serve_export(
2022
+            store=store,
2023
+            spec=spec,
2024
+            prepared=mlx_serve_result,
2025
+            smoke_output_first_line=None if mlx_serve_smoke is None else mlx_serve_smoke.detail,
2026
+            adapter_name=adapter,
2027
+            adapter_mix=mix_entries,
2028
+        )
2029
+        console.print(f"[green]exported:[/green] {mlx_serve_result.export_dir}")
2030
+        console.print("target:  mlx-serve")
2031
+        assert mlx_serve_result.launch_script_path is not None
2032
+        console.print(f"launch:  {mlx_serve_result.launch_script_path.name}")
2033
+        console.print(f"manifest: {manifest_path.name}")
2034
+        if mlx_serve_smoke is not None and mlx_serve_smoke.detail:
2035
+            console.print(f"smoke:   {mlx_serve_smoke.detail}")
2036
+        return
2037
+
1960
     try:
2038
     try:
1961
         result = run_export(
2039
         result = run_export(
1962
             store,
2040
             store,
src/dlm/export/targets/__init__.pymodified
@@ -5,6 +5,11 @@ from __future__ import annotations
5
 from dlm.export.errors import UnknownExportTargetError
5
 from dlm.export.errors import UnknownExportTargetError
6
 from dlm.export.targets.base import ExportTarget, SmokeResult, TargetResult
6
 from dlm.export.targets.base import ExportTarget, SmokeResult, TargetResult
7
 from dlm.export.targets.llama_server import LLAMA_SERVER_TARGET, prepare_llama_server_export
7
 from dlm.export.targets.llama_server import LLAMA_SERVER_TARGET, prepare_llama_server_export
8
+from dlm.export.targets.mlx_serve import (
9
+    MLX_SERVE_TARGET,
10
+    finalize_mlx_serve_export,
11
+    prepare_mlx_serve_export,
12
+)
8
 from dlm.export.targets.ollama import OLLAMA_TARGET
13
 from dlm.export.targets.ollama import OLLAMA_TARGET
9
 from dlm.export.targets.vllm import VLLM_TARGET, finalize_vllm_export, prepare_vllm_export
14
 from dlm.export.targets.vllm import VLLM_TARGET, finalize_vllm_export, prepare_vllm_export
10
 
15
 
@@ -12,6 +17,7 @@ TARGETS: dict[str, ExportTarget] = {
12
     OLLAMA_TARGET.name: OLLAMA_TARGET,
17
     OLLAMA_TARGET.name: OLLAMA_TARGET,
13
     LLAMA_SERVER_TARGET.name: LLAMA_SERVER_TARGET,
18
     LLAMA_SERVER_TARGET.name: LLAMA_SERVER_TARGET,
14
     VLLM_TARGET.name: VLLM_TARGET,
19
     VLLM_TARGET.name: VLLM_TARGET,
20
+    MLX_SERVE_TARGET.name: MLX_SERVE_TARGET,
15
 }
21
 }
16
 
22
 
17
 
23
 
@@ -31,12 +37,15 @@ def resolve_target(name: str) -> ExportTarget:
31
 __all__ = [
37
 __all__ = [
32
     "ExportTarget",
38
     "ExportTarget",
33
     "LLAMA_SERVER_TARGET",
39
     "LLAMA_SERVER_TARGET",
40
+    "MLX_SERVE_TARGET",
34
     "SmokeResult",
41
     "SmokeResult",
35
     "TARGETS",
42
     "TARGETS",
36
     "TargetResult",
43
     "TargetResult",
37
     "VLLM_TARGET",
44
     "VLLM_TARGET",
38
     "available_targets",
45
     "available_targets",
46
+    "finalize_mlx_serve_export",
39
     "finalize_vllm_export",
47
     "finalize_vllm_export",
48
+    "prepare_mlx_serve_export",
40
     "prepare_llama_server_export",
49
     "prepare_llama_server_export",
41
     "prepare_vllm_export",
50
     "prepare_vllm_export",
42
     "resolve_target",
51
     "resolve_target",
src/dlm/export/targets/mlx_serve.pyadded
@@ -0,0 +1,272 @@
1
+"""MLX HTTP server target helpers."""
2
+
3
+from __future__ import annotations
4
+
5
+import shlex
6
+import shutil
7
+from pathlib import Path
8
+
9
+from dlm.base_models import BaseModelSpec
10
+from dlm.export.errors import ExportError, TargetSmokeError
11
+from dlm.export.manifest import ExportManifest, build_artifact, save_export_manifest, utc_now
12
+from dlm.export.record import append_export_summary
13
+from dlm.export.smoke import smoke_openai_compat_server
14
+from dlm.export.targets.base import ExportTarget, SmokeResult, TargetResult
15
+from dlm.inference.backends.mlx_backend import stage_mlx_adapter_dir
16
+from dlm.inference.backends.select import is_apple_silicon, mlx_available
17
+from dlm.io.atomic import write_text
18
+from dlm.store.paths import StorePath
19
+
20
+MLX_SERVE_EXPORT_SUBDIR = "mlx-serve"
21
+LAUNCH_SCRIPT_FILENAME = "mlx_serve_launch.sh"
22
+_HF_QUANT = "hf"
23
+_DEFAULT_ADAPTER_DIRNAME = "adapter"
24
+_MIXED_ADAPTER_DIRNAME = "mixed"
25
+
26
+
27
+class MlxServeTarget:
28
+    """Registered export target for MLX HTTP server launch artifacts."""
29
+
30
+    name = "mlx-serve"
31
+
32
+    def prepare(self, ctx: object) -> TargetResult:
33
+        raise NotImplementedError("mlx-serve exports are prepared via prepare_mlx_serve_export()")
34
+
35
+    def launch_command(self, prepared: TargetResult) -> list[str]:
36
+        return _build_command(prepared, use_script_dir=True)
37
+
38
+    def smoke_test(self, prepared: TargetResult) -> SmokeResult:
39
+        try:
40
+            first_line = smoke_openai_compat_server(_build_command(prepared, use_script_dir=False))
41
+        except (OSError, TargetSmokeError, ExportError) as exc:
42
+            return SmokeResult(attempted=True, ok=False, detail=str(exc))
43
+        return SmokeResult(attempted=True, ok=True, detail=first_line)
44
+
45
+
46
+def prepare_mlx_serve_export(
47
+    *,
48
+    store: StorePath,
49
+    spec: BaseModelSpec,
50
+    adapter_name: str | None,
51
+    adapter_path_override: Path | None,
52
+    declared_adapter_names: tuple[str, ...] | None,
53
+) -> TargetResult:
54
+    """Stage an MLX-loadable adapter dir plus launch script."""
55
+
56
+    _require_mlx_runtime()
57
+    source_adapter_dir, staged_dirname, adapter_version = _resolve_source_adapter(
58
+        store=store,
59
+        adapter_name=adapter_name,
60
+        adapter_path_override=adapter_path_override,
61
+        declared_adapter_names=declared_adapter_names,
62
+    )
63
+
64
+    export_dir = store.exports / MLX_SERVE_EXPORT_SUBDIR
65
+    export_dir.mkdir(parents=True, exist_ok=True)
66
+
67
+    staged_adapter_dir = export_dir / staged_dirname
68
+    if staged_adapter_dir.exists():
69
+        shutil.rmtree(staged_adapter_dir)
70
+    stage_mlx_adapter_dir(source_adapter_dir, staged_adapter_dir, base_hf_id=spec.hf_id)
71
+
72
+    launch_script_path = export_dir / LAUNCH_SCRIPT_FILENAME
73
+    draft = TargetResult(
74
+        name=MLX_SERVE_TARGET.name,
75
+        export_dir=export_dir,
76
+        manifest_path=export_dir / "export_manifest.json",
77
+        artifacts=(),
78
+        launch_script_path=launch_script_path,
79
+        extras={
80
+            "model": spec.hf_id,
81
+            "adapter_dir": staged_adapter_dir,
82
+            "adapter_version": adapter_version,
83
+        },
84
+    )
85
+    write_text(launch_script_path, _render_launch_script(MLX_SERVE_TARGET.launch_command(draft)))
86
+    launch_script_path.chmod(0o755)
87
+    return TargetResult(
88
+        name=draft.name,
89
+        export_dir=draft.export_dir,
90
+        manifest_path=draft.manifest_path,
91
+        artifacts=tuple(_artifact_paths(export_dir)),
92
+        launch_script_path=draft.launch_script_path,
93
+        config_path=None,
94
+        extras=draft.extras,
95
+    )
96
+
97
+
98
+def finalize_mlx_serve_export(
99
+    *,
100
+    store: StorePath,
101
+    spec: BaseModelSpec,
102
+    prepared: TargetResult,
103
+    smoke_output_first_line: str | None,
104
+    adapter_name: str | None,
105
+    adapter_mix: list[tuple[str, float]] | None,
106
+) -> Path:
107
+    """Write export_manifest.json and append the store export summary."""
108
+
109
+    from dlm import __version__ as dlm_version
110
+
111
+    artifacts = [
112
+        build_artifact(prepared.export_dir, path) for path in _artifact_paths(prepared.export_dir)
113
+    ]
114
+    adapter_version = _require_prepared_int(prepared, "adapter_version")
115
+    manifest = ExportManifest(
116
+        target=MLX_SERVE_TARGET.name,
117
+        quant=_HF_QUANT,
118
+        merged=False,
119
+        dequantized=False,
120
+        ollama_name=None,
121
+        created_at=utc_now(),
122
+        created_by=f"dlm-{dlm_version}",
123
+        llama_cpp_tag=None,
124
+        base_model_hf_id=spec.hf_id,
125
+        base_model_revision=spec.revision,
126
+        adapter_version=adapter_version,
127
+        artifacts=artifacts,
128
+    )
129
+    manifest_path = save_export_manifest(prepared.export_dir, manifest)
130
+    append_export_summary(
131
+        store=store,
132
+        quant=_HF_QUANT,
133
+        merged=False,
134
+        target=MLX_SERVE_TARGET.name,
135
+        llama_cpp_tag=None,
136
+        artifacts=artifacts,
137
+        ollama_name=None,
138
+        ollama_version_str=None,
139
+        smoke_first_line=smoke_output_first_line,
140
+        adapter_name=adapter_name,
141
+        adapter_mix=adapter_mix,
142
+    )
143
+    return manifest_path
144
+
145
+
146
+def _resolve_source_adapter(
147
+    *,
148
+    store: StorePath,
149
+    adapter_name: str | None,
150
+    adapter_path_override: Path | None,
151
+    declared_adapter_names: tuple[str, ...] | None,
152
+) -> tuple[Path, str, int]:
153
+    if adapter_path_override is not None:
154
+        if not adapter_path_override.exists():
155
+            raise ExportError(f"adapter_path_override {adapter_path_override} does not exist")
156
+        return (
157
+            adapter_path_override,
158
+            _MIXED_ADAPTER_DIRNAME,
159
+            _version_from_dir_name(adapter_path_override),
160
+        )
161
+
162
+    if declared_adapter_names and adapter_name is None:
163
+        raise ExportError(
164
+            "mlx-serve exports one adapter at a time; pass `--adapter <name>` "
165
+            "or `--adapter-mix` for multi-adapter documents."
166
+        )
167
+
168
+    if adapter_name is not None:
169
+        path = store.resolve_current_adapter_for(adapter_name)
170
+        pointer = store.adapter_current_pointer_for(adapter_name)
171
+        if path is None or not path.exists():
172
+            raise ExportError(
173
+                f"no current adapter under {pointer}; run `dlm train` before exporting."
174
+            )
175
+        return path, adapter_name, _version_from_dir_name(path)
176
+
177
+    path = store.resolve_current_adapter()
178
+    pointer = store.adapter_current_pointer
179
+    if path is None or not path.exists():
180
+        raise ExportError(f"no current adapter under {pointer}; run `dlm train` before exporting.")
181
+    return path, _DEFAULT_ADAPTER_DIRNAME, _version_from_dir_name(path)
182
+
183
+
184
+def _require_mlx_runtime() -> None:
185
+    if not is_apple_silicon():
186
+        raise ExportError(
187
+            "mlx-serve export requires Apple Silicon (darwin-arm64); "
188
+            "this target is not available on CUDA, ROCm, or CPU-only hosts."
189
+        )
190
+    if not mlx_available():
191
+        raise ExportError(
192
+            "mlx-serve export requires the mlx extra to be installed; "
193
+            "run `uv sync --extra mlx` and re-try."
194
+        )
195
+
196
+
197
+def _artifact_paths(export_dir: Path) -> list[Path]:
198
+    artifacts: list[Path] = []
199
+    for path in sorted(export_dir.rglob("*")):
200
+        if path.is_file() and path.name != "export_manifest.json":
201
+            artifacts.append(path)
202
+    return artifacts
203
+
204
+
205
+def _build_command(prepared: TargetResult, *, use_script_dir: bool) -> list[str]:
206
+    model = _require_prepared_str(prepared, "model")
207
+    adapter_dir = _require_prepared_path(prepared, "adapter_dir")
208
+    return [
209
+        "python",
210
+        "-m",
211
+        "mlx_lm.server",
212
+        "--model",
213
+        model,
214
+        "--adapter-path",
215
+        _script_dir_arg(adapter_dir) if use_script_dir else str(adapter_dir),
216
+        "--host",
217
+        "127.0.0.1",
218
+        "--port",
219
+        "8000",
220
+    ]
221
+
222
+
223
+def _script_dir_arg(path: Path) -> str:
224
+    return f"$SCRIPT_DIR/{path.name}"
225
+
226
+
227
+def _render_launch_script(command: list[str]) -> str:
228
+    rendered = " ".join(_quote_script_arg(arg) for arg in command)
229
+    return (
230
+        "#!/usr/bin/env bash\n"
231
+        "set -euo pipefail\n"
232
+        'SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"\n'
233
+        f'exec {rendered} "$@"\n'
234
+    )
235
+
236
+
237
+def _quote_script_arg(arg: str) -> str:
238
+    if arg.startswith("$SCRIPT_DIR/"):
239
+        return f'"{arg}"'
240
+    return shlex.quote(arg)
241
+
242
+
243
+def _version_from_dir_name(path: Path) -> int:
244
+    stem = path.name
245
+    if not stem.startswith("v") or not stem[1:].isdigit():
246
+        return 1
247
+    return int(stem[1:])
248
+
249
+
250
+def _require_prepared_str(prepared: TargetResult, key: str) -> str:
251
+    value = prepared.extras.get(key)
252
+    if not isinstance(value, str) or not value:
253
+        raise ExportError(f"mlx-serve prepared target missing string extra {key!r}")
254
+    return value
255
+
256
+
257
+def _require_prepared_path(prepared: TargetResult, key: str) -> Path:
258
+    value = prepared.extras.get(key)
259
+    if not isinstance(value, Path):
260
+        raise ExportError(f"mlx-serve prepared target missing Path extra {key!r}")
261
+    return value
262
+
263
+
264
+def _require_prepared_int(prepared: TargetResult, key: str) -> int:
265
+    value = prepared.extras.get(key)
266
+    if not isinstance(value, int):
267
+        raise ExportError(f"mlx-serve prepared target missing int extra {key!r}")
268
+    return value
269
+
270
+
271
+MLX_SERVE_TARGET = MlxServeTarget()
272
+assert isinstance(MLX_SERVE_TARGET, ExportTarget)
tests/unit/cli/test_export_target_flag.pymodified
@@ -52,6 +52,7 @@ class TestExportTargetFlag:
52
         assert "ollama" in text
52
         assert "ollama" in text
53
         assert "llama-server" in text
53
         assert "llama-server" in text
54
         assert "vllm" in text
54
         assert "vllm" in text
55
+        assert "mlx-serve" in text
55
 
56
 
56
     def test_ollama_target_reaches_existing_mutex_validation(self, tmp_path: Path) -> None:
57
     def test_ollama_target_reaches_existing_mutex_validation(self, tmp_path: Path) -> None:
57
         doc = _scaffold_doc(tmp_path)
58
         doc = _scaffold_doc(tmp_path)
@@ -112,3 +113,22 @@ class TestExportTargetFlag:
112
         )
113
         )
113
         assert result.exit_code == 2
114
         assert result.exit_code == 2
114
         assert "mutually exclusive" in _joined(result)
115
         assert "mutually exclusive" in _joined(result)
116
+
117
+    def test_mlx_serve_target_reaches_existing_mutex_validation(self, tmp_path: Path) -> None:
118
+        runner = CliRunner()
119
+        result = runner.invoke(
120
+            app,
121
+            [
122
+                "--home",
123
+                str(tmp_path / "home"),
124
+                "export",
125
+                str(tmp_path / "ghost.dlm"),
126
+                "--target",
127
+                "mlx-serve",
128
+                "--draft",
129
+                "qwen2.5:0.5b",
130
+                "--no-draft",
131
+            ],
132
+        )
133
+        assert result.exit_code == 2
134
+        assert "mutually exclusive" in _joined(result)
tests/unit/export/targets/test_mlx_serve_argv.pyadded
@@ -0,0 +1,173 @@
1
+"""MLX serve launch artifact generation."""
2
+
3
+from __future__ import annotations
4
+
5
+from pathlib import Path
6
+
7
+import pytest
8
+
9
+from dlm.base_models import BASE_MODELS
10
+from dlm.export.errors import ExportError
11
+from dlm.export.manifest import load_export_manifest
12
+from dlm.export.targets.mlx_serve import (
13
+    LAUNCH_SCRIPT_FILENAME,
14
+    MLX_SERVE_TARGET,
15
+    finalize_mlx_serve_export,
16
+    prepare_mlx_serve_export,
17
+)
18
+from dlm.store.manifest import Manifest, load_manifest, save_manifest
19
+from dlm.store.paths import for_dlm
20
+
21
+_SPEC = BASE_MODELS["smollm2-135m"]
22
+
23
+
24
+def _write_adapter(path: Path) -> None:
25
+    path.mkdir(parents=True)
26
+    (path / "adapter_config.json").write_text("{}", encoding="utf-8")
27
+    (path / "adapter_model.safetensors").write_bytes(b"adapter")
28
+
29
+
30
+def _fake_stage_mlx(src: Path, dst: Path, *, base_hf_id: str) -> Path:
31
+    assert src.exists()
32
+    assert base_hf_id == _SPEC.hf_id
33
+    dst.mkdir(parents=True, exist_ok=True)
34
+    (dst / "adapter_config.json").write_text("{}", encoding="utf-8")
35
+    (dst / "adapters.safetensors").write_bytes(b"mlx-adapter")
36
+    return dst
37
+
38
+
39
+def _setup_flat_store(tmp_path: Path) -> object:
40
+    store = for_dlm("01MLXTEST", home=tmp_path)
41
+    store.ensure_layout()
42
+    save_manifest(store.manifest, Manifest(dlm_id="01MLXTEST", base_model=_SPEC.key))
43
+    adapter = store.adapter_version(3)
44
+    _write_adapter(adapter)
45
+    store.set_current_adapter(adapter)
46
+    return store
47
+
48
+
49
+def _setup_named_store(tmp_path: Path) -> object:
50
+    store = for_dlm("01MLXMULTI", home=tmp_path)
51
+    store.ensure_layout()
52
+    save_manifest(store.manifest, Manifest(dlm_id="01MLXMULTI", base_model=_SPEC.key))
53
+    knowledge = store.adapter_version_for("knowledge", 2)
54
+    tone = store.adapter_version_for("tone", 4)
55
+    _write_adapter(knowledge)
56
+    _write_adapter(tone)
57
+    store.set_current_adapter_for("knowledge", knowledge)
58
+    store.set_current_adapter_for("tone", tone)
59
+    return store
60
+
61
+
62
+class TestPrepareMlxServeExport:
63
+    def test_prepare_writes_launch_script_and_manifest(
64
+        self, tmp_path: Path, monkeypatch: object
65
+    ) -> None:
66
+        store = _setup_flat_store(tmp_path)
67
+        monkeypatch.setattr("dlm.export.targets.mlx_serve.is_apple_silicon", lambda: True)
68
+        monkeypatch.setattr("dlm.export.targets.mlx_serve.mlx_available", lambda: True)
69
+        monkeypatch.setattr("dlm.export.targets.mlx_serve.stage_mlx_adapter_dir", _fake_stage_mlx)
70
+
71
+        prepared = prepare_mlx_serve_export(
72
+            store=store,
73
+            spec=_SPEC,
74
+            adapter_name=None,
75
+            adapter_path_override=None,
76
+            declared_adapter_names=None,
77
+        )
78
+        manifest_path = finalize_mlx_serve_export(
79
+            store=store,
80
+            spec=_SPEC,
81
+            prepared=prepared,
82
+            smoke_output_first_line="hello from mlx",
83
+            adapter_name=None,
84
+            adapter_mix=None,
85
+        )
86
+
87
+        assert prepared.launch_script_path is not None
88
+        assert prepared.launch_script_path.name == LAUNCH_SCRIPT_FILENAME
89
+        script = prepared.launch_script_path.read_text(encoding="utf-8")
90
+        assert script.startswith("#!/usr/bin/env bash\nset -euo pipefail\n")
91
+        assert "python -m mlx_lm.server" in script
92
+        assert f"--model {_SPEC.hf_id}" in script
93
+        assert '--adapter-path "$SCRIPT_DIR/adapter"' in script
94
+
95
+        export_manifest = load_export_manifest(prepared.export_dir)
96
+        assert manifest_path == prepared.manifest_path
97
+        assert export_manifest.target == "mlx-serve"
98
+        assert export_manifest.quant == "hf"
99
+        assert export_manifest.adapter_version == 3
100
+        assert any(artifact.path == "mlx_serve_launch.sh" for artifact in export_manifest.artifacts)
101
+        assert any(
102
+            artifact.path == "adapter/adapters.safetensors"
103
+            for artifact in export_manifest.artifacts
104
+        )
105
+
106
+        store_manifest = load_manifest(store.manifest)
107
+        assert store_manifest.exports[-1].target == "mlx-serve"
108
+        assert store_manifest.exports[-1].quant == "hf"
109
+        assert store_manifest.exports[-1].smoke_output_first_line == "hello from mlx"
110
+
111
+    def test_multi_adapter_export_requires_explicit_selection(
112
+        self, tmp_path: Path, monkeypatch: object
113
+    ) -> None:
114
+        store = _setup_named_store(tmp_path)
115
+        monkeypatch.setattr("dlm.export.targets.mlx_serve.is_apple_silicon", lambda: True)
116
+        monkeypatch.setattr("dlm.export.targets.mlx_serve.mlx_available", lambda: True)
117
+
118
+        with pytest.raises(ExportError, match="one adapter at a time"):
119
+            prepare_mlx_serve_export(
120
+                store=store,
121
+                spec=_SPEC,
122
+                adapter_name=None,
123
+                adapter_path_override=None,
124
+                declared_adapter_names=("knowledge", "tone"),
125
+            )
126
+
127
+    def test_refuses_without_apple_silicon_runtime(
128
+        self, tmp_path: Path, monkeypatch: object
129
+    ) -> None:
130
+        store = _setup_flat_store(tmp_path)
131
+        monkeypatch.setattr("dlm.export.targets.mlx_serve.is_apple_silicon", lambda: False)
132
+
133
+        with pytest.raises(ExportError, match="Apple Silicon"):
134
+            prepare_mlx_serve_export(
135
+                store=store,
136
+                spec=_SPEC,
137
+                adapter_name=None,
138
+                adapter_path_override=None,
139
+                declared_adapter_names=None,
140
+            )
141
+
142
+
143
+class TestMlxServeSmoke:
144
+    def test_smoke_uses_absolute_runtime_paths(self, tmp_path: Path, monkeypatch: object) -> None:
145
+        store = _setup_flat_store(tmp_path)
146
+        monkeypatch.setattr("dlm.export.targets.mlx_serve.is_apple_silicon", lambda: True)
147
+        monkeypatch.setattr("dlm.export.targets.mlx_serve.mlx_available", lambda: True)
148
+        monkeypatch.setattr("dlm.export.targets.mlx_serve.stage_mlx_adapter_dir", _fake_stage_mlx)
149
+        prepared = prepare_mlx_serve_export(
150
+            store=store,
151
+            spec=_SPEC,
152
+            adapter_name=None,
153
+            adapter_path_override=None,
154
+            declared_adapter_names=None,
155
+        )
156
+        seen: list[list[str]] = []
157
+
158
+        def _fake_smoke(argv: list[str], **_: object) -> str:
159
+            seen.append(list(argv))
160
+            return "mlx replied"
161
+
162
+        monkeypatch.setattr("dlm.export.targets.mlx_serve.smoke_openai_compat_server", _fake_smoke)
163
+
164
+        result = MLX_SERVE_TARGET.smoke_test(prepared)
165
+
166
+        assert result.attempted is True
167
+        assert result.ok is True
168
+        assert result.detail == "mlx replied"
169
+        argv = seen[0]
170
+        assert argv[:3] == ["python", "-m", "mlx_lm.server"]
171
+        assert "$SCRIPT_DIR" not in " ".join(argv)
172
+        assert _SPEC.hf_id in argv
173
+        assert str(prepared.export_dir / "adapter") in argv
tests/unit/export/targets/test_registry.pymodified
@@ -19,12 +19,13 @@ class TestRegistry:
19
         assert TARGETS["ollama"] is target
19
         assert TARGETS["ollama"] is target
20
         assert "llama-server" in TARGETS
20
         assert "llama-server" in TARGETS
21
         assert "vllm" in TARGETS
21
         assert "vllm" in TARGETS
22
-        assert available_targets() == ("ollama", "llama-server", "vllm")
22
+        assert "mlx-serve" in TARGETS
23
+        assert available_targets() == ("ollama", "llama-server", "vllm", "mlx-serve")
23
 
24
 
24
     def test_unknown_target_lists_available_targets(self) -> None:
25
     def test_unknown_target_lists_available_targets(self) -> None:
25
         with pytest.raises(
26
         with pytest.raises(
26
             UnknownExportTargetError,
27
             UnknownExportTargetError,
27
-            match="available targets: ollama, llama-server, vllm",
28
+            match="available targets: ollama, llama-server, vllm, mlx-serve",
28
         ):
29
         ):
29
             resolve_target("sglang")
30
             resolve_target("sglang")
30
 
31