tenseleyflow/documentlanguagemodel / e9dda81

Add vLLM export target

Authored by espadonne
SHA: e9dda81eb42d0dc0f4dfe41026a0d933f3091959
Parents: 82b797f
Tree: 1c4e02f

10 changed files

Status  File                                           +    -
M       README.md                                       2    2
M       docs/cli/reference.md                           1    1
M       src/dlm/cli/commands.py                        84    2
A       src/dlm/export/record.py                       58    0
M       src/dlm/export/runner.py                        5   54
M       src/dlm/export/targets/__init__.py              5    0
A       src/dlm/export/targets/vllm.py                356    0
M       tests/unit/cli/test_export_target_flag.py      21    1
M       tests/unit/export/targets/test_registry.py      5    4
A       tests/unit/export/targets/test_vllm_argv.py   166    0
README.md (modified)
@@ -19,8 +19,8 @@ Ollama and `llama-server`.
 
 **Status:** pre-v1.0, but far beyond the original MVP framing. The core
 author/train/prompt/export/pack/share loop is real, and newer runtime-target
-work is landing incrementally. Current export targets are `ollama` and
-`llama-server`.
+work is landing incrementally. Current export targets are `ollama`,
+`llama-server`, and `vllm`.
 
 ## What A `.dlm` Actually Is
 
docs/cli/reference.md (modified)
@@ -203,7 +203,7 @@ dlm export <path> [--target NAME] [--quant Q] [--merged [--dequantize]]
 
 | Option | Default | Notes |
 |---|---|---|
-| `--target NAME` | `ollama` | Export destination. Sprint 41 currently supports `ollama` and `llama-server`. The `llama-server` path writes launch artifacts against the existing GGUF export and uses the shared OpenAI-compatible HTTP smoke harness when `--no-smoke` is not passed. |
+| `--target NAME` | `ollama` | Export destination. Sprint 41 currently supports `ollama`, `llama-server`, and `vllm`. The `llama-server` path writes launch artifacts against the existing GGUF export and uses the shared OpenAI-compatible HTTP smoke harness; the `vllm` path writes `vllm_launch.sh` + `vllm_config.json` against the local adapter layout and ignores GGUF-only flags. |
 | `--quant Q` | frontmatter.export.default_quant | `Q4_K_M` / `Q5_K_M` / `Q6_K` / `Q8_0` / `F16`. |
 | `--merged` | false | Merge LoRA into base before quantizing. |
 | `--dequantize` | false | Required with `--merged` on a QLoRA adapter (pitfall #3). |
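
For orientation, a minimal in-process invocation of the new flag, mirroring the CliRunner pattern this commit's tests use (the `app` import path and the `.dlm` path are illustrative assumptions, not taken from this diff):

```python
# Sketch only: drive `dlm export --target vllm` the way the unit tests do.
from typer.testing import CliRunner

from dlm.cli.commands import app  # assumed location of the Typer app

result = CliRunner().invoke(
    app,
    ["export", "docs/demo.dlm", "--target", "vllm", "--no-smoke"],
)
print(result.exit_code, result.output)
```
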
src/dlm/cli/commands.py (modified)
@@ -1551,7 +1551,7 @@ def export_cmd(
         str,
         typer.Option(
             "--target",
-            help="Export destination. Currently supported: ollama, llama-server.",
+            help="Export destination. Currently supported: ollama, llama-server, vllm.",
         ),
     ] = "ollama",
     quant: Annotated[
@@ -1678,7 +1678,12 @@ def export_cmd(
         OllamaVersionError,
     )
     from dlm.export.quantize import run_checked
-    from dlm.export.targets import prepare_llama_server_export, resolve_target
+    from dlm.export.targets import (
+        finalize_vllm_export,
+        prepare_llama_server_export,
+        prepare_vllm_export,
+        resolve_target,
+    )
     from dlm.store.paths import for_dlm
 
     console = Console(stderr=True)
@@ -1774,6 +1779,12 @@ def export_cmd(
     from dlm.modality import modality_for
 
     export_dispatch = modality_for(spec)
+    if resolved_target.name == "vllm" and export_dispatch.accepts_audio:
+        console.print(
+            "[red]export:[/red] --target vllm is not wired for audio-language "
+            "documents yet; this Sprint 41 slice only supports text bases."
+        )
+        raise typer.Exit(code=2)
     if export_dispatch.accepts_audio:
         try:
             dispatch_result = export_dispatch.dispatch_export(
@@ -1813,6 +1824,12 @@ def export_cmd(
     # missing local base snapshot should not hard-fail the whole
     # export — the dispatcher can still emit the HF-snapshot path
     # without GGUF context.
+    if resolved_target.name == "vllm" and export_dispatch.accepts_images:
+        console.print(
+            "[red]export:[/red] --target vllm is not wired for vision-language "
+            "documents yet; this Sprint 41 slice only supports text bases."
+        )
+        raise typer.Exit(code=2)
     if export_dispatch.accepts_images:
         gguf_emission_context = None
         try:
@@ -1874,6 +1891,71 @@ def export_cmd(
             combination_type=adapter_mix_method,  # type: ignore[arg-type]
         )
 
+    if resolved_target.name == "vllm":
+        ignored_flags: list[str] = []
+        if quant is not None:
+            ignored_flags.append("--quant")
+        if merged:
+            ignored_flags.append("--merged")
+        if dequantize:
+            ignored_flags.append("--dequantize")
+        if no_template:
+            ignored_flags.append("--no-template")
+        if skip_ollama:
+            ignored_flags.append("--skip-ollama")
+        if no_imatrix:
+            ignored_flags.append("--no-imatrix")
+        if draft is not None:
+            ignored_flags.append("--draft")
+        if no_draft:
+            ignored_flags.append("--no-draft")
+        if ignored_flags:
+            console.print(
+                "[yellow]export:[/yellow] ignoring flags not applicable to "
+                f"`--target vllm`: {', '.join(ignored_flags)}"
+            )
+
+        declared_adapter_names = tuple(adapters_declared.keys()) if adapters_declared else None
+        try:
+            vllm_result = prepare_vllm_export(
+                store=store,
+                spec=spec,
+                served_model_name=name or f"dlm-{parsed.frontmatter.dlm_id.lower()}",
+                adapter_name=adapter,
+                adapter_path_override=adapter_path_override,
+                declared_adapter_names=declared_adapter_names,
+            )
+        except ExportError as exc:
+            console.print(f"[red]export:[/red] {exc}")
+            raise typer.Exit(code=1) from exc
+
+        vllm_smoke = None if no_smoke else resolved_target.smoke_test(vllm_result)
+        if vllm_smoke is not None and not vllm_smoke.ok:
+            console.print(
+                f"[red]smoke:[/red] {vllm_smoke.detail}\n"
+                "  re-run with `--no-smoke` to skip the smoke test."
+            )
+            raise typer.Exit(code=1)
+
+        manifest_path = finalize_vllm_export(
+            store=store,
+            spec=spec,
+            prepared=vllm_result,
+            smoke_output_first_line=None if vllm_smoke is None else vllm_smoke.detail,
+            adapter_name=adapter,
+            adapter_mix=mix_entries,
+        )
+        console.print(f"[green]exported:[/green] {vllm_result.export_dir}")
+        console.print("target:  vllm")
+        assert vllm_result.launch_script_path is not None
+        assert vllm_result.config_path is not None
+        console.print(f"launch:  {vllm_result.launch_script_path.name}")
+        console.print(f"config:  {vllm_result.config_path.name}")
+        console.print(f"manifest: {manifest_path.name}")
+        if vllm_smoke is not None and vllm_smoke.detail:
+            console.print(f"smoke:   {vllm_smoke.detail}")
+        return
+
     try:
         result = run_export(
             store,
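
On the success path, the branch above prints a short artifact summary. A hedged sketch of that output, assembled from the `console.print` calls in this hunk (the store path is illustrative):

```text
exported: ~/.dlm/stores/01EXAMPLE/exports/vllm
target:  vllm
launch:  vllm_launch.sh
config:  vllm_config.json
manifest: export_manifest.json
```
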
src/dlm/export/record.py (added)
@@ -0,0 +1,58 @@
+"""Shared export-record helpers used by GGUF and non-GGUF targets."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from dlm.export.manifest import utc_now
+
+if TYPE_CHECKING:
+    from dlm.store.paths import StorePath
+
+
+def append_export_summary(
+    *,
+    store: StorePath,
+    quant: str,
+    merged: bool,
+    target: str,
+    llama_cpp_tag: str | None,
+    artifacts: list[Any],
+    ollama_name: str | None,
+    ollama_version_str: str | None,
+    smoke_first_line: str | None,
+    adapter_name: str | None = None,
+    adapter_mix: list[tuple[str, float]] | None = None,
+    timeout: float = 60.0,
+) -> None:
+    """Update `manifest.exports` with one new export row."""
+    from dlm.store.lock import exclusive
+    from dlm.store.manifest import ExportSummary, load_manifest, save_manifest
+
+    base_sha = next((a.sha256 for a in artifacts if a.path.startswith("base.")), None)
+    adapter_sha = next((a.sha256 for a in artifacts if a.path.startswith("adapter.")), None)
+
+    summary = ExportSummary(
+        exported_at=utc_now(),
+        target=target,
+        quant=quant,
+        merged=merged,
+        ollama_name=ollama_name,
+        ollama_version=ollama_version_str,
+        llama_cpp_tag=llama_cpp_tag,
+        base_gguf_sha256=base_sha,
+        adapter_gguf_sha256=adapter_sha,
+        smoke_output_first_line=smoke_first_line,
+        adapter_name=adapter_name,
+        adapter_mix=adapter_mix,
+    )
+
+    with exclusive(store.lock, timeout=timeout):
+        manifest = load_manifest(store.manifest)
+        updated = manifest.model_copy(
+            update={
+                "exports": [*manifest.exports, summary],
+                "updated_at": utc_now(),
+            }
+        )
+        save_manifest(store.manifest, updated)
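
The helper is target-agnostic by design. A hedged call sketch for a non-GGUF target (values mirror what `finalize_vllm_export` passes later in this diff; `store` and `artifacts` are assumed to be a `StorePath` and rows exposing `.path`/`.sha256`):

```python
# Minimal sketch: record one non-GGUF ("hf") export row for the vllm target.
from dlm.export.record import append_export_summary

append_export_summary(
    store=store,          # StorePath for the open .dlm store
    quant="hf",           # non-GGUF exports record the pseudo-quant "hf"
    merged=False,
    target="vllm",
    llama_cpp_tag=None,   # no llama.cpp stage for this target
    artifacts=artifacts,  # sha256 rows; "base."/"adapter." prefixes feed the GGUF fields
    ollama_name=None,
    ollama_version_str=None,
    smoke_first_line=None,
)
```
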
src/dlm/export/runner.py (modified)
@@ -34,6 +34,7 @@ from dlm.export.manifest import (
 from dlm.export.plan import ExportPlan
 from dlm.export.precision_safety import require_dequantize_or_refuse
 from dlm.export.quantize import run_checked
+from dlm.export.record import append_export_summary
 
 if TYPE_CHECKING:
     from dlm.base_models import BaseModelSpec
@@ -335,9 +336,10 @@ def run_export(
     manifest_path = save_export_manifest(export_dir, em)
 
     # 8. Append to store manifest.exports.
-    _append_export_summary(
+    append_export_summary(
         store=store,
-        plan=plan,
+        quant=plan.quant,
+        merged=plan.merged,
         llama_cpp_tag=em.llama_cpp_tag,
         artifacts=em.artifacts,
         ollama_name=em.ollama_name,
@@ -346,6 +348,7 @@
         ollama_version_str=em.ollama_version_str,
         target=resolved_target.name,
         adapter_name=adapter_name,
         adapter_mix=adapter_mix,
+        timeout=_APPEND_LOCK_TIMEOUT,
     )
 
     return ExportResult(
@@ -617,55 +620,3 @@ def _run_ollama_stage(
         smoke_first_line = first_line(stdout)
 
     return modelfile_path, name, ver_str, smoke_first_line
-
-
-def _append_export_summary(
-    *,
-    store: StorePath,
-    plan: ExportPlan,
-    llama_cpp_tag: str | None,
-    artifacts: list[Any],
-    ollama_name: str | None,
-    ollama_version_str: str | None,
-    smoke_first_line: str | None,
-    target: str,
-    adapter_name: str | None = None,
-    adapter_mix: list[tuple[str, float]] | None = None,
-) -> None:
-    """Update `manifest.exports` with a new `ExportSummary` row."""
-    from dlm.store.manifest import ExportSummary, load_manifest, save_manifest
-
-    base_sha = next((a.sha256 for a in artifacts if a.path.startswith("base.")), None)
-    adapter_sha = next((a.sha256 for a in artifacts if a.path.startswith("adapter.")), None)
-
-    summary = ExportSummary(
-        exported_at=utc_now(),
-        target=target,
-        quant=plan.quant,
-        merged=plan.merged,
-        ollama_name=ollama_name,
-        ollama_version=ollama_version_str,
-        llama_cpp_tag=llama_cpp_tag,
-        base_gguf_sha256=base_sha,
-        adapter_gguf_sha256=adapter_sha,
-        smoke_output_first_line=smoke_first_line,
-        adapter_name=adapter_name,
-        adapter_mix=adapter_mix,
-    )
-
-    # The manifest read-modify-write must be serialized: two concurrent
-    # `dlm export` invocations on the same store (different quants) would
-    # otherwise race and drop one summary. The per-store exclusive lock
-    # is the same one `dlm train` takes; holding it across load→save
-    # keeps `manifest.exports` append-atomic.
-    from dlm.store.lock import exclusive
-
-    with exclusive(store.lock, timeout=_APPEND_LOCK_TIMEOUT):
-        manifest = load_manifest(store.manifest)
-        updated = manifest.model_copy(
-            update={
-                "exports": [*manifest.exports, summary],
-                "updated_at": utc_now(),
-            }
-        )
-        save_manifest(store.manifest, updated)
src/dlm/export/targets/__init__.py (modified)
@@ -6,10 +6,12 @@ from dlm.export.errors import UnknownExportTargetError
 from dlm.export.targets.base import ExportTarget, SmokeResult, TargetResult
 from dlm.export.targets.llama_server import LLAMA_SERVER_TARGET, prepare_llama_server_export
 from dlm.export.targets.ollama import OLLAMA_TARGET
+from dlm.export.targets.vllm import VLLM_TARGET, finalize_vllm_export, prepare_vllm_export
 
 TARGETS: dict[str, ExportTarget] = {
     OLLAMA_TARGET.name: OLLAMA_TARGET,
     LLAMA_SERVER_TARGET.name: LLAMA_SERVER_TARGET,
+    VLLM_TARGET.name: VLLM_TARGET,
 }
 
 
@@ -32,7 +34,10 @@ __all__ = [
     "SmokeResult",
     "TARGETS",
     "TargetResult",
+    "VLLM_TARGET",
     "available_targets",
+    "finalize_vllm_export",
     "prepare_llama_server_export",
+    "prepare_vllm_export",
     "resolve_target",
 ]
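
After registration, the public registry surface behaves as the updated test_registry.py below asserts:

```python
from dlm.export.targets import TARGETS, available_targets, resolve_target

assert available_targets() == ("ollama", "llama-server", "vllm")
assert "vllm" in TARGETS
assert resolve_target("vllm").name == "vllm"
```
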
src/dlm/export/targets/vllm.py (added)
@@ -0,0 +1,356 @@
+"""vLLM target helpers."""
+
+from __future__ import annotations
+
+import json
+import shlex
+import shutil
+from dataclasses import dataclass
+from pathlib import Path
+
+from dlm.base_models import BaseModelSpec
+from dlm.export.errors import ExportError, TargetSmokeError
+from dlm.export.manifest import ExportManifest, build_artifact, save_export_manifest, utc_now
+from dlm.export.record import append_export_summary
+from dlm.export.smoke import smoke_openai_compat_server
+from dlm.export.targets.base import ExportTarget, SmokeResult, TargetResult
+from dlm.io.atomic import write_text
+from dlm.store.paths import StorePath
+
+VLLM_EXPORT_SUBDIR = "vllm"
+VLLM_CONFIG_FILENAME = "vllm_config.json"
+LAUNCH_SCRIPT_FILENAME = "vllm_launch.sh"
+_ADAPTERS_DIRNAME = "adapters"
+_HF_QUANT = "hf"
+_DEFAULT_MODULE_NAME = "adapter"
+_MIXED_MODULE_NAME = "mixed"
+
+
+@dataclass(frozen=True)
+class LoraModule:
+    name: str
+    path: Path
+    adapter_version: int
+
+
+class VllmTarget:
+    """Registered export target for vLLM launch artifacts."""
+
+    name = "vllm"
+
+    def prepare(self, ctx: object) -> TargetResult:
+        raise NotImplementedError("vllm exports are prepared via prepare_vllm_export()")
+
+    def launch_command(self, prepared: TargetResult) -> list[str]:
+        return _build_command(prepared, use_script_dir=True)
+
+    def smoke_test(self, prepared: TargetResult) -> SmokeResult:
+        try:
+            first_line = smoke_openai_compat_server(_build_command(prepared, use_script_dir=False))
+        except (OSError, TargetSmokeError, ExportError) as exc:
+            return SmokeResult(attempted=True, ok=False, detail=str(exc))
+        return SmokeResult(attempted=True, ok=True, detail=first_line)
+
+
+def prepare_vllm_export(
+    *,
+    store: StorePath,
+    spec: BaseModelSpec,
+    served_model_name: str,
+    adapter_name: str | None,
+    adapter_path_override: Path | None,
+    declared_adapter_names: tuple[str, ...] | None,
+) -> TargetResult:
+    """Stage vLLM launch artifacts plus local adapter module copies."""
+
+    export_dir = store.exports / VLLM_EXPORT_SUBDIR
+    export_dir.mkdir(parents=True, exist_ok=True)
+
+    adapters_dir = export_dir / _ADAPTERS_DIRNAME
+    if adapters_dir.exists():
+        shutil.rmtree(adapters_dir)
+    adapters_dir.mkdir(parents=True, exist_ok=True)
+
+    modules = _stage_modules(
+        store=store,
+        adapters_dir=adapters_dir,
+        adapter_name=adapter_name,
+        adapter_path_override=adapter_path_override,
+        declared_adapter_names=declared_adapter_names,
+    )
+    if not modules:
+        raise ExportError("vllm export needs at least one adapter module")
+
+    config_path = export_dir / VLLM_CONFIG_FILENAME
+    launch_script_path = export_dir / LAUNCH_SCRIPT_FILENAME
+    draft = TargetResult(
+        name=VLLM_TARGET.name,
+        export_dir=export_dir,
+        manifest_path=export_dir / "export_manifest.json",
+        artifacts=(),
+        launch_script_path=launch_script_path,
+        config_path=config_path,
+        extras={
+            "model": spec.hf_id,
+            "revision": spec.revision,
+            "served_model_name": served_model_name,
+            "module_specs": tuple(modules),
+            "adapter_version": max(module.adapter_version for module in modules),
+        },
+    )
+    write_text(config_path, _render_config(draft))
+    write_text(launch_script_path, _render_launch_script(VLLM_TARGET.launch_command(draft)))
+    launch_script_path.chmod(0o755)
+    return TargetResult(
+        name=draft.name,
+        export_dir=draft.export_dir,
+        manifest_path=draft.manifest_path,
+        artifacts=tuple(_artifact_paths(export_dir)),
+        launch_script_path=draft.launch_script_path,
+        config_path=draft.config_path,
+        extras=draft.extras,
+    )
+
+
+def finalize_vllm_export(
+    *,
+    store: StorePath,
+    spec: BaseModelSpec,
+    prepared: TargetResult,
+    smoke_output_first_line: str | None,
+    adapter_name: str | None,
+    adapter_mix: list[tuple[str, float]] | None,
+) -> Path:
+    """Write export_manifest.json and append the store export summary."""
+
+    from dlm import __version__ as dlm_version
+
+    artifacts = [
+        build_artifact(prepared.export_dir, path) for path in _artifact_paths(prepared.export_dir)
+    ]
+    adapter_version = _require_prepared_int(prepared, "adapter_version")
+    manifest = ExportManifest(
+        target=VLLM_TARGET.name,
+        quant=_HF_QUANT,
+        merged=False,
+        dequantized=False,
+        ollama_name=None,
+        created_at=utc_now(),
+        created_by=f"dlm-{dlm_version}",
+        llama_cpp_tag=None,
+        base_model_hf_id=spec.hf_id,
+        base_model_revision=spec.revision,
+        adapter_version=adapter_version,
+        artifacts=artifacts,
+    )
+    manifest_path = save_export_manifest(prepared.export_dir, manifest)
+    append_export_summary(
+        store=store,
+        quant=_HF_QUANT,
+        merged=False,
+        target=VLLM_TARGET.name,
+        llama_cpp_tag=None,
+        artifacts=artifacts,
+        ollama_name=None,
+        ollama_version_str=None,
+        smoke_first_line=smoke_output_first_line,
+        adapter_name=adapter_name,
+        adapter_mix=adapter_mix,
+    )
+    return manifest_path
+
+
+def _stage_modules(
+    *,
+    store: StorePath,
+    adapters_dir: Path,
+    adapter_name: str | None,
+    adapter_path_override: Path | None,
+    declared_adapter_names: tuple[str, ...] | None,
+) -> list[LoraModule]:
+    modules = _resolve_modules(
+        store=store,
+        adapter_name=adapter_name,
+        adapter_path_override=adapter_path_override,
+        declared_adapter_names=declared_adapter_names,
+    )
+    staged: list[LoraModule] = []
+    for module in modules:
+        target_dir = adapters_dir / module.name
+        shutil.copytree(module.path, target_dir)
+        staged.append(LoraModule(module.name, target_dir, module.adapter_version))
+    return staged
+
+
+def _resolve_modules(
+    *,
+    store: StorePath,
+    adapter_name: str | None,
+    adapter_path_override: Path | None,
+    declared_adapter_names: tuple[str, ...] | None,
+) -> list[LoraModule]:
+    if adapter_path_override is not None:
+        if not adapter_path_override.exists():
+            raise ExportError(f"adapter_path_override {adapter_path_override} does not exist")
+        return [
+            LoraModule(
+                name=_MIXED_MODULE_NAME,
+                path=adapter_path_override,
+                adapter_version=_version_from_dir_name(adapter_path_override),
+            )
+        ]
+
+    if adapter_name is not None:
+        path = store.resolve_current_adapter_for(adapter_name)
+        pointer = store.adapter_current_pointer_for(adapter_name)
+        if path is None or not path.exists():
+            raise ExportError(
+                f"no current adapter under {pointer}; run `dlm train` before exporting."
+            )
+        return [
+            LoraModule(
+                name=adapter_name,
+                path=path,
+                adapter_version=_version_from_dir_name(path),
+            )
+        ]
+
+    if declared_adapter_names:
+        modules: list[LoraModule] = []
+        for name in declared_adapter_names:
+            path = store.resolve_current_adapter_for(name)
+            pointer = store.adapter_current_pointer_for(name)
+            if path is None or not path.exists():
+                raise ExportError(
+                    f"no current adapter under {pointer}; run `dlm train` before exporting."
+                )
+            modules.append(
+                LoraModule(name=name, path=path, adapter_version=_version_from_dir_name(path))
+            )
+        return modules
+
+    path = store.resolve_current_adapter()
+    pointer = store.adapter_current_pointer
+    if path is None or not path.exists():
+        raise ExportError(f"no current adapter under {pointer}; run `dlm train` before exporting.")
+    return [
+        LoraModule(
+            name=_DEFAULT_MODULE_NAME,
+            path=path,
+            adapter_version=_version_from_dir_name(path),
+        )
+    ]
+
+
+def _version_from_dir_name(path: Path) -> int:
+    stem = path.name
+    if not stem.startswith("v") or not stem[1:].isdigit():
+        return 1
+    return int(stem[1:])
+
+
+def _artifact_paths(export_dir: Path) -> list[Path]:
+    artifacts: list[Path] = []
+    for path in sorted(export_dir.rglob("*")):
+        if path.is_file() and path.name != "export_manifest.json":
+            artifacts.append(path)
+    return artifacts
+
+
+def _build_command(prepared: TargetResult, *, use_script_dir: bool) -> list[str]:
+    model = _require_prepared_str(prepared, "model")
+    revision = _require_prepared_str(prepared, "revision")
+    served_model_name = _require_prepared_str(prepared, "served_model_name")
+    modules = _require_module_specs(prepared)
+
+    command = [
+        "vllm",
+        "serve",
+        model,
+        "--revision",
+        revision,
+        "--host",
+        "127.0.0.1",
+        "--port",
+        "8000",
+        "--dtype",
+        "auto",
+        "--served-model-name",
+        served_model_name,
+    ]
+    if modules:
+        command.extend(["--enable-lora", "--lora-modules"])
+        for module in modules:
+            path = (
+                f"$SCRIPT_DIR/{_ADAPTERS_DIRNAME}/{module.name}"
+                if use_script_dir
+                else str(module.path)
+            )
+            command.append(f"{module.name}={path}")
+    return command
+
+
+def _render_config(prepared: TargetResult) -> str:
+    modules = _require_module_specs(prepared)
+    payload = {
+        "target": VLLM_TARGET.name,
+        "model": _require_prepared_str(prepared, "model"),
+        "revision": _require_prepared_str(prepared, "revision"),
+        "served_model_name": _require_prepared_str(prepared, "served_model_name"),
+        "dtype": "auto",
+        "host": "127.0.0.1",
+        "port": 8000,
+        "lora_modules": [
+            {
+                "name": module.name,
+                "path": f"{_ADAPTERS_DIRNAME}/{module.name}",
+                "adapter_version": module.adapter_version,
+            }
+            for module in modules
+        ],
+    }
+    return json.dumps(payload, sort_keys=True, indent=2) + "\n"
+
+
+def _render_launch_script(command: list[str]) -> str:
+    rendered = " ".join(_quote_script_arg(arg) for arg in command)
+    return (
+        "#!/usr/bin/env bash\n"
+        "set -euo pipefail\n"
+        'SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"\n'
+        f'exec {rendered} "$@"\n'
+    )
+
+
+def _quote_script_arg(arg: str) -> str:
+    if arg.startswith("$SCRIPT_DIR/"):
+        return f'"{arg}"'
+    if "=$SCRIPT_DIR/" in arg:
+        name, value = arg.split("=", 1)
+        return f'{shlex.quote(name)}="{value}"'
+    return shlex.quote(arg)
+
+
+def _require_prepared_str(prepared: TargetResult, key: str) -> str:
+    value = prepared.extras.get(key)
+    if not isinstance(value, str) or not value:
+        raise ExportError(f"vllm prepared target missing string extra {key!r}")
+    return value
+
+
+def _require_prepared_int(prepared: TargetResult, key: str) -> int:
+    value = prepared.extras.get(key)
+    if not isinstance(value, int):
+        raise ExportError(f"vllm prepared target missing int extra {key!r}")
+    return value
+
+
+def _require_module_specs(prepared: TargetResult) -> tuple[LoraModule, ...]:
+    value = prepared.extras.get("module_specs")
+    if not isinstance(value, tuple) or not all(isinstance(item, LoraModule) for item in value):
+        raise ExportError("vllm prepared target missing LoraModule tuple extra 'module_specs'")
+    return value
+
+
+VLLM_TARGET = VllmTarget()
+assert isinstance(VLLM_TARGET, ExportTarget)
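
As a worked example, the argv `_build_command` produces for the single flat-adapter case, derived by hand from the code above (`<hf_id>` and `<revision>` are placeholders for the base-model spec; the served name is illustrative):

```python
# use_script_dir=True yields the launch-script form shown here; the smoke
# test passes use_script_dir=False, substituting absolute staged paths.
expected_argv = [
    "vllm", "serve", "<hf_id>",
    "--revision", "<revision>",
    "--host", "127.0.0.1",
    "--port", "8000",
    "--dtype", "auto",
    "--served-model-name", "dlm-flat",
    "--enable-lora", "--lora-modules",
    "adapter=$SCRIPT_DIR/adapters/adapter",
]
```
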
tests/unit/cli/test_export_target_flag.py (modified)
@@ -43,7 +43,7 @@ class TestExportTargetFlag:
                 "export",
                 str(tmp_path / "ghost.dlm"),
                 "--target",
-                "vllm",
+                "sglang",
             ],
         )
         assert result.exit_code == 2
@@ -51,6 +51,7 @@ class TestExportTargetFlag:
         assert "unknown export target" in text
         assert "ollama" in text
         assert "llama-server" in text
+        assert "vllm" in text
 
     def test_ollama_target_reaches_existing_mutex_validation(self, tmp_path: Path) -> None:
         doc = _scaffold_doc(tmp_path)
@@ -92,3 +93,22 @@ class TestExportTargetFlag:
         text = _joined(result)
         assert "mutually exclusive" in text
         assert "--no-smoke" not in text
+
+    def test_vllm_target_reaches_existing_mutex_validation(self, tmp_path: Path) -> None:
+        runner = CliRunner()
+        result = runner.invoke(
+            app,
+            [
+                "--home",
+                str(tmp_path / "home"),
+                "export",
+                str(tmp_path / "ghost.dlm"),
+                "--target",
+                "vllm",
+                "--draft",
+                "qwen2.5:0.5b",
+                "--no-draft",
+            ],
+        )
+        assert result.exit_code == 2
+        assert "mutually exclusive" in _joined(result)
tests/unit/export/targets/test_registry.py (modified)
@@ -12,20 +12,21 @@ from dlm.export.targets import TARGETS, ExportTarget, available_targets, resolve
 
 
 class TestRegistry:
-    def test_ollama_target_is_registered(self) -> None:
+    def test_targets_are_registered(self) -> None:
         target = resolve_target("ollama")
         assert target.name == "ollama"
         assert isinstance(target, ExportTarget)
         assert TARGETS["ollama"] is target
         assert "llama-server" in TARGETS
-        assert available_targets() == ("ollama", "llama-server")
+        assert "vllm" in TARGETS
+        assert available_targets() == ("ollama", "llama-server", "vllm")
 
     def test_unknown_target_lists_available_targets(self) -> None:
         with pytest.raises(
             UnknownExportTargetError,
-            match="available targets: ollama, llama-server",
+            match="available targets: ollama, llama-server, vllm",
        ):
-            resolve_target("vllm")
+            resolve_target("sglang")
 
 
 class TestOllamaWrapper:
tests/unit/export/targets/test_vllm_argv.py (added)
@@ -0,0 +1,166 @@
+"""vLLM launch artifact generation."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+from dlm.base_models import BASE_MODELS
+from dlm.export.manifest import load_export_manifest
+from dlm.export.targets.vllm import (
+    VLLM_CONFIG_FILENAME,
+    VLLM_TARGET,
+    finalize_vllm_export,
+    prepare_vllm_export,
+)
+from dlm.store.manifest import Manifest, load_manifest, save_manifest
+from dlm.store.paths import for_dlm
+
+_SPEC = BASE_MODELS["smollm2-135m"]
+
+
+def _write_adapter(path: Path) -> None:
+    path.mkdir(parents=True)
+    (path / "adapter_config.json").write_text("{}", encoding="utf-8")
+    (path / "adapter_model.safetensors").write_bytes(b"adapter")
+    (path / "tokenizer_config.json").write_text(
+        json.dumps({"chat_template": "{{messages}}", "vocab_size": 32000}),
+        encoding="utf-8",
+    )
+
+
+def _setup_flat_store(tmp_path: Path) -> object:
+    store = for_dlm("01VLLMTEST", home=tmp_path)
+    store.ensure_layout()
+    save_manifest(store.manifest, Manifest(dlm_id="01VLLMTEST", base_model=_SPEC.key))
+    adapter = store.adapter_version(3)
+    _write_adapter(adapter)
+    store.set_current_adapter(adapter)
+    return store
+
+
+def _setup_named_store(tmp_path: Path) -> object:
+    store = for_dlm("01VLLMMULTI", home=tmp_path)
+    store.ensure_layout()
+    save_manifest(store.manifest, Manifest(dlm_id="01VLLMMULTI", base_model=_SPEC.key))
+    knowledge = store.adapter_version_for("knowledge", 2)
+    tone = store.adapter_version_for("tone", 4)
+    _write_adapter(knowledge)
+    _write_adapter(tone)
+    store.set_current_adapter_for("knowledge", knowledge)
+    store.set_current_adapter_for("tone", tone)
+    return store
+
+
+class TestPrepareVllmExport:
+    def test_flat_export_writes_config_manifest_and_launch_script(self, tmp_path: Path) -> None:
+        store = _setup_flat_store(tmp_path)
+
+        prepared = prepare_vllm_export(
+            store=store,
+            spec=_SPEC,
+            served_model_name="dlm-flat",
+            adapter_name=None,
+            adapter_path_override=None,
+            declared_adapter_names=None,
+        )
+        manifest_path = finalize_vllm_export(
+            store=store,
+            spec=_SPEC,
+            prepared=prepared,
+            smoke_output_first_line="hello from vllm",
+            adapter_name=None,
+            adapter_mix=None,
+        )
+
+        assert prepared.launch_script_path is not None
+        script = prepared.launch_script_path.read_text(encoding="utf-8")
+        assert script.startswith("#!/usr/bin/env bash\nset -euo pipefail\n")
+        assert "vllm serve" in script
+        assert _SPEC.hf_id in script
+        assert "--revision" in script
+        assert "--served-model-name dlm-flat" in script
+        assert 'adapter="$SCRIPT_DIR/adapters/adapter"' in script
+
+        config = json.loads(
+            (prepared.export_dir / VLLM_CONFIG_FILENAME).read_text(encoding="utf-8")
+        )
+        assert config["target"] == "vllm"
+        assert config["model"] == _SPEC.hf_id
+        assert config["served_model_name"] == "dlm-flat"
+        assert config["lora_modules"] == [
+            {"adapter_version": 3, "name": "adapter", "path": "adapters/adapter"}
+        ]
+
+        export_manifest = load_export_manifest(prepared.export_dir)
+        assert manifest_path == prepared.manifest_path
+        assert export_manifest.target == "vllm"
+        assert export_manifest.quant == "hf"
+        assert export_manifest.adapter_version == 3
+        assert any(artifact.path == "vllm_launch.sh" for artifact in export_manifest.artifacts)
+        assert any(artifact.path == "vllm_config.json" for artifact in export_manifest.artifacts)
+        assert any(
+            artifact.path == "adapters/adapter/adapter_model.safetensors"
+            for artifact in export_manifest.artifacts
+        )
+
+        store_manifest = load_manifest(store.manifest)
+        assert store_manifest.exports[-1].target == "vllm"
+        assert store_manifest.exports[-1].quant == "hf"
+        assert store_manifest.exports[-1].smoke_output_first_line == "hello from vllm"
+
+    def test_multi_adapter_export_includes_all_named_modules(self, tmp_path: Path) -> None:
+        store = _setup_named_store(tmp_path)
+
+        prepared = prepare_vllm_export(
+            store=store,
+            spec=_SPEC,
+            served_model_name="dlm-multi",
+            adapter_name=None,
+            adapter_path_override=None,
+            declared_adapter_names=("knowledge", "tone"),
+        )
+
+        script = prepared.launch_script_path.read_text(encoding="utf-8")
+        assert 'knowledge="$SCRIPT_DIR/adapters/knowledge"' in script
+        assert 'tone="$SCRIPT_DIR/adapters/tone"' in script
+
+        config = json.loads(
+            (prepared.export_dir / VLLM_CONFIG_FILENAME).read_text(encoding="utf-8")
+        )
+        assert config["lora_modules"] == [
+            {"adapter_version": 2, "name": "knowledge", "path": "adapters/knowledge"},
+            {"adapter_version": 4, "name": "tone", "path": "adapters/tone"},
+        ]
+
+
+class TestVllmSmoke:
+    def test_smoke_uses_absolute_runtime_paths(self, tmp_path: Path, monkeypatch: object) -> None:
+        store = _setup_named_store(tmp_path)
+        prepared = prepare_vllm_export(
+            store=store,
+            spec=_SPEC,
+            served_model_name="dlm-multi",
+            adapter_name=None,
+            adapter_path_override=None,
+            declared_adapter_names=("knowledge", "tone"),
+        )
+        seen: list[list[str]] = []
+
+        def _fake_smoke(argv: list[str], **_: object) -> str:
+            seen.append(list(argv))
+            return "vllm replied"
+
+        monkeypatch.setattr("dlm.export.targets.vllm.smoke_openai_compat_server", _fake_smoke)
+
+        result = VLLM_TARGET.smoke_test(prepared)
+
+        assert result.attempted is True
+        assert result.ok is True
+        assert result.detail == "vllm replied"
+        argv = seen[0]
+        assert argv[:2] == ["vllm", "serve"]
+        assert "$SCRIPT_DIR" not in " ".join(argv)
+        assert _SPEC.hf_id in argv
+        assert f"knowledge={prepared.export_dir / 'adapters' / 'knowledge'}" in argv
+        assert f"tone={prepared.export_dir / 'adapters' / 'tone'}" in argv
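
Downstream consumers can read back the same manifest these tests assert on. A reader-side sketch (`export_dir` is illustrative, e.g. `<store>/exports/vllm`):

```python
from dlm.export.manifest import load_export_manifest

m = load_export_manifest(export_dir)
assert m.target == "vllm" and m.quant == "hf"
print([a.path for a in m.artifacts])  # vllm_launch.sh, vllm_config.json, adapters/...
```
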