Add vLLM export target
- SHA: e9dda81eb42d0dc0f4dfe41026a0d933f3091959
- Parents: 82b797f
- Tree: 1c4e02f

| Status | File | + | - |
|---|---|---|---|
| M | README.md | 2 | 2 |
| M | docs/cli/reference.md | 1 | 1 |
| M | src/dlm/cli/commands.py | 84 | 2 |
| A | src/dlm/export/record.py | 58 | 0 |
| M | src/dlm/export/runner.py | 5 | 54 |
| M | src/dlm/export/targets/__init__.py | 5 | 0 |
| A | src/dlm/export/targets/vllm.py | 356 | 0 |
| M | tests/unit/cli/test_export_target_flag.py | 21 | 1 |
| M | tests/unit/export/targets/test_registry.py | 5 | 4 |
| A | tests/unit/export/targets/test_vllm_argv.py | 166 | 0 |
### README.md (modified)

@@ -19,8 +19,8 @@ Ollama and `llama-server`.
| 19 | 19 | |
| 20 | 20 | **Status:** pre-v1.0, but far beyond the original MVP framing. The core |
| 21 | 21 | author/train/prompt/export/pack/share loop is real, and newer runtime-target |
| 22 | -work is landing incrementally. Current export targets are `ollama` and | |
| 23 | -`llama-server`. | |
| 22 | +work is landing incrementally. Current export targets are `ollama`, | |
| 23 | +`llama-server`, and `vllm`. | |
| 24 | 24 | |
| 25 | 25 | ## What A `.dlm` Actually Is |
| 26 | 26 | |
### docs/cli/reference.md (modified)

@@ -203,7 +203,7 @@ dlm export <path> [--target NAME] [--quant Q] [--merged [--dequantize]]
| 203 | 203 | |
| 204 | 204 | | Option | Default | Notes | |
| 205 | 205 | |---|---|---| |
| 206 | -| `--target NAME` | `ollama` | Export destination. Sprint 41 currently supports `ollama` and `llama-server`. The `llama-server` path writes launch artifacts against the existing GGUF export and uses the shared OpenAI-compatible HTTP smoke harness when `--no-smoke` is not passed. | | |
| 206 | +| `--target NAME` | `ollama` | Export destination. Sprint 41 currently supports `ollama`, `llama-server`, and `vllm`. The `llama-server` path writes launch artifacts against the existing GGUF export and uses the shared OpenAI-compatible HTTP smoke harness; the `vllm` path writes `vllm_launch.sh` + `vllm_config.json` against the local adapter layout and ignores GGUF-only flags. | | |
| 207 | 207 | | `--quant Q` | frontmatter.export.default_quant | `Q4_K_M` / `Q5_K_M` / `Q6_K` / `Q8_0` / `F16`. | |
| 208 | 208 | | `--merged` | false | Merge LoRA into base before quantizing. | |
| 209 | 209 | | `--dequantize` | false | Required with `--merged` on a QLoRA adapter (pitfall #3). | |
### src/dlm/cli/commands.py (modified)

@@ -1551,7 +1551,7 @@ def export_cmd(
| 1551 | 1551 | str, |
| 1552 | 1552 | typer.Option( |
| 1553 | 1553 | "--target", |
| 1554 | - help="Export destination. Currently supported: ollama, llama-server.", | |
| 1554 | + help="Export destination. Currently supported: ollama, llama-server, vllm.", | |
| 1555 | 1555 | ), |
| 1556 | 1556 | ] = "ollama", |
| 1557 | 1557 | quant: Annotated[ |
@@ -1678,7 +1678,12 @@ def export_cmd( | ||
| 1678 | 1678 | OllamaVersionError, |
| 1679 | 1679 | ) |
| 1680 | 1680 | from dlm.export.quantize import run_checked |
| 1681 | - from dlm.export.targets import prepare_llama_server_export, resolve_target | |
| 1681 | + from dlm.export.targets import ( | |
| 1682 | + finalize_vllm_export, | |
| 1683 | + prepare_llama_server_export, | |
| 1684 | + prepare_vllm_export, | |
| 1685 | + resolve_target, | |
| 1686 | + ) | |
| 1682 | 1687 | from dlm.store.paths import for_dlm |
| 1683 | 1688 | |
| 1684 | 1689 | console = Console(stderr=True) |
@@ -1774,6 +1779,12 @@ def export_cmd( | ||
| 1774 | 1779 | from dlm.modality import modality_for |
| 1775 | 1780 | |
| 1776 | 1781 | export_dispatch = modality_for(spec) |
| 1782 | + if resolved_target.name == "vllm" and export_dispatch.accepts_audio: | |
| 1783 | + console.print( | |
| 1784 | + "[red]export:[/red] --target vllm is not wired for audio-language " | |
| 1785 | + "documents yet; this Sprint 41 slice only supports text bases." | |
| 1786 | + ) | |
| 1787 | + raise typer.Exit(code=2) | |
| 1777 | 1788 | if export_dispatch.accepts_audio: |
| 1778 | 1789 | try: |
| 1779 | 1790 | dispatch_result = export_dispatch.dispatch_export( |
@@ -1813,6 +1824,12 @@ def export_cmd( | ||
| 1813 | 1824 | # missing local base snapshot should not hard-fail the whole |
| 1814 | 1825 | # export — the dispatcher can still emit the HF-snapshot path |
| 1815 | 1826 | # without GGUF context. |
| 1827 | + if resolved_target.name == "vllm" and export_dispatch.accepts_images: | |
| 1828 | + console.print( | |
| 1829 | + "[red]export:[/red] --target vllm is not wired for vision-language " | |
| 1830 | + "documents yet; this Sprint 41 slice only supports text bases." | |
| 1831 | + ) | |
| 1832 | + raise typer.Exit(code=2) | |
| 1816 | 1833 | if export_dispatch.accepts_images: |
| 1817 | 1834 | gguf_emission_context = None |
| 1818 | 1835 | try: |
@@ -1874,6 +1891,71 @@ def export_cmd( | ||
| 1874 | 1891 | combination_type=adapter_mix_method, # type: ignore[arg-type] |
| 1875 | 1892 | ) |
| 1876 | 1893 | |
| 1894 | + if resolved_target.name == "vllm": | |
| 1895 | + ignored_flags: list[str] = [] | |
| 1896 | + if quant is not None: | |
| 1897 | + ignored_flags.append("--quant") | |
| 1898 | + if merged: | |
| 1899 | + ignored_flags.append("--merged") | |
| 1900 | + if dequantize: | |
| 1901 | + ignored_flags.append("--dequantize") | |
| 1902 | + if no_template: | |
| 1903 | + ignored_flags.append("--no-template") | |
| 1904 | + if skip_ollama: | |
| 1905 | + ignored_flags.append("--skip-ollama") | |
| 1906 | + if no_imatrix: | |
| 1907 | + ignored_flags.append("--no-imatrix") | |
| 1908 | + if draft is not None: | |
| 1909 | + ignored_flags.append("--draft") | |
| 1910 | + if no_draft: | |
| 1911 | + ignored_flags.append("--no-draft") | |
| 1912 | + if ignored_flags: | |
| 1913 | + console.print( | |
| 1914 | + "[yellow]export:[/yellow] ignoring flags not applicable to " | |
| 1915 | + f"`--target vllm`: {', '.join(ignored_flags)}" | |
| 1916 | + ) | |
| 1917 | + | |
| 1918 | + declared_adapter_names = tuple(adapters_declared.keys()) if adapters_declared else None | |
| 1919 | + try: | |
| 1920 | + vllm_result = prepare_vllm_export( | |
| 1921 | + store=store, | |
| 1922 | + spec=spec, | |
| 1923 | + served_model_name=name or f"dlm-{parsed.frontmatter.dlm_id.lower()}", | |
| 1924 | + adapter_name=adapter, | |
| 1925 | + adapter_path_override=adapter_path_override, | |
| 1926 | + declared_adapter_names=declared_adapter_names, | |
| 1927 | + ) | |
| 1928 | + except ExportError as exc: | |
| 1929 | + console.print(f"[red]export:[/red] {exc}") | |
| 1930 | + raise typer.Exit(code=1) from exc | |
| 1931 | + | |
| 1932 | + vllm_smoke = None if no_smoke else resolved_target.smoke_test(vllm_result) | |
| 1933 | + if vllm_smoke is not None and not vllm_smoke.ok: | |
| 1934 | + console.print( | |
| 1935 | + f"[red]smoke:[/red] {vllm_smoke.detail}\n" | |
| 1936 | + " re-run with `--no-smoke` to skip the smoke test." | |
| 1937 | + ) | |
| 1938 | + raise typer.Exit(code=1) | |
| 1939 | + | |
| 1940 | + manifest_path = finalize_vllm_export( | |
| 1941 | + store=store, | |
| 1942 | + spec=spec, | |
| 1943 | + prepared=vllm_result, | |
| 1944 | + smoke_output_first_line=None if vllm_smoke is None else vllm_smoke.detail, | |
| 1945 | + adapter_name=adapter, | |
| 1946 | + adapter_mix=mix_entries, | |
| 1947 | + ) | |
| 1948 | + console.print(f"[green]exported:[/green] {vllm_result.export_dir}") | |
| 1949 | + console.print("target: vllm") | |
| 1950 | + assert vllm_result.launch_script_path is not None | |
| 1951 | + assert vllm_result.config_path is not None | |
| 1952 | + console.print(f"launch: {vllm_result.launch_script_path.name}") | |
| 1953 | + console.print(f"config: {vllm_result.config_path.name}") | |
| 1954 | + console.print(f"manifest: {manifest_path.name}") | |
| 1955 | + if vllm_smoke is not None and vllm_smoke.detail: | |
| 1956 | + console.print(f"smoke: {vllm_smoke.detail}") | |
| 1957 | + return | |
| 1958 | + | |
| 1877 | 1959 | try: |
| 1878 | 1960 | result = run_export( |
| 1879 | 1961 | store, |
### src/dlm/export/record.py (added)

@@ -0,0 +1,58 @@
| 1 | +"""Shared export-record helpers used by GGUF and non-GGUF targets.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +from typing import TYPE_CHECKING, Any | |
| 6 | + | |
| 7 | +from dlm.export.manifest import utc_now | |
| 8 | + | |
| 9 | +if TYPE_CHECKING: | |
| 10 | + from dlm.store.paths import StorePath | |
| 11 | + | |
| 12 | + | |
| 13 | +def append_export_summary( | |
| 14 | + *, | |
| 15 | + store: StorePath, | |
| 16 | + quant: str, | |
| 17 | + merged: bool, | |
| 18 | + target: str, | |
| 19 | + llama_cpp_tag: str | None, | |
| 20 | + artifacts: list[Any], | |
| 21 | + ollama_name: str | None, | |
| 22 | + ollama_version_str: str | None, | |
| 23 | + smoke_first_line: str | None, | |
| 24 | + adapter_name: str | None = None, | |
| 25 | + adapter_mix: list[tuple[str, float]] | None = None, | |
| 26 | + timeout: float = 60.0, | |
| 27 | +) -> None: | |
| 28 | + """Update `manifest.exports` with one new export row.""" | |
| 29 | + from dlm.store.lock import exclusive | |
| 30 | + from dlm.store.manifest import ExportSummary, load_manifest, save_manifest | |
| 31 | + | |
| 32 | + base_sha = next((a.sha256 for a in artifacts if a.path.startswith("base.")), None) | |
| 33 | + adapter_sha = next((a.sha256 for a in artifacts if a.path.startswith("adapter.")), None) | |
| 34 | + | |
| 35 | + summary = ExportSummary( | |
| 36 | + exported_at=utc_now(), | |
| 37 | + target=target, | |
| 38 | + quant=quant, | |
| 39 | + merged=merged, | |
| 40 | + ollama_name=ollama_name, | |
| 41 | + ollama_version=ollama_version_str, | |
| 42 | + llama_cpp_tag=llama_cpp_tag, | |
| 43 | + base_gguf_sha256=base_sha, | |
| 44 | + adapter_gguf_sha256=adapter_sha, | |
| 45 | + smoke_output_first_line=smoke_first_line, | |
| 46 | + adapter_name=adapter_name, | |
| 47 | + adapter_mix=adapter_mix, | |
| 48 | + ) | |
| 49 | + | |
| 50 | + with exclusive(store.lock, timeout=timeout): | |
| 51 | + manifest = load_manifest(store.manifest) | |
| 52 | + updated = manifest.model_copy( | |
| 53 | + update={ | |
| 54 | + "exports": [*manifest.exports, summary], | |
| 55 | + "updated_at": utc_now(), | |
| 56 | + } | |
| 57 | + ) | |
| 58 | + save_manifest(store.manifest, updated) | |
### src/dlm/export/runner.py (modified)

@@ -34,6 +34,7 @@ from dlm.export.manifest import (
| 34 | 34 | from dlm.export.plan import ExportPlan |
| 35 | 35 | from dlm.export.precision_safety import require_dequantize_or_refuse |
| 36 | 36 | from dlm.export.quantize import run_checked |
| 37 | +from dlm.export.record import append_export_summary | |
| 37 | 38 | |
| 38 | 39 | if TYPE_CHECKING: |
| 39 | 40 | from dlm.base_models import BaseModelSpec |
@@ -335,9 +336,10 @@ def run_export( | ||
| 335 | 336 | manifest_path = save_export_manifest(export_dir, em) |
| 336 | 337 | |
| 337 | 338 | # 8. Append to store manifest.exports. |
| 338 | - _append_export_summary( | |
| 339 | + append_export_summary( | |
| 339 | 340 | store=store, |
| 340 | - plan=plan, | |
| 341 | + quant=plan.quant, | |
| 342 | + merged=plan.merged, | |
| 341 | 343 | llama_cpp_tag=em.llama_cpp_tag, |
| 342 | 344 | artifacts=em.artifacts, |
| 343 | 345 | ollama_name=em.ollama_name, |
@@ -346,6 +348,7 @@ def run_export( | ||
| 346 | 348 | target=resolved_target.name, |
| 347 | 349 | adapter_name=adapter_name, |
| 348 | 350 | adapter_mix=adapter_mix, |
| 351 | + timeout=_APPEND_LOCK_TIMEOUT, | |
| 349 | 352 | ) |
| 350 | 353 | |
| 351 | 354 | return ExportResult( |
@@ -617,55 +620,3 @@ def _run_ollama_stage( | ||
| 617 | 620 | smoke_first_line = first_line(stdout) |
| 618 | 621 | |
| 619 | 622 | return modelfile_path, name, ver_str, smoke_first_line |
| 620 | - | |
| 621 | - | |
| 622 | -def _append_export_summary( | |
| 623 | - *, | |
| 624 | - store: StorePath, | |
| 625 | - plan: ExportPlan, | |
| 626 | - llama_cpp_tag: str | None, | |
| 627 | - artifacts: list[Any], | |
| 628 | - ollama_name: str | None, | |
| 629 | - ollama_version_str: str | None, | |
| 630 | - smoke_first_line: str | None, | |
| 631 | - target: str, | |
| 632 | - adapter_name: str | None = None, | |
| 633 | - adapter_mix: list[tuple[str, float]] | None = None, | |
| 634 | -) -> None: | |
| 635 | - """Update `manifest.exports` with a new `ExportSummary` row.""" | |
| 636 | - from dlm.store.manifest import ExportSummary, load_manifest, save_manifest | |
| 637 | - | |
| 638 | - base_sha = next((a.sha256 for a in artifacts if a.path.startswith("base.")), None) | |
| 639 | - adapter_sha = next((a.sha256 for a in artifacts if a.path.startswith("adapter.")), None) | |
| 640 | - | |
| 641 | - summary = ExportSummary( | |
| 642 | - exported_at=utc_now(), | |
| 643 | - target=target, | |
| 644 | - quant=plan.quant, | |
| 645 | - merged=plan.merged, | |
| 646 | - ollama_name=ollama_name, | |
| 647 | - ollama_version=ollama_version_str, | |
| 648 | - llama_cpp_tag=llama_cpp_tag, | |
| 649 | - base_gguf_sha256=base_sha, | |
| 650 | - adapter_gguf_sha256=adapter_sha, | |
| 651 | - smoke_output_first_line=smoke_first_line, | |
| 652 | - adapter_name=adapter_name, | |
| 653 | - adapter_mix=adapter_mix, | |
| 654 | - ) | |
| 655 | - | |
| 656 | - # The manifest read-modify-write must be serialized: two concurrent | |
| 657 | - # `dlm export` invocations on the same store (different quants) would | |
| 658 | - # otherwise race and drop one summary. The per-store exclusive lock | |
| 659 | - # is the same one `dlm train` takes; holding it across load→save | |
| 660 | - # keeps `manifest.exports` append-atomic. | |
| 661 | - from dlm.store.lock import exclusive | |
| 662 | - | |
| 663 | - with exclusive(store.lock, timeout=_APPEND_LOCK_TIMEOUT): | |
| 664 | - manifest = load_manifest(store.manifest) | |
| 665 | - updated = manifest.model_copy( | |
| 666 | - update={ | |
| 667 | - "exports": [*manifest.exports, summary], | |
| 668 | - "updated_at": utc_now(), | |
| 669 | - } | |
| 670 | - ) | |
| 671 | - save_manifest(store.manifest, updated) | |
### src/dlm/export/targets/__init__.py (modified)

@@ -6,10 +6,12 @@ from dlm.export.errors import UnknownExportTargetError
| 6 | 6 | from dlm.export.targets.base import ExportTarget, SmokeResult, TargetResult |
| 7 | 7 | from dlm.export.targets.llama_server import LLAMA_SERVER_TARGET, prepare_llama_server_export |
| 8 | 8 | from dlm.export.targets.ollama import OLLAMA_TARGET |
| 9 | +from dlm.export.targets.vllm import VLLM_TARGET, finalize_vllm_export, prepare_vllm_export | |
| 9 | 10 | |
| 10 | 11 | TARGETS: dict[str, ExportTarget] = { |
| 11 | 12 | OLLAMA_TARGET.name: OLLAMA_TARGET, |
| 12 | 13 | LLAMA_SERVER_TARGET.name: LLAMA_SERVER_TARGET, |
| 14 | + VLLM_TARGET.name: VLLM_TARGET, | |
| 13 | 15 | } |
| 14 | 16 | |
| 15 | 17 | |
@@ -32,7 +34,10 @@ __all__ = [ | ||
| 32 | 34 | "SmokeResult", |
| 33 | 35 | "TARGETS", |
| 34 | 36 | "TargetResult", |
| 37 | + "VLLM_TARGET", | |
| 35 | 38 | "available_targets", |
| 39 | + "finalize_vllm_export", | |
| 36 | 40 | "prepare_llama_server_export", |
| 41 | + "prepare_vllm_export", | |
| 37 | 42 | "resolve_target", |
| 38 | 43 | ] |
### src/dlm/export/targets/vllm.py (added)

@@ -0,0 +1,356 @@
| 1 | +"""vLLM target helpers.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +import json | |
| 6 | +import shlex | |
| 7 | +import shutil | |
| 8 | +from dataclasses import dataclass | |
| 9 | +from pathlib import Path | |
| 10 | + | |
| 11 | +from dlm.base_models import BaseModelSpec | |
| 12 | +from dlm.export.errors import ExportError, TargetSmokeError | |
| 13 | +from dlm.export.manifest import ExportManifest, build_artifact, save_export_manifest, utc_now | |
| 14 | +from dlm.export.record import append_export_summary | |
| 15 | +from dlm.export.smoke import smoke_openai_compat_server | |
| 16 | +from dlm.export.targets.base import ExportTarget, SmokeResult, TargetResult | |
| 17 | +from dlm.io.atomic import write_text | |
| 18 | +from dlm.store.paths import StorePath | |
| 19 | + | |
| 20 | +VLLM_EXPORT_SUBDIR = "vllm" | |
| 21 | +VLLM_CONFIG_FILENAME = "vllm_config.json" | |
| 22 | +LAUNCH_SCRIPT_FILENAME = "vllm_launch.sh" | |
| 23 | +_ADAPTERS_DIRNAME = "adapters" | |
| 24 | +_HF_QUANT = "hf" | |
| 25 | +_DEFAULT_MODULE_NAME = "adapter" | |
| 26 | +_MIXED_MODULE_NAME = "mixed" | |
| 27 | + | |
| 28 | + | |
| 29 | +@dataclass(frozen=True) | |
| 30 | +class LoraModule: | |
| 31 | + name: str | |
| 32 | + path: Path | |
| 33 | + adapter_version: int | |
| 34 | + | |
| 35 | + | |
| 36 | +class VllmTarget: | |
| 37 | + """Registered export target for vLLM launch artifacts.""" | |
| 38 | + | |
| 39 | + name = "vllm" | |
| 40 | + | |
| 41 | + def prepare(self, ctx: object) -> TargetResult: | |
| 42 | + raise NotImplementedError("vllm exports are prepared via prepare_vllm_export()") | |
| 43 | + | |
| 44 | + def launch_command(self, prepared: TargetResult) -> list[str]: | |
| 45 | + return _build_command(prepared, use_script_dir=True) | |
| 46 | + | |
| 47 | + def smoke_test(self, prepared: TargetResult) -> SmokeResult: | |
| 48 | + try: | |
| 49 | + first_line = smoke_openai_compat_server(_build_command(prepared, use_script_dir=False)) | |
| 50 | + except (OSError, TargetSmokeError, ExportError) as exc: | |
| 51 | + return SmokeResult(attempted=True, ok=False, detail=str(exc)) | |
| 52 | + return SmokeResult(attempted=True, ok=True, detail=first_line) | |
| 53 | + | |
| 54 | + | |
| 55 | +def prepare_vllm_export( | |
| 56 | + *, | |
| 57 | + store: StorePath, | |
| 58 | + spec: BaseModelSpec, | |
| 59 | + served_model_name: str, | |
| 60 | + adapter_name: str | None, | |
| 61 | + adapter_path_override: Path | None, | |
| 62 | + declared_adapter_names: tuple[str, ...] | None, | |
| 63 | +) -> TargetResult: | |
| 64 | + """Stage vLLM launch artifacts plus local adapter module copies.""" | |
| 65 | + | |
| 66 | + export_dir = store.exports / VLLM_EXPORT_SUBDIR | |
| 67 | + export_dir.mkdir(parents=True, exist_ok=True) | |
| 68 | + | |
| 69 | + adapters_dir = export_dir / _ADAPTERS_DIRNAME | |
| 70 | + if adapters_dir.exists(): | |
| 71 | + shutil.rmtree(adapters_dir) | |
| 72 | + adapters_dir.mkdir(parents=True, exist_ok=True) | |
| 73 | + | |
| 74 | + modules = _stage_modules( | |
| 75 | + store=store, | |
| 76 | + adapters_dir=adapters_dir, | |
| 77 | + adapter_name=adapter_name, | |
| 78 | + adapter_path_override=adapter_path_override, | |
| 79 | + declared_adapter_names=declared_adapter_names, | |
| 80 | + ) | |
| 81 | + if not modules: | |
| 82 | + raise ExportError("vllm export needs at least one adapter module") | |
| 83 | + | |
| 84 | + config_path = export_dir / VLLM_CONFIG_FILENAME | |
| 85 | + launch_script_path = export_dir / LAUNCH_SCRIPT_FILENAME | |
| 86 | + draft = TargetResult( | |
| 87 | + name=VLLM_TARGET.name, | |
| 88 | + export_dir=export_dir, | |
| 89 | + manifest_path=export_dir / "export_manifest.json", | |
| 90 | + artifacts=(), | |
| 91 | + launch_script_path=launch_script_path, | |
| 92 | + config_path=config_path, | |
| 93 | + extras={ | |
| 94 | + "model": spec.hf_id, | |
| 95 | + "revision": spec.revision, | |
| 96 | + "served_model_name": served_model_name, | |
| 97 | + "module_specs": tuple(modules), | |
| 98 | + "adapter_version": max(module.adapter_version for module in modules), | |
| 99 | + }, | |
| 100 | + ) | |
| 101 | + write_text(config_path, _render_config(draft)) | |
| 102 | + write_text(launch_script_path, _render_launch_script(VLLM_TARGET.launch_command(draft))) | |
| 103 | + launch_script_path.chmod(0o755) | |
| 104 | + return TargetResult( | |
| 105 | + name=draft.name, | |
| 106 | + export_dir=draft.export_dir, | |
| 107 | + manifest_path=draft.manifest_path, | |
| 108 | + artifacts=tuple(_artifact_paths(export_dir)), | |
| 109 | + launch_script_path=draft.launch_script_path, | |
| 110 | + config_path=draft.config_path, | |
| 111 | + extras=draft.extras, | |
| 112 | + ) | |
| 113 | + | |
| 114 | + | |
| 115 | +def finalize_vllm_export( | |
| 116 | + *, | |
| 117 | + store: StorePath, | |
| 118 | + spec: BaseModelSpec, | |
| 119 | + prepared: TargetResult, | |
| 120 | + smoke_output_first_line: str | None, | |
| 121 | + adapter_name: str | None, | |
| 122 | + adapter_mix: list[tuple[str, float]] | None, | |
| 123 | +) -> Path: | |
| 124 | + """Write export_manifest.json and append the store export summary.""" | |
| 125 | + | |
| 126 | + from dlm import __version__ as dlm_version | |
| 127 | + | |
| 128 | + artifacts = [ | |
| 129 | + build_artifact(prepared.export_dir, path) for path in _artifact_paths(prepared.export_dir) | |
| 130 | + ] | |
| 131 | + adapter_version = _require_prepared_int(prepared, "adapter_version") | |
| 132 | + manifest = ExportManifest( | |
| 133 | + target=VLLM_TARGET.name, | |
| 134 | + quant=_HF_QUANT, | |
| 135 | + merged=False, | |
| 136 | + dequantized=False, | |
| 137 | + ollama_name=None, | |
| 138 | + created_at=utc_now(), | |
| 139 | + created_by=f"dlm-{dlm_version}", | |
| 140 | + llama_cpp_tag=None, | |
| 141 | + base_model_hf_id=spec.hf_id, | |
| 142 | + base_model_revision=spec.revision, | |
| 143 | + adapter_version=adapter_version, | |
| 144 | + artifacts=artifacts, | |
| 145 | + ) | |
| 146 | + manifest_path = save_export_manifest(prepared.export_dir, manifest) | |
| 147 | + append_export_summary( | |
| 148 | + store=store, | |
| 149 | + quant=_HF_QUANT, | |
| 150 | + merged=False, | |
| 151 | + target=VLLM_TARGET.name, | |
| 152 | + llama_cpp_tag=None, | |
| 153 | + artifacts=artifacts, | |
| 154 | + ollama_name=None, | |
| 155 | + ollama_version_str=None, | |
| 156 | + smoke_first_line=smoke_output_first_line, | |
| 157 | + adapter_name=adapter_name, | |
| 158 | + adapter_mix=adapter_mix, | |
| 159 | + ) | |
| 160 | + return manifest_path | |
| 161 | + | |
| 162 | + | |
| 163 | +def _stage_modules( | |
| 164 | + *, | |
| 165 | + store: StorePath, | |
| 166 | + adapters_dir: Path, | |
| 167 | + adapter_name: str | None, | |
| 168 | + adapter_path_override: Path | None, | |
| 169 | + declared_adapter_names: tuple[str, ...] | None, | |
| 170 | +) -> list[LoraModule]: | |
| 171 | + modules = _resolve_modules( | |
| 172 | + store=store, | |
| 173 | + adapter_name=adapter_name, | |
| 174 | + adapter_path_override=adapter_path_override, | |
| 175 | + declared_adapter_names=declared_adapter_names, | |
| 176 | + ) | |
| 177 | + staged: list[LoraModule] = [] | |
| 178 | + for module in modules: | |
| 179 | + target_dir = adapters_dir / module.name | |
| 180 | + shutil.copytree(module.path, target_dir) | |
| 181 | + staged.append(LoraModule(module.name, target_dir, module.adapter_version)) | |
| 182 | + return staged | |
| 183 | + | |
| 184 | + | |
| 185 | +def _resolve_modules( | |
| 186 | + *, | |
| 187 | + store: StorePath, | |
| 188 | + adapter_name: str | None, | |
| 189 | + adapter_path_override: Path | None, | |
| 190 | + declared_adapter_names: tuple[str, ...] | None, | |
| 191 | +) -> list[LoraModule]: | |
| 192 | + if adapter_path_override is not None: | |
| 193 | + if not adapter_path_override.exists(): | |
| 194 | + raise ExportError(f"adapter_path_override {adapter_path_override} does not exist") | |
| 195 | + return [ | |
| 196 | + LoraModule( | |
| 197 | + name=_MIXED_MODULE_NAME, | |
| 198 | + path=adapter_path_override, | |
| 199 | + adapter_version=_version_from_dir_name(adapter_path_override), | |
| 200 | + ) | |
| 201 | + ] | |
| 202 | + | |
| 203 | + if adapter_name is not None: | |
| 204 | + path = store.resolve_current_adapter_for(adapter_name) | |
| 205 | + pointer = store.adapter_current_pointer_for(adapter_name) | |
| 206 | + if path is None or not path.exists(): | |
| 207 | + raise ExportError( | |
| 208 | + f"no current adapter under {pointer}; run `dlm train` before exporting." | |
| 209 | + ) | |
| 210 | + return [ | |
| 211 | + LoraModule( | |
| 212 | + name=adapter_name, | |
| 213 | + path=path, | |
| 214 | + adapter_version=_version_from_dir_name(path), | |
| 215 | + ) | |
| 216 | + ] | |
| 217 | + | |
| 218 | + if declared_adapter_names: | |
| 219 | + modules: list[LoraModule] = [] | |
| 220 | + for name in declared_adapter_names: | |
| 221 | + path = store.resolve_current_adapter_for(name) | |
| 222 | + pointer = store.adapter_current_pointer_for(name) | |
| 223 | + if path is None or not path.exists(): | |
| 224 | + raise ExportError( | |
| 225 | + f"no current adapter under {pointer}; run `dlm train` before exporting." | |
| 226 | + ) | |
| 227 | + modules.append( | |
| 228 | + LoraModule(name=name, path=path, adapter_version=_version_from_dir_name(path)) | |
| 229 | + ) | |
| 230 | + return modules | |
| 231 | + | |
| 232 | + path = store.resolve_current_adapter() | |
| 233 | + pointer = store.adapter_current_pointer | |
| 234 | + if path is None or not path.exists(): | |
| 235 | + raise ExportError(f"no current adapter under {pointer}; run `dlm train` before exporting.") | |
| 236 | + return [ | |
| 237 | + LoraModule( | |
| 238 | + name=_DEFAULT_MODULE_NAME, | |
| 239 | + path=path, | |
| 240 | + adapter_version=_version_from_dir_name(path), | |
| 241 | + ) | |
| 242 | + ] | |
| 243 | + | |
| 244 | + | |
| 245 | +def _version_from_dir_name(path: Path) -> int: | |
| 246 | + stem = path.name | |
| 247 | + if not stem.startswith("v") or not stem[1:].isdigit(): | |
| 248 | + return 1 | |
| 249 | + return int(stem[1:]) | |
| 250 | + | |
| 251 | + | |
| 252 | +def _artifact_paths(export_dir: Path) -> list[Path]: | |
| 253 | + artifacts: list[Path] = [] | |
| 254 | + for path in sorted(export_dir.rglob("*")): | |
| 255 | + if path.is_file() and path.name != "export_manifest.json": | |
| 256 | + artifacts.append(path) | |
| 257 | + return artifacts | |
| 258 | + | |
| 259 | + | |
| 260 | +def _build_command(prepared: TargetResult, *, use_script_dir: bool) -> list[str]: | |
| 261 | + model = _require_prepared_str(prepared, "model") | |
| 262 | + revision = _require_prepared_str(prepared, "revision") | |
| 263 | + served_model_name = _require_prepared_str(prepared, "served_model_name") | |
| 264 | + modules = _require_module_specs(prepared) | |
| 265 | + | |
| 266 | + command = [ | |
| 267 | + "vllm", | |
| 268 | + "serve", | |
| 269 | + model, | |
| 270 | + "--revision", | |
| 271 | + revision, | |
| 272 | + "--host", | |
| 273 | + "127.0.0.1", | |
| 274 | + "--port", | |
| 275 | + "8000", | |
| 276 | + "--dtype", | |
| 277 | + "auto", | |
| 278 | + "--served-model-name", | |
| 279 | + served_model_name, | |
| 280 | + ] | |
| 281 | + if modules: | |
| 282 | + command.extend(["--enable-lora", "--lora-modules"]) | |
| 283 | + for module in modules: | |
| 284 | + path = ( | |
| 285 | + f"$SCRIPT_DIR/{_ADAPTERS_DIRNAME}/{module.name}" | |
| 286 | + if use_script_dir | |
| 287 | + else str(module.path) | |
| 288 | + ) | |
| 289 | + command.append(f"{module.name}={path}") | |
| 290 | + return command | |
| 291 | + | |
| 292 | + | |
| 293 | +def _render_config(prepared: TargetResult) -> str: | |
| 294 | + modules = _require_module_specs(prepared) | |
| 295 | + payload = { | |
| 296 | + "target": VLLM_TARGET.name, | |
| 297 | + "model": _require_prepared_str(prepared, "model"), | |
| 298 | + "revision": _require_prepared_str(prepared, "revision"), | |
| 299 | + "served_model_name": _require_prepared_str(prepared, "served_model_name"), | |
| 300 | + "dtype": "auto", | |
| 301 | + "host": "127.0.0.1", | |
| 302 | + "port": 8000, | |
| 303 | + "lora_modules": [ | |
| 304 | + { | |
| 305 | + "name": module.name, | |
| 306 | + "path": f"{_ADAPTERS_DIRNAME}/{module.name}", | |
| 307 | + "adapter_version": module.adapter_version, | |
| 308 | + } | |
| 309 | + for module in modules | |
| 310 | + ], | |
| 311 | + } | |
| 312 | + return json.dumps(payload, sort_keys=True, indent=2) + "\n" | |
| 313 | + | |
| 314 | + | |
| 315 | +def _render_launch_script(command: list[str]) -> str: | |
| 316 | + rendered = " ".join(_quote_script_arg(arg) for arg in command) | |
| 317 | + return ( | |
| 318 | + "#!/usr/bin/env bash\n" | |
| 319 | + "set -euo pipefail\n" | |
| 320 | + 'SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"\n' | |
| 321 | + f'exec {rendered} "$@"\n' | |
| 322 | + ) | |
| 323 | + | |
| 324 | + | |
| 325 | +def _quote_script_arg(arg: str) -> str: | |
| 326 | + if arg.startswith("$SCRIPT_DIR/"): | |
| 327 | + return f'"{arg}"' | |
| 328 | + if "=$SCRIPT_DIR/" in arg: | |
| 329 | + name, value = arg.split("=", 1) | |
| 330 | + return f'{shlex.quote(name)}="{value}"' | |
| 331 | + return shlex.quote(arg) | |
| 332 | + | |
| 333 | + | |
| 334 | +def _require_prepared_str(prepared: TargetResult, key: str) -> str: | |
| 335 | + value = prepared.extras.get(key) | |
| 336 | + if not isinstance(value, str) or not value: | |
| 337 | + raise ExportError(f"vllm prepared target missing string extra {key!r}") | |
| 338 | + return value | |
| 339 | + | |
| 340 | + | |
| 341 | +def _require_prepared_int(prepared: TargetResult, key: str) -> int: | |
| 342 | + value = prepared.extras.get(key) | |
| 343 | + if not isinstance(value, int): | |
| 344 | + raise ExportError(f"vllm prepared target missing int extra {key!r}") | |
| 345 | + return value | |
| 346 | + | |
| 347 | + | |
| 348 | +def _require_module_specs(prepared: TargetResult) -> tuple[LoraModule, ...]: | |
| 349 | + value = prepared.extras.get("module_specs") | |
| 350 | + if not isinstance(value, tuple) or not all(isinstance(item, LoraModule) for item in value): | |
| 351 | + raise ExportError("vllm prepared target missing LoraModule tuple extra 'module_specs'") | |
| 352 | + return value | |
| 353 | + | |
| 354 | + | |
| 355 | +VLLM_TARGET = VllmTarget() | |
| 356 | +assert isinstance(VLLM_TARGET, ExportTarget) | |
### tests/unit/cli/test_export_target_flag.py (modified)

@@ -43,7 +43,7 @@ class TestExportTargetFlag:
| 43 | 43 | "export", |
| 44 | 44 | str(tmp_path / "ghost.dlm"), |
| 45 | 45 | "--target", |
| 46 | - "vllm", | |
| 46 | + "sglang", | |
| 47 | 47 | ], |
| 48 | 48 | ) |
| 49 | 49 | assert result.exit_code == 2 |
@@ -51,6 +51,7 @@ class TestExportTargetFlag: | ||
| 51 | 51 | assert "unknown export target" in text |
| 52 | 52 | assert "ollama" in text |
| 53 | 53 | assert "llama-server" in text |
| 54 | + assert "vllm" in text | |
| 54 | 55 | |
| 55 | 56 | def test_ollama_target_reaches_existing_mutex_validation(self, tmp_path: Path) -> None: |
| 56 | 57 | doc = _scaffold_doc(tmp_path) |
@@ -92,3 +93,22 @@ class TestExportTargetFlag: | ||
| 92 | 93 | text = _joined(result) |
| 93 | 94 | assert "mutually exclusive" in text |
| 94 | 95 | assert "--no-smoke" not in text |
| 96 | + | |
| 97 | + def test_vllm_target_reaches_existing_mutex_validation(self, tmp_path: Path) -> None: | |
| 98 | + runner = CliRunner() | |
| 99 | + result = runner.invoke( | |
| 100 | + app, | |
| 101 | + [ | |
| 102 | + "--home", | |
| 103 | + str(tmp_path / "home"), | |
| 104 | + "export", | |
| 105 | + str(tmp_path / "ghost.dlm"), | |
| 106 | + "--target", | |
| 107 | + "vllm", | |
| 108 | + "--draft", | |
| 109 | + "qwen2.5:0.5b", | |
| 110 | + "--no-draft", | |
| 111 | + ], | |
| 112 | + ) | |
| 113 | + assert result.exit_code == 2 | |
| 114 | + assert "mutually exclusive" in _joined(result) | |
tests/unit/export/targets/test_registry.pymodified@@ -12,20 +12,21 @@ from dlm.export.targets import TARGETS, ExportTarget, available_targets, resolve | ||
| 12 | 12 | |
| 13 | 13 | |
| 14 | 14 | class TestRegistry: |
| 15 | - def test_ollama_target_is_registered(self) -> None: | |
| 15 | + def test_targets_are_registered(self) -> None: | |
| 16 | 16 | target = resolve_target("ollama") |
| 17 | 17 | assert target.name == "ollama" |
| 18 | 18 | assert isinstance(target, ExportTarget) |
| 19 | 19 | assert TARGETS["ollama"] is target |
| 20 | 20 | assert "llama-server" in TARGETS |
| 21 | - assert available_targets() == ("ollama", "llama-server") | |
| 21 | + assert "vllm" in TARGETS | |
| 22 | + assert available_targets() == ("ollama", "llama-server", "vllm") | |
| 22 | 23 | |
| 23 | 24 | def test_unknown_target_lists_available_targets(self) -> None: |
| 24 | 25 | with pytest.raises( |
| 25 | 26 | UnknownExportTargetError, |
| 26 | - match="available targets: ollama, llama-server", | |
| 27 | + match="available targets: ollama, llama-server, vllm", | |
| 27 | 28 | ): |
| 28 | - resolve_target("vllm") | |
| 29 | + resolve_target("sglang") | |
| 29 | 30 | |
| 30 | 31 | |
| 31 | 32 | class TestOllamaWrapper: |
tests/unit/export/targets/test_vllm_argv.pyadded@@ -0,0 +1,166 @@ | ||
| 1 | +"""vLLM launch artifact generation.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +import json | |
| 6 | +from pathlib import Path | |
| 7 | + | |
| 8 | +from dlm.base_models import BASE_MODELS | |
| 9 | +from dlm.export.manifest import load_export_manifest | |
| 10 | +from dlm.export.targets.vllm import ( | |
| 11 | + VLLM_CONFIG_FILENAME, | |
| 12 | + VLLM_TARGET, | |
| 13 | + finalize_vllm_export, | |
| 14 | + prepare_vllm_export, | |
| 15 | +) | |
| 16 | +from dlm.store.manifest import Manifest, load_manifest, save_manifest | |
| 17 | +from dlm.store.paths import for_dlm | |
| 18 | + | |
| 19 | +_SPEC = BASE_MODELS["smollm2-135m"] | |
| 20 | + | |
| 21 | + | |
| 22 | +def _write_adapter(path: Path) -> None: | |
| 23 | + path.mkdir(parents=True) | |
| 24 | + (path / "adapter_config.json").write_text("{}", encoding="utf-8") | |
| 25 | + (path / "adapter_model.safetensors").write_bytes(b"adapter") | |
| 26 | + (path / "tokenizer_config.json").write_text( | |
| 27 | + json.dumps({"chat_template": "{{messages}}", "vocab_size": 32000}), | |
| 28 | + encoding="utf-8", | |
| 29 | + ) | |
| 30 | + | |
| 31 | + | |
def _setup_flat_store(tmp_path: Path) -> object:
    """Build a store with a single unnamed adapter pinned at version 3."""
    store = for_dlm("01VLLMTEST", home=tmp_path)
    store.ensure_layout()
    manifest = Manifest(dlm_id="01VLLMTEST", base_model=_SPEC.key)
    save_manifest(store.manifest, manifest)
    adapter_dir = store.adapter_version(3)
    _write_adapter(adapter_dir)
    store.set_current_adapter(adapter_dir)
    return store
| 40 | + | |
| 41 | + | |
def _setup_named_store(tmp_path: Path) -> object:
    """Build a store with two named adapters: knowledge v2 and tone v4."""
    store = for_dlm("01VLLMMULTI", home=tmp_path)
    store.ensure_layout()
    save_manifest(store.manifest, Manifest(dlm_id="01VLLMMULTI", base_model=_SPEC.key))
    for name, version in (("knowledge", 2), ("tone", 4)):
        adapter_dir = store.adapter_version_for(name, version)
        _write_adapter(adapter_dir)
        store.set_current_adapter_for(name, adapter_dir)
    return store
| 53 | + | |
| 54 | + | |
class TestPrepareVllmExport:
    """End-to-end artifact generation for the vllm export target."""

    def test_flat_export_writes_config_manifest_and_launch_script(self, tmp_path: Path) -> None:
        """Flat (single unnamed adapter) export produces script, config, and manifests."""
        store = _setup_flat_store(tmp_path)

        prepared = prepare_vllm_export(
            store=store,
            spec=_SPEC,
            served_model_name="dlm-flat",
            adapter_name=None,
            adapter_path_override=None,
            declared_adapter_names=None,
        )
        # finalize records the export in both manifests; the smoke line is threaded through.
        manifest_path = finalize_vllm_export(
            store=store,
            spec=_SPEC,
            prepared=prepared,
            smoke_output_first_line="hello from vllm",
            adapter_name=None,
            adapter_mix=None,
        )

        # Launch script: bash strict mode, vllm serve against the HF base,
        # pinned revision, and a $SCRIPT_DIR-relative adapter path so the
        # export directory stays relocatable.
        assert prepared.launch_script_path is not None
        script = prepared.launch_script_path.read_text(encoding="utf-8")
        assert script.startswith("#!/usr/bin/env bash\nset -euo pipefail\n")
        assert "vllm serve" in script
        assert _SPEC.hf_id in script
        assert "--revision" in script
        assert "--served-model-name dlm-flat" in script
        assert 'adapter="$SCRIPT_DIR/adapters/adapter"' in script

        # vllm_config.json mirrors the launch parameters with relative paths.
        config = json.loads(
            (prepared.export_dir / VLLM_CONFIG_FILENAME).read_text(encoding="utf-8")
        )
        assert config["target"] == "vllm"
        assert config["model"] == _SPEC.hf_id
        assert config["served_model_name"] == "dlm-flat"
        assert config["lora_modules"] == [
            {"adapter_version": 3, "name": "adapter", "path": "adapters/adapter"}
        ]

        # Export manifest: vllm target uses the "hf" pseudo-quant (no GGUF)
        # and lists the launch artifacts plus the copied adapter weights.
        export_manifest = load_export_manifest(prepared.export_dir)
        assert manifest_path == prepared.manifest_path
        assert export_manifest.target == "vllm"
        assert export_manifest.quant == "hf"
        assert export_manifest.adapter_version == 3
        assert any(artifact.path == "vllm_launch.sh" for artifact in export_manifest.artifacts)
        assert any(artifact.path == "vllm_config.json" for artifact in export_manifest.artifacts)
        assert any(
            artifact.path == "adapters/adapter/adapter_model.safetensors"
            for artifact in export_manifest.artifacts
        )

        # Store manifest appends an export record carrying the smoke output.
        store_manifest = load_manifest(store.manifest)
        assert store_manifest.exports[-1].target == "vllm"
        assert store_manifest.exports[-1].quant == "hf"
        assert store_manifest.exports[-1].smoke_output_first_line == "hello from vllm"

    def test_multi_adapter_export_includes_all_named_modules(self, tmp_path: Path) -> None:
        """Every declared named adapter becomes its own LoRA module entry."""
        store = _setup_named_store(tmp_path)

        prepared = prepare_vllm_export(
            store=store,
            spec=_SPEC,
            served_model_name="dlm-multi",
            adapter_name=None,
            adapter_path_override=None,
            declared_adapter_names=("knowledge", "tone"),
        )

        # Both named adapters appear as $SCRIPT_DIR-relative module args.
        script = prepared.launch_script_path.read_text(encoding="utf-8")
        assert 'knowledge="$SCRIPT_DIR/adapters/knowledge"' in script
        assert 'tone="$SCRIPT_DIR/adapters/tone"' in script

        config = json.loads(
            (prepared.export_dir / VLLM_CONFIG_FILENAME).read_text(encoding="utf-8")
        )
        # Entries keep declaration order and each carries its own version.
        assert config["lora_modules"] == [
            {"adapter_version": 2, "name": "knowledge", "path": "adapters/knowledge"},
            {"adapter_version": 4, "name": "tone", "path": "adapters/tone"},
        ]
| 135 | + | |
| 136 | + | |
class TestVllmSmoke:
    """Smoke-test wiring for the vllm target."""

    def test_smoke_uses_absolute_runtime_paths(self, tmp_path: Path, monkeypatch: object) -> None:
        """smoke_test must hand the harness a directly runnable argv.

        The launch script uses $SCRIPT_DIR placeholders; the smoke argv must
        instead carry fully resolved filesystem paths.
        """
        store = _setup_named_store(tmp_path)
        prepared = prepare_vllm_export(
            store=store,
            spec=_SPEC,
            served_model_name="dlm-multi",
            adapter_name=None,
            adapter_path_override=None,
            declared_adapter_names=("knowledge", "tone"),
        )

        captured: list[list[str]] = []

        def _record_smoke(argv: list[str], **_: object) -> str:
            captured.append(list(argv))
            return "vllm replied"

        monkeypatch.setattr("dlm.export.targets.vllm.smoke_openai_compat_server", _record_smoke)

        outcome = VLLM_TARGET.smoke_test(prepared)

        assert outcome.attempted is True
        assert outcome.ok is True
        assert outcome.detail == "vllm replied"
        launch_argv = captured[0]
        assert launch_argv[:2] == ["vllm", "serve"]
        assert "$SCRIPT_DIR" not in " ".join(launch_argv)
        assert _SPEC.hf_id in launch_argv
        assert f"knowledge={prepared.export_dir / 'adapters' / 'knowledge'}" in launch_argv
        assert f"tone={prepared.export_dir / 'adapters' / 'tone'}" in launch_argv