1 """Modality-aware export dispatcher — pure logic, no CLI I/O.
2
3 Three public entry points (`dispatch_vl_export`, `dispatch_audio_export`,
4 `emit_vl_snapshot`) wrap the existing single-modality runners
5 (`run_vl_gguf_export`, `run_vl_snapshot_export`, `run_audio_snapshot_export`)
6 with the arch-probe + fallback decision tree that used to live inline
7 in `dlm.cli.commands`. They return :class:`DispatchResult`; the CLI
8 prints `banner_lines` and handles typed errors.
9
10 Why pull this out of the CLI:
11
12 - Business logic lived alongside `console.print` + `typer.Exit`, which
13 made the dispatch decisions untestable without booting a full Typer
14 runner. Returning data (lines + paths) means unit tests can assert
15 which path was taken from the banner contents.
16 - `cli/commands.py` had direct heavy imports (`run_vl_gguf_export`,
17 `run_audio_snapshot_export`) and modality `== "vision-language"`
18 branches — both routed through the shared dispatch helpers.
19
20 All user-facing flow control is still here; the CLI is the thin
21 parse+print layer on top.
22 """
23
from __future__ import annotations

from dataclasses import dataclass, field, replace
from pathlib import Path
from typing import TYPE_CHECKING, Any

from dlm.export.errors import (
    ExportError,
    ProcessorLoadError,
    VendoringError,
    VlGgufUnsupportedError,
)

if TYPE_CHECKING:
    from dlm.base_models import BaseModelSpec
    from dlm.store.paths import StorePath
40
41
42 @dataclass(frozen=True)
43 class DispatchResult:
44 """What the CLI needs to print after a successful dispatch.
45
46 `banner_lines` are pre-formatted Rich markup accumulated during
47 the dispatch (fallback announcements, final success). The CLI
48 iterates them in order. `extras` carries path-specific extras
49 (GGUF path, llama.cpp tag) under documented keys per modality.
50 """
51
52 export_dir: Path
53 manifest_path: Path
54 artifacts: list[Path]
55 banner_lines: list[str]
56 extras: dict[str, Any] = field(default_factory=dict)
57
58
59 def _load_processor_or_raise(spec: BaseModelSpec) -> Any:
60 """Load HF processor, wrap any failure in :class:`ProcessorLoadError`.
61
62 The HF snapshot exports need the processor to be loadable on the
63 recipient side — shipping an incomplete tarball is worse than a
64 crisp refusal. All flavors of import/network/license failure
65 collapse into one typed error the CLI can print in one line.
66 """
67 from dlm.train.loader import load_processor # pragma: no cover - heavy
68
69 try:
70 return load_processor(spec) # pragma: no cover - heavy
71 except Exception as exc: # noqa: BLE001 - wrapping heterogeneous HF errors
72 raise ProcessorLoadError(
73 f"could not load processor for {spec.key!r} "
74 f"({type(exc).__name__}: {exc}). "
75 "The HF-snapshot export needs the processor to be loadable — "
76 "verify license acceptance + network + cache, then re-run."
77 ) from exc
78
79
80 def emit_vl_snapshot(
81 *,
82 store: StorePath,
83 spec: BaseModelSpec,
84 adapter_name: str | None,
85 quant: str | None,
86 merged: bool,
87 adapter_mix_raw: str | None,
88 skip_gguf_flag_warning: bool = False,
89 ) -> DispatchResult:
90 """Emit the HF-snapshot VL artifact + return its banner lines.
91
92 Kept separate from the probe logic so the dispatcher can reach
93 this both on non-SUPPORTED verdicts and on a GGUF emission
94 fallback. `skip_gguf_flag_warning` is True on the fallback path —
95 the user already saw a "GGUF emission refused" banner, and
96 re-warning about --quant/--merged would be noisy.
97 """
98 from dlm.export.vl_snapshot import run_vl_snapshot_export
99
100 banner: list[str] = []
101 if not skip_gguf_flag_warning and (quant is not None or merged or adapter_mix_raw is not None):
102 banner.append(
103 "[yellow]export:[/yellow] ignoring GGUF-only flags "
104 "(--quant / --merged / --adapter-mix) — they're not applicable "
105 "to the HF-snapshot path."
106 )
107
108 processor = _load_processor_or_raise(spec)
109 result = run_vl_snapshot_export(
110 store,
111 spec,
112 adapter_name=adapter_name,
113 processor=processor,
114 )
115
116 banner.append(
117 f"[green]export:[/green] HF snapshot written to {result.export_dir}\n"
118 f" manifest: {result.manifest_path.name}\n"
119 f" adapter: {result.adapter_dir}\n"
120 f" artifacts: {len(result.artifacts)} file(s)"
121 )
122 return DispatchResult(
123 export_dir=result.export_dir,
124 manifest_path=result.manifest_path,
125 artifacts=list(result.artifacts),
126 banner_lines=banner,
127 extras={"adapter_dir": result.adapter_dir, "path": "hf-snapshot"},
128 )
129
130
131 def dispatch_vl_export(
132 *,
133 store: StorePath,
134 spec: BaseModelSpec,
135 adapter_name: str | None,
136 quant: str | None,
137 merged: bool,
138 adapter_mix_raw: str | None,
139 gguf_emission_context: dict[str, Any] | None = None,
140 ) -> DispatchResult:
141 """Route a VL spec through the GGUF or HF-snapshot export path.
142
143 Probes the vendored llama.cpp for arch coverage and picks a path:
144
145 - **SUPPORTED** + `gguf_emission_context` present → try single-file
146 GGUF emission via `run_vl_gguf_export`. On `VlGgufUnsupportedError`
147 (plan refusal), `VendoringError` (missing/unbuilt vendor), or
148 `ExportError` (subprocess failure), fall back to HF-snapshot
149 with an explanatory banner.
150 - **PARTIAL** → HF-snapshot with a banner explaining the split-arch
151 caveat (vision tower would require an mmproj sidecar upstream
152 doesn't emit at our pinned tag).
153 - **UNSUPPORTED** (or probe failure) → HF-snapshot with a banner
154 pointing the user at `scripts/bump-llama-cpp.sh`.
155
156 `gguf_emission_context` carries everything the GGUF path needs
157 (plan, cached base dir, source dlm path, sequence len, dlm
158 version). `None` forces the snapshot path.
159 """
160 from dlm.export.arch_probe import SupportLevel, probe_gguf_arch
161 from dlm.export.vl_gguf import run_vl_gguf_export
162
163 probe_banner: list[str] = []
164 try:
165 verdict = probe_gguf_arch(spec.architecture)
166 except VendoringError as exc:
167 probe_banner.append(
168 f"[yellow]export:[/yellow] llama.cpp probe unavailable ({exc}); "
169 "falling back to HF-snapshot without a GGUF verdict."
170 )
171 verdict = None
172
173 if verdict is None or verdict.support is SupportLevel.UNSUPPORTED:
174 tag_note = f"at tag={verdict.llama_cpp_tag or 'unknown'} " if verdict is not None else ""
175 probe_banner.append(
176 f"[yellow]export:[/yellow] base {spec.key!r} "
177 f"(arch={spec.architecture}) is not covered by the vendored "
178 f"llama.cpp {tag_note}— emitting HF-snapshot. Run "
179 "`scripts/bump-llama-cpp.sh` to pull a newer tag if upstream "
180 "has added support, or ship this adapter as a snapshot."
181 )
182 result = emit_vl_snapshot(
183 store=store,
184 spec=spec,
185 adapter_name=adapter_name,
186 quant=quant,
187 merged=merged,
188 adapter_mix_raw=adapter_mix_raw,
189 )
190 return _prepend_banner(probe_banner, result)
191
192 if verdict.support is SupportLevel.PARTIAL:
193 probe_banner.append(
194 f"[yellow]export:[/yellow] base {spec.key!r} has PARTIAL "
195 "llama.cpp coverage (vision tower ships as mmproj sidecar). "
196 "Emitting HF-snapshot — single-file GGUF emission for "
197 "split VL archs is gated on upstream mmproj support."
198 )
199 result = emit_vl_snapshot(
200 store=store,
201 spec=spec,
202 adapter_name=adapter_name,
203 quant=quant,
204 merged=merged,
205 adapter_mix_raw=adapter_mix_raw,
206 )
207 return _prepend_banner(probe_banner, result)
208
209 # SUPPORTED
210 if gguf_emission_context is None:
211 probe_banner.append(
212 f"[yellow]export:[/yellow] base {spec.key!r} is SUPPORTED by "
213 f"llama.cpp (tag={verdict.llama_cpp_tag or 'unknown'}), but "
214 "this dispatcher was invoked without GGUF plan context — "
215 "emitting HF-snapshot."
216 )
217 result = emit_vl_snapshot(
218 store=store,
219 spec=spec,
220 adapter_name=adapter_name,
221 quant=quant,
222 merged=merged,
223 adapter_mix_raw=adapter_mix_raw,
224 )
225 return _prepend_banner(probe_banner, result)
226
227 probe_banner.append(
228 f"[dim]export:[/dim] base {spec.key!r} is SUPPORTED by llama.cpp "
229 f"(tag={verdict.llama_cpp_tag or 'unknown'}); attempting single-file "
230 "VL GGUF emission."
231 )
232 try:
233 gguf_result = run_vl_gguf_export(
234 store,
235 spec,
236 gguf_emission_context["plan"],
237 verdict=verdict,
238 cached_base_dir=gguf_emission_context["cached_base_dir"],
239 adapter_name=adapter_name,
240 system_prompt=gguf_emission_context.get("system_prompt"),
241 source_dlm_path=gguf_emission_context.get("source_dlm_path"),
242 dlm_version=gguf_emission_context.get("dlm_version", "dev"),
243 training_sequence_len=gguf_emission_context.get("training_sequence_len"),
244 )
245 except VlGgufUnsupportedError as exc:
246 probe_banner.append(
247 f"[yellow]export:[/yellow] VL GGUF emission refused ({exc}); "
248 "falling back to HF-snapshot."
249 )
250 result = emit_vl_snapshot(
251 store=store,
252 spec=spec,
253 adapter_name=adapter_name,
254 quant=quant,
255 merged=merged,
256 adapter_mix_raw=adapter_mix_raw,
257 skip_gguf_flag_warning=True,
258 )
259 return _prepend_banner(probe_banner, result)
260 except (VendoringError, ExportError) as exc:
261 probe_banner.append(
262 f"[yellow]export:[/yellow] VL GGUF emission failed "
263 f"({type(exc).__name__}: {exc}); falling back to HF-snapshot."
264 )
265 result = emit_vl_snapshot(
266 store=store,
267 spec=spec,
268 adapter_name=adapter_name,
269 quant=quant,
270 merged=merged,
271 adapter_mix_raw=adapter_mix_raw,
272 skip_gguf_flag_warning=True,
273 )
274 return _prepend_banner(probe_banner, result)
275
276 probe_banner.append(
277 f"[green]export:[/green] VL GGUF written to {gguf_result.export_dir}\n"
278 f" manifest: {gguf_result.manifest_path.name}\n"
279 f" gguf: {gguf_result.gguf_path.name} ({gguf_result.quant})\n"
280 f" Modelfile: {gguf_result.modelfile_path.name}\n"
281 f" llama.cpp: {gguf_result.llama_cpp_tag or 'unknown'}\n"
282 f" artifacts: {len(gguf_result.artifacts)} file(s)"
283 )
284 return DispatchResult(
285 export_dir=gguf_result.export_dir,
286 manifest_path=gguf_result.manifest_path,
287 artifacts=list(gguf_result.artifacts),
288 banner_lines=probe_banner,
289 extras={
290 "path": "vl-gguf",
291 "gguf_path": gguf_result.gguf_path,
292 "modelfile_path": gguf_result.modelfile_path,
293 "quant": gguf_result.quant,
294 "llama_cpp_tag": gguf_result.llama_cpp_tag,
295 "mmproj_path": gguf_result.mmproj_path,
296 },
297 )
298
299
300 def dispatch_audio_export(
301 *,
302 store: StorePath,
303 spec: BaseModelSpec,
304 adapter_name: str | None,
305 quant: str | None,
306 merged: bool,
307 adapter_mix_raw: str | None,
308 ) -> DispatchResult:
309 """Route an audio-language spec through the HF-snapshot export path.
310
311 Parallel to :func:`dispatch_vl_export` but simpler: llama.cpp has
312 no audio-arch roadmap at our pinned tag, so there's nothing to
313 probe — always emit an HF-snapshot.
314 """
315 from dlm.export.audio_snapshot import run_audio_snapshot_export
316
317 banner: list[str] = [
318 f"[yellow]export:[/yellow] base {spec.key!r} is audio-language; "
319 "emitting HF-snapshot (GGUF not supported for audio archs)."
320 ]
321 if quant is not None or merged or adapter_mix_raw is not None:
322 banner.append(
323 "[yellow]export:[/yellow] ignoring GGUF-only flags "
324 "(--quant / --merged / --adapter-mix) — they're not applicable "
325 "to the HF-snapshot path."
326 )
327
328 processor = _load_processor_or_raise(spec)
329 result = run_audio_snapshot_export(
330 store,
331 spec,
332 adapter_name=adapter_name,
333 processor=processor,
334 )
335
336 banner.append(
337 f"[green]export:[/green] HF audio snapshot written to {result.export_dir}\n"
338 f" manifest: {result.manifest_path.name}\n"
339 f" adapter: {result.adapter_dir}\n"
340 f" artifacts: {len(result.artifacts)} file(s)"
341 )
342 return DispatchResult(
343 export_dir=result.export_dir,
344 manifest_path=result.manifest_path,
345 artifacts=list(result.artifacts),
346 banner_lines=banner,
347 extras={"path": "audio-snapshot", "adapter_dir": result.adapter_dir},
348 )
349
350
351 def _prepend_banner(lines: list[str], result: DispatchResult) -> DispatchResult:
352 """Return a new DispatchResult with `lines` prepended to banner_lines."""
353 return DispatchResult(
354 export_dir=result.export_dir,
355 manifest_path=result.manifest_path,
356 artifacts=result.artifacts,
357 banner_lines=[*lines, *result.banner_lines],
358 extras=result.extras,
359 )