Python · 2590 bytes Raw Blame History
1 """Convert an HF base model to fp16 GGUF, then quantize.
2
3 Two-step pipeline invoked per `(spec, quant)` combination:
4
5 1. `convert_hf_to_gguf.py <cached_base> --outfile <fp16_gguf> --outtype f16`
6 2. `llama-quantize <fp16_gguf> <quant_gguf> <QUANT>`
7
8 Caching: if `base.<quant>.gguf` already exists in the target dir and
9 its sha matches the manifest's recorded hash, skip both steps. This is
10 the "re-running export with unchanged adapter + quant is a no-op"
11 DoD item.
12
13 This module exposes two functions — `build_convert_hf_args` and
14 `build_quantize_args` — that return the `subprocess.run` argv lists.
15 The real runner (`runner.py`) consumes them, adds timeout + stderr
16 capture, and retries. Keeping command assembly as a pure function
17 makes the snapshot test trivial (audit F09: the `--help` diff gate).
18 """
19
20 from __future__ import annotations
21
22 import sys
23 from pathlib import Path
24
25 from dlm.export import vendoring
26 from dlm.export.plan import QuantLevel
27
28
def build_convert_hf_args(
    cached_base_dir: Path,
    *,
    out_fp16: Path,
    outtype: str = "f16",
    script_override: Path | None = None,
    python_exe: str | None = None,
) -> list[str]:
    """Build the argv that runs the HF-to-GGUF conversion script.

    The returned list is laid out as::

        <python> <script> <cached_base_dir> --outfile <out_fp16> --outtype <outtype>

    Nothing is executed here. The only outside call is
    `vendoring.convert_hf_to_gguf_py`, which resolves the script path
    (it receives `script_override` as-is). Because the output is a plain
    list of strings, it can be snapshot-tested against the pinned
    upstream CLI surface.

    Args:
        cached_base_dir: Directory of the cached HF base model; passed
            as the script's positional argument.
        out_fp16: Destination path given to `--outfile`.
        outtype: Value given to `--outtype`.
        script_override: Forwarded to `vendoring.convert_hf_to_gguf_py`.
        python_exe: Interpreter to launch the script with; when falsy,
            `sys.executable` is used instead.

    Returns:
        The argv list, interpreter first.
    """
    converter = vendoring.convert_hf_to_gguf_py(script_override)
    # Fall back to the interpreter running this process.
    interpreter = python_exe or sys.executable

    argv = [interpreter, str(converter), str(cached_base_dir)]
    argv += ["--outfile", str(out_fp16)]
    argv += ["--outtype", outtype]
    return argv
53
54
def build_quantize_args(
    fp16_gguf: Path,
    *,
    out_quant: Path,
    quant: QuantLevel,
    bin_override: Path | None = None,
    imatrix_path: Path | None = None,
) -> list[str]:
    """Build the argv for the `llama-quantize` binary.

    The returned list is laid out as::

        <binary> [--imatrix <imatrix_path>] <fp16_gguf> <out_quant> <quant>

    The quant level is the trailing POSITIONAL argument and is passed
    through verbatim (upstream `llama-quantize` has no `--quant` flag).

    The `--imatrix <path>` pair is emitted only when `imatrix_path` is
    not None, and it always precedes the positionals. No filtering by
    quant level happens here: upstream ignores the importance matrix on
    non-k-quant levels (`Q8_0`, `F16`), so the decision to supply a
    path at all is left to the caller.

    Args:
        fp16_gguf: Input fp16 GGUF file.
        out_quant: Output path for the quantized GGUF.
        quant: Quantization level token, placed last.
        bin_override: Forwarded to `vendoring.llama_quantize_bin`.
        imatrix_path: Optional importance-matrix file.

    Returns:
        The argv list, binary first.
    """
    quantizer = vendoring.llama_quantize_bin(bin_override)

    prefix = [str(quantizer)]
    # Optional flags must come before the positional arguments.
    optional_flags = (
        ["--imatrix", str(imatrix_path)] if imatrix_path is not None else []
    )
    positionals = [str(fp16_gguf), str(out_quant), quant]
    return [*prefix, *optional_flags, *positionals]