documentlanguagemodel Public

Watch 0 Fork 0 Star 0

Python · 2590 bytes Raw Blame History

  
        1
        """Convert an HF base model to fp16 GGUF, then quantize.
      
        2
        
        3
        Two-step pipeline invoked per `(spec, quant)` combination:
      
        4
        
        5
        1. `convert_hf_to_gguf.py <cached_base> --outfile <fp16_gguf> --outtype f16`
      
        6
        2. `llama-quantize <fp16_gguf> <quant_gguf> <QUANT>`
      
        7
        
        8
        Caching: if `base.<quant>.gguf` already exists in the target dir and
      
        9
        its sha matches the manifest's recorded hash, skip both steps. This is
      
        10
        the "re-running export with unchanged adapter + quant is a no-op"
      
        11
        DoD item.
      
        12
        
        13
        This module exposes two functions — `build_convert_hf_args` and
      
        14
        `build_quantize_args` — that return the `subprocess.run` argv lists.
      
        15
        The real runner (`runner.py`) consumes them, adds timeout + stderr
      
        16
        capture, and retries. Keeping command assembly as a pure function
      
        17
        makes the snapshot test trivial (audit F09: the `--help` diff gate).
      
        18
        """
      
        19
        
        20
        from __future__ import annotations
      
        21
        
        22
        import sys
      
        23
        from pathlib import Path
      
        24
        
        25
        from dlm.export import vendoring
      
        26
        from dlm.export.plan import QuantLevel
      
        27
        
        28
        
        29
        def build_convert_hf_args(
            cached_base_dir: Path,
            *,
            out_fp16: Path,
            outtype: str = "f16",
            script_override: Path | None = None,
            python_exe: str | None = None,
        ) -> list[str]:
            """Return the argv that runs `convert_hf_to_gguf.py` on a cached base.

            Nothing is executed here: the function only formats strings, and
            the sole outside call is `vendoring.convert_hf_to_gguf_py`, which
            resolves the converter script path (honouring `script_override`).

            Args:
                cached_base_dir: Directory of the cached HF base model; passed
                    as the script's positional argument.
                out_fp16: Destination handed to `--outfile`.
                outtype: Value handed to `--outtype`.
                script_override: Forwarded unchanged to the vendoring resolver.
                python_exe: Interpreter to launch; any falsy value (`None` or
                    an empty string) falls back to `sys.executable`.

            Returns:
                `[interpreter, script, base_dir, "--outfile", out, "--outtype", type]`.
            """
            converter = vendoring.convert_hf_to_gguf_py(script_override)
            interpreter = python_exe or sys.executable
            # Positional part first, then the two flag/value pairs, so the
            # resulting order is stable for snapshot comparison.
            command = [interpreter, str(converter), str(cached_base_dir)]
            command += ["--outfile", str(out_fp16)]
            command += ["--outtype", outtype]
            return command
      
        53
        
        54
        
        55
        def build_quantize_args(
            fp16_gguf: Path,
            *,
            out_quant: Path,
            quant: QuantLevel,
            bin_override: Path | None = None,
            imatrix_path: Path | None = None,
        ) -> list[str]:
            """Return the argv for `llama-quantize [--imatrix P] <in> <out> <QUANT>`.

            The quant level is a positional argument upstream (there is no
            `--quant` flag), so it is appended exactly as received.

            Args:
                fp16_gguf: Input GGUF file (first positional).
                out_quant: Output GGUF file (second positional).
                quant: Quant level, passed through verbatim as the third
                    positional.
                bin_override: Forwarded unchanged to
                    `vendoring.llama_quantize_bin`, which resolves the binary.
                imatrix_path: When not `None`, emitted as `--imatrix <path>`
                    ahead of the positionals. No per-quant filtering happens
                    here; the caller decides whether to supply a path.

            Returns:
                The argv list, binary first.
            """
            tool = vendoring.llama_quantize_bin(bin_override)
            # Optional flag pair; empty when no importance matrix was given.
            imatrix_flag = [] if imatrix_path is None else ["--imatrix", str(imatrix_path)]
            return [str(tool), *imatrix_flag, str(fp16_gguf), str(out_quant), quant]

1	"""Convert an HF base model to fp16 GGUF, then quantize.
2
3	Two-step pipeline invoked per `(spec, quant)` combination:
4
5	1. `convert_hf_to_gguf.py <cached_base> --outfile <fp16_gguf> --outtype f16`
6	2. `llama-quantize <fp16_gguf> <quant_gguf> <QUANT>`
7
8	Caching: if `base.<quant>.gguf` already exists in the target dir and
9	its sha matches the manifest's recorded hash, skip both steps. This is
10	the "re-running export with unchanged adapter + quant is a no-op"
11	DoD item.
12
13	This module exposes two functions — `build_convert_hf_args` and
14	`build_quantize_args` — that return the `subprocess.run` argv lists.
15	The real runner (`runner.py`) consumes them, adds timeout + stderr
16	capture, and retries. Keeping command assembly as a pure function
17	makes the snapshot test trivial (audit F09: the `--help` diff gate).
18	"""
19
20	from __future__ import annotations
21
22	import sys
23	from pathlib import Path
24
25	from dlm.export import vendoring
26	from dlm.export.plan import QuantLevel
27
28
def build_convert_hf_args(
    cached_base_dir: Path,
    *,
    out_fp16: Path,
    outtype: str = "f16",
    script_override: Path | None = None,
    python_exe: str | None = None,
) -> list[str]:
    """Build the `python convert_hf_to_gguf.py ...` command line.

    Pure argv assembly: no subprocess is started. The only external call
    is `vendoring.convert_hf_to_gguf_py(script_override)`, which supplies
    the path of the converter script.

    Args:
        cached_base_dir: Cached HF base model directory (positional arg).
        out_fp16: Path given to `--outfile`.
        outtype: Value given to `--outtype`.
        script_override: Passed straight through to the vendoring resolver.
        python_exe: Interpreter to use; a falsy value selects
            `sys.executable` instead.

    Returns:
        The argv list: interpreter, script, base dir, then the
        `--outfile` and `--outtype` flag/value pairs.
    """
    script_path = vendoring.convert_hf_to_gguf_py(script_override)
    launcher = python_exe or sys.executable
    positional = (launcher, str(script_path), str(cached_base_dir))
    flags = ("--outfile", str(out_fp16), "--outtype", outtype)
    return [*positional, *flags]
53
54
def build_quantize_args(
    fp16_gguf: Path,
    *,
    out_quant: Path,
    quant: QuantLevel,
    bin_override: Path | None = None,
    imatrix_path: Path | None = None,
) -> list[str]:
    """Build the `llama-quantize <in> <out> <QUANT>` command line.

    `llama-quantize` has no `--quant` flag; the level is the final
    positional argument and is appended as given.

    Args:
        fp16_gguf: Source GGUF file.
        out_quant: Destination GGUF file.
        quant: Quant level, appended verbatim after the two paths.
        bin_override: Passed straight through to
            `vendoring.llama_quantize_bin`, which resolves the binary path.
        imatrix_path: If provided, `--imatrix <path>` is inserted before
            the positional arguments. This function does not decide
            whether an imatrix is meaningful for `quant`; callers do.

    Returns:
        The argv list, starting with the resolved binary.
    """
    quantizer = vendoring.llama_quantize_bin(bin_override)
    command = [str(quantizer)]
    if imatrix_path is not None:
        # Options must come before the positionals.
        command += ["--imatrix", str(imatrix_path)]
    return command + [str(fp16_gguf), str(out_quant), quant]