1 """imatrix build + cache + argv (Sprint 11.6)."""

from __future__ import annotations

import json
from datetime import UTC, datetime
from pathlib import Path
from typing import Any

import pytest

from dlm.export.errors import SubprocessError
from dlm.export.imatrix import (
    DEFAULT_CHUNK_SIZE,
    DEFAULT_CHUNKS,
    ImatrixArtifact,
    _sha256_of_file,
    build_imatrix,
    build_imatrix_args,
    calibration_text_from_replay,
    resolve_imatrix,
)


def _write_vendor(tmp_path: Path) -> Path:
    """Build a fake vendor tree so llama_imatrix_bin() resolves."""
    vendor = tmp_path / "vendor" / "llama.cpp"
    (vendor / "build" / "bin").mkdir(parents=True)
    (vendor / "build" / "bin" / "llama-imatrix").write_text("# mock")
    (vendor / "convert_hf_to_gguf.py").write_text("# mock")  # required by vendoring
    return vendor


# --- build_imatrix_args -------------------------------------------------------


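# A representative argv, as a sketch only: the tests below assert flag
# presence, not ordering, and build_imatrix_args may emit flags beyond the
# ones checked here.
#
#   vendor/llama.cpp/build/bin/llama-imatrix \
#       -m base.fp16.gguf -f calib.txt -o imatrix.gguf --chunks 128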
class TestBuildArgs:
    def test_includes_all_required_flags(self, tmp_path: Path) -> None:
        vendor = _write_vendor(tmp_path)
        argv = build_imatrix_args(
            base_gguf=tmp_path / "base.fp16.gguf",
            calib_path=tmp_path / "calib.txt",
            out_path=tmp_path / "imatrix.gguf",
            chunks=128,
            bin_override=vendor,
        )
        # The resolved binary path comes first.
        assert argv[0].endswith("llama-imatrix")
        assert "-m" in argv
        assert "-f" in argv
        assert "-o" in argv
        assert "--chunks" in argv
        assert "128" in argv

    def test_paths_are_stringified(self, tmp_path: Path) -> None:
        vendor = _write_vendor(tmp_path)
        argv = build_imatrix_args(
            base_gguf=tmp_path / "b.gguf",
            calib_path=tmp_path / "c.txt",
            out_path=tmp_path / "o.gguf",
            chunks=64,
            bin_override=vendor,
        )
        assert all(isinstance(a, str) for a in argv)


# --- build_imatrix ------------------------------------------------------------


class TestBuildImatrix:
    def _fake_runner(self, *, write_output: bool = True) -> tuple[Any, list[list[str]]]:
        """Runner that captures argv and optionally writes the output file."""
        calls: list[list[str]] = []

        def runner(argv: Any) -> None:
            cmd = list(argv)
            calls.append(cmd)
            if write_output:
                out_ix = cmd.index("-o") + 1
                Path(cmd[out_ix]).write_bytes(b"fake imatrix bytes")

        return runner, calls

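    # build_imatrix treats a missing output file as a failure (see
    # test_missing_output_raises_subprocess_error below), so the happy-path
    # fake runner must itself write the file at the "-o" path.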
    def test_successful_build_writes_sidecar(self, tmp_path: Path) -> None:
        vendor = _write_vendor(tmp_path)
        base_gguf = tmp_path / "base.fp16.gguf"
        base_gguf.write_bytes(b"ignored; build_imatrix only checks that the base exists")
        export_dir = tmp_path / "exports" / "Q4_K_M"

        runner, calls = self._fake_runner()
        artifact = build_imatrix(
            base_gguf=base_gguf,
            calibration_text="some calibration text " * 50,
            export_dir=export_dir,
            base_revision="r1",
            corpus_sha256="c1",
            chunks=32,
            bin_override=vendor,
            subprocess_runner=runner,
        )
        assert len(calls) == 1
        assert artifact.path.is_file()
        assert (export_dir / "imatrix.meta.json").is_file()
        meta = json.loads((export_dir / "imatrix.meta.json").read_text())
        assert meta["base_revision"] == "r1"
        assert meta["corpus_sha256"] == "c1"
        assert meta["chunks"] == 32
        assert meta["sha256"] == artifact.sha256
        # Calib text file removed on success.
        assert not (export_dir / "imatrix.calib.txt").exists()
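
    # For reference, a sketch of the sidecar shape relied on here (it mirrors
    # the _seed() helper in TestResolveImatrix; only the keys asserted above
    # are guaranteed by this test):
    #
    #   {"path": "imatrix.gguf", "sha256": "<hex digest>",
    #    "base_revision": "r1", "corpus_sha256": "c1", "chunks": 32,
    #    "chunk_size": DEFAULT_CHUNK_SIZE, "built_at": "<naive-UTC ISO>"}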

    def test_subprocess_leaves_calib_on_failure(self, tmp_path: Path) -> None:
        vendor = _write_vendor(tmp_path)
        base_gguf = tmp_path / "base.fp16.gguf"
        base_gguf.write_bytes(b"ok")
        export_dir = tmp_path / "exports" / "Q4_K_M"

        def runner(_argv: Any) -> None:
            raise SubprocessError(cmd=["x"], returncode=1, stderr_tail="boom")

        with pytest.raises(SubprocessError, match="boom"):
            build_imatrix(
                base_gguf=base_gguf,
                calibration_text="calibration text is here " * 20,
                export_dir=export_dir,
                base_revision="r1",
                corpus_sha256="c1",
                chunks=16,
                bin_override=vendor,
                subprocess_runner=runner,
            )
        # Operator should still be able to rerun the command by hand.
        assert (export_dir / "imatrix.calib.txt").is_file()

    def test_missing_output_raises_subprocess_error(self, tmp_path: Path) -> None:
        vendor = _write_vendor(tmp_path)
        base_gguf = tmp_path / "base.fp16.gguf"
        base_gguf.write_bytes(b"ok")
        export_dir = tmp_path / "exports" / "Q4_K_M"

        runner, _ = self._fake_runner(write_output=False)
        with pytest.raises(SubprocessError, match="not produced"):
            build_imatrix(
                base_gguf=base_gguf,
                calibration_text="x " * 100,
                export_dir=export_dir,
                base_revision="r1",
                corpus_sha256="c1",
                chunks=16,
                bin_override=vendor,
                subprocess_runner=runner,
            )

    def test_missing_base_gguf_raises(self, tmp_path: Path) -> None:
        vendor = _write_vendor(tmp_path)
        with pytest.raises(FileNotFoundError, match="imatrix base model missing"):
            build_imatrix(
                base_gguf=tmp_path / "nope.gguf",
                calibration_text="text",
                export_dir=tmp_path / "out",
                base_revision="r",
                corpus_sha256="c",
                bin_override=vendor,
                subprocess_runner=lambda _a: None,
            )

    @pytest.mark.parametrize(
        ("chunks", "chunk_size"),
        [(0, 512), (-1, 512), (256, 0), (256, -1)],
    )
    def test_nonpositive_params_raise(self, tmp_path: Path, chunks: int, chunk_size: int) -> None:
        vendor = _write_vendor(tmp_path)
        base = tmp_path / "b.gguf"
        base.write_bytes(b"x")
        with pytest.raises(ValueError):
            build_imatrix(
                base_gguf=base,
                calibration_text="text",
                export_dir=tmp_path / "out",
                base_revision="r",
                corpus_sha256="c",
                chunks=chunks,
                chunk_size=chunk_size,
                bin_override=vendor,
                subprocess_runner=lambda _a: None,
            )

    def test_empty_calibration_text_raises(self, tmp_path: Path) -> None:
        vendor = _write_vendor(tmp_path)
        base = tmp_path / "b.gguf"
        base.write_bytes(b"x")
        with pytest.raises(ValueError, match="calibration_text is empty"):
            build_imatrix(
                base_gguf=base,
                calibration_text=" \n\n\t ",
                export_dir=tmp_path / "out",
                base_revision="r",
                corpus_sha256="c",
                bin_override=vendor,
                subprocess_runner=lambda _a: None,
            )


# --- resolve_imatrix ----------------------------------------------------------


class TestResolveImatrix:
    def _seed(
        self,
        tmp_path: Path,
        *,
        base_revision: str = "r1",
        corpus_sha256: str = "c1",
        chunks: int = DEFAULT_CHUNKS,
        chunk_size: int = DEFAULT_CHUNK_SIZE,
        mutate_binary: bool = False,
    ) -> Path:
        export_dir = tmp_path / "exports" / "Q4_K_M"
        export_dir.mkdir(parents=True)
        binary = export_dir / "imatrix.gguf"
        binary.write_bytes(b"fake imatrix bytes")
        sha = _sha256_of_file(binary)
        if mutate_binary:
            binary.write_bytes(b"bytes have been changed")
        meta = {
            "path": "imatrix.gguf",
            "sha256": sha,
            "base_revision": base_revision,
            "corpus_sha256": corpus_sha256,
            "chunks": chunks,
            "chunk_size": chunk_size,
            "built_at": datetime.now(UTC).replace(tzinfo=None, microsecond=0).isoformat(),
        }
        (export_dir / "imatrix.meta.json").write_text(json.dumps(meta))
        return export_dir
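
    # Cache-key contract exercised below: resolve_imatrix returns an
    # ImatrixArtifact only when (base_revision, corpus_sha256, chunks) all
    # match the sidecar AND the binary still hashes to the recorded sha256;
    # any mismatch, tampering, or malformed sidecar is a miss and yields None.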

    def test_matching_key_returns_artifact(self, tmp_path: Path) -> None:
        export_dir = self._seed(tmp_path)
        artifact = resolve_imatrix(
            export_dir,
            base_revision="r1",
            corpus_sha256="c1",
            chunks=DEFAULT_CHUNKS,
        )
        assert isinstance(artifact, ImatrixArtifact)
        assert artifact.base_revision == "r1"
        assert artifact.corpus_sha256 == "c1"

    def test_missing_binary_returns_none(self, tmp_path: Path) -> None:
        export_dir = tmp_path / "exports"
        export_dir.mkdir()
        assert (
            resolve_imatrix(
                export_dir,
                base_revision="r",
                corpus_sha256="c",
                chunks=DEFAULT_CHUNKS,
            )
            is None
        )

    def test_mismatched_corpus_sha_returns_none(self, tmp_path: Path) -> None:
        export_dir = self._seed(tmp_path, corpus_sha256="c1")
        assert (
            resolve_imatrix(
                export_dir,
                base_revision="r1",
                corpus_sha256="different",
                chunks=DEFAULT_CHUNKS,
            )
            is None
        )

    def test_mismatched_chunks_returns_none(self, tmp_path: Path) -> None:
        export_dir = self._seed(tmp_path, chunks=256)
        assert (
            resolve_imatrix(
                export_dir,
                base_revision="r1",
                corpus_sha256="c1",
                chunks=128,  # was 256
            )
            is None
        )

    def test_mismatched_base_revision_returns_none(self, tmp_path: Path) -> None:
        export_dir = self._seed(tmp_path, base_revision="r1")
        assert (
            resolve_imatrix(
                export_dir,
                base_revision="r2",
                corpus_sha256="c1",
                chunks=DEFAULT_CHUNKS,
            )
            is None
        )

    def test_tampered_binary_returns_none(self, tmp_path: Path) -> None:
        """A stale sidecar + post-mutation binary must look like a miss."""
        export_dir = self._seed(tmp_path, mutate_binary=True)
        assert (
            resolve_imatrix(
                export_dir,
                base_revision="r1",
                corpus_sha256="c1",
                chunks=DEFAULT_CHUNKS,
            )
            is None
        )

    def test_malformed_sidecar_returns_none(self, tmp_path: Path) -> None:
        export_dir = self._seed(tmp_path)
        (export_dir / "imatrix.meta.json").write_text("not json")
        assert (
            resolve_imatrix(
                export_dir,
                base_revision="r1",
                corpus_sha256="c1",
                chunks=DEFAULT_CHUNKS,
            )
            is None
        )

    def test_sidecar_wrong_shape_returns_none(self, tmp_path: Path) -> None:
        export_dir = self._seed(tmp_path)
        (export_dir / "imatrix.meta.json").write_text(json.dumps(["not", "a", "dict"]))
        assert (
            resolve_imatrix(
                export_dir,
                base_revision="r1",
                corpus_sha256="c1",
                chunks=DEFAULT_CHUNKS,
            )
            is None
        )

    def test_non_string_sha_returns_none(self, tmp_path: Path) -> None:
        export_dir = self._seed(tmp_path)
        meta = json.loads((export_dir / "imatrix.meta.json").read_text())
        meta["sha256"] = 123
        (export_dir / "imatrix.meta.json").write_text(json.dumps(meta))
        assert (
            resolve_imatrix(
                export_dir,
                base_revision="r1",
                corpus_sha256="c1",
                chunks=DEFAULT_CHUNKS,
            )
            is None
        )

    def test_invalid_built_at_returns_none(self, tmp_path: Path) -> None:
        export_dir = self._seed(tmp_path)
        meta = json.loads((export_dir / "imatrix.meta.json").read_text())
        meta["built_at"] = "not-a-datetime"
        (export_dir / "imatrix.meta.json").write_text(json.dumps(meta))
        assert (
            resolve_imatrix(
                export_dir,
                base_revision="r1",
                corpus_sha256="c1",
                chunks=DEFAULT_CHUNKS,
            )
            is None
        )


# --- calibration_text_from_replay --------------------------------------------


class TestCalibrationTextFromReplay:
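    # Contract: calibration_text_from_replay returns a (text, sha) pair. The
    # sha is the sentinel "<no-corpus>" when the corpus file is absent;
    # otherwise it is the 64-char hex sha256 of the corpus bytes, even when
    # no usable calibration text can be extracted.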
    def test_missing_corpus_returns_sentinel(self, tmp_path: Path) -> None:
        text, sha = calibration_text_from_replay(
            corpus_path=tmp_path / "nope.zst",
            index_path=tmp_path / "idx.json",
        )
        assert text == ""
        assert sha == "<no-corpus>"

    def test_missing_index_tracks_corpus_sha(self, tmp_path: Path) -> None:
        corpus = tmp_path / "corpus.zst"
        corpus.write_bytes(b"deliberately-not-zstd bytes")
        text, sha = calibration_text_from_replay(
            corpus_path=corpus, index_path=tmp_path / "idx.json"
        )
        assert text == ""
        # Even without an index, we record the binary sha so a later
        # index write triggers a rebuild.
        assert sha != "<no-corpus>"
        assert len(sha) == 64

    def test_full_round_trip(self, tmp_path: Path) -> None:
        """Write a real replay via ReplayStore, then round-trip it."""
        from dlm.replay.models import SectionSnapshot
        from dlm.replay.store import ReplayStore

        corpus = tmp_path / "corpus.zst"
        idx = tmp_path / "index.json"
        store = ReplayStore.at(corpus, idx)

        snaps = [
            SectionSnapshot(
                section_id=f"{i:016x}",
                section_type="prose",
                content=f"Snapshot {i} prose content with lorem ipsum words.",
                first_seen_at=datetime(2026, 4, 19),  # store uses naive-UTC timestamps
                last_seen_at=datetime(2026, 4, 19),
            )
            for i in range(5)
        ]
        store.append_many(snaps)

        text, sha = calibration_text_from_replay(corpus_path=corpus, index_path=idx)
        assert "Snapshot 0 prose content" in text
        assert "Snapshot 4 prose content" in text
        assert len(sha) == 64

    def test_truncates_at_max_chars(self, tmp_path: Path) -> None:
        from dlm.replay.models import SectionSnapshot
        from dlm.replay.store import ReplayStore

        corpus = tmp_path / "corpus.zst"
        idx = tmp_path / "index.json"
        store = ReplayStore.at(corpus, idx)
        big_content = "word " * 1000  # 5,000 chars per snapshot
        snaps = [
            SectionSnapshot(
                section_id=f"{i:016x}",
                section_type="prose",
                content=big_content,
                first_seen_at=datetime(2026, 4, 19),
                last_seen_at=datetime(2026, 4, 19),
            )
            for i in range(10)
        ]
        store.append_many(snaps)

        text, _sha = calibration_text_from_replay(
            corpus_path=corpus, index_path=idx, max_chars=8_000
        )
        # `max_chars` is the pre-joiner content budget; the "\n\n" separator
        # between snapshots adds a small constant on top. Ten snapshots have
        # at most nine joiners, so 2 * 10 is a safely loose bound.
        assert len(text) <= 8_000 + 2 * 10

    def test_empty_and_whitespace_snapshots_are_skipped(self, tmp_path: Path) -> None:
        from dlm.replay.models import SectionSnapshot
        from dlm.replay.store import ReplayStore

        corpus = tmp_path / "corpus.zst"
        idx = tmp_path / "index.json"
        store = ReplayStore.at(corpus, idx)
        snaps = [
            SectionSnapshot(
                section_id="0000000000000001",
                section_type="prose",
                content="",
                first_seen_at=datetime(2026, 4, 19),
                last_seen_at=datetime(2026, 4, 19),
            ),
            SectionSnapshot(
                section_id="0000000000000002",
                section_type="prose",
                content=" \n\t ",
                first_seen_at=datetime(2026, 4, 19),
                last_seen_at=datetime(2026, 4, 19),
            ),
            SectionSnapshot(
                section_id="0000000000000003",
                section_type="prose",
                content="real calibration content",
                first_seen_at=datetime(2026, 4, 19),
                last_seen_at=datetime(2026, 4, 19),
            ),
        ]
        store.append_many(snaps)

        text, _sha = calibration_text_from_replay(corpus_path=corpus, index_path=idx)
        assert text == "real calibration content"

    def test_truncation_can_clip_with_zero_remaining_budget(self, tmp_path: Path) -> None:
        from dlm.replay.models import SectionSnapshot
        from dlm.replay.store import ReplayStore

        corpus = tmp_path / "corpus.zst"
        idx = tmp_path / "index.json"
        store = ReplayStore.at(corpus, idx)
        snaps = [
            SectionSnapshot(
                section_id="0000000000000001",
                section_type="prose",
                content="abcd",
                first_seen_at=datetime(2026, 4, 19),
                last_seen_at=datetime(2026, 4, 19),
            ),
            SectionSnapshot(
                section_id="0000000000000002",
                section_type="prose",
                content="efgh",
                first_seen_at=datetime(2026, 4, 19),
                last_seen_at=datetime(2026, 4, 19),
            ),
        ]
        store.append_many(snaps)

        # The first snapshot exactly exhausts the budget; the second must be
        # dropped rather than joined with zero remaining room.
        text, _sha = calibration_text_from_replay(corpus_path=corpus, index_path=idx, max_chars=4)
        assert text == "abcd"
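

# End-to-end sketch of the flow under test (hypothetical glue code; it
# mirrors the cases above rather than asserting any new behavior):
#
#   text, sha = calibration_text_from_replay(corpus_path=corpus, index_path=idx)
#   artifact = build_imatrix(base_gguf=base, calibration_text=text,
#                            export_dir=export_dir, base_revision=rev,
#                            corpus_sha256=sha, bin_override=vendor,
#                            subprocess_runner=runner)
#   cached = resolve_imatrix(export_dir, base_revision=rev,
#                            corpus_sha256=sha, chunks=DEFAULT_CHUNKS)
#   assert cached is not None and cached.sha256 == artifact.sha256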