@@ -9,14 +9,16 @@ budget is tuned small. |
| 9 | 9 | from __future__ import annotations |
| 10 | 10 | |
| 11 | 11 | import string |
| 12 | +from pathlib import Path |
| 12 | 13 | |
| 13 | 14 | import pytest |
| 14 | | -from hypothesis import given |
| 15 | +from hypothesis import HealthCheck, given, settings |
| 15 | 16 | from hypothesis import strategies as st |
| 16 | 17 | |
| 17 | | -from dlm.export.errors import UnsafeMergeError |
| 18 | +from dlm.export.errors import PreflightError, UnsafeMergeError |
| 18 | 19 | from dlm.export.merge import check_merge_safety |
| 19 | 20 | from dlm.export.plan import ExportPlan |
| 21 | +from dlm.export.tokenizer_sync import read_gguf_vocab_size |
| 20 | 22 | from dlm.io.ulid import mint_ulid |
| 21 | 23 | from dlm.pack.integrity import rollup_sha256 |
| 22 | 24 | |
@@ -107,3 +109,39 @@ class TestMergeSafetyTruthTable: |
| 107 | 109 | check_merge_safety(plan, was_qlora=was_qlora) |
| 108 | 110 | else: |
| 109 | 111 | check_merge_safety(plan, was_qlora=was_qlora) # no raise |
| 112 | + |
| 113 | + |
| 114 | +# --- GGUF parser fuzz --------------------------------------------------------- |
| 115 | + |
| 116 | + |
| 117 | +class TestGgufParserFuzz: |
| 118 | + """Feed random bytes at `read_gguf_vocab_size`; it must surface a typed |
| 119 | + `PreflightError`, never leak `struct.error` / `MemoryError` / a raw |
| 120 | + decoded string. Audit-04 T6 / B7 defense-in-depth. |
| 121 | + |
| 122 | + Reusing `tmp_path` across hypothesis iterations is deliberate (hence the |
| 123 | + suppressed `function_scoped_fixture` health check) — we overwrite the file |
| 124 | + each run, and a fresh dir per-sample would dominate the test runtime. |
| 125 | + """ |
| 126 | + |
| 127 | + @settings(suppress_health_check=[HealthCheck.function_scoped_fixture], max_examples=50) |
| 128 | + @given( |
| 129 | + payload=st.binary(min_size=0, max_size=256).filter( |
| 130 | + # Skip payloads starting with the GGUF magic — the sibling test covers those. |
| 131 | + lambda b: not b.startswith(b"GGUF") |
| 132 | + ) |
| 133 | + ) |
| 134 | + def test_random_bytes_raise_preflight_error(self, payload: bytes, tmp_path: Path) -> None: |
| 135 | + path = tmp_path / "fuzz.gguf" |
| 136 | + path.write_bytes(payload) |
| 137 | + with pytest.raises(PreflightError): |
| 138 | + read_gguf_vocab_size(path) |
| 139 | + |
| 140 | + @settings(suppress_health_check=[HealthCheck.function_scoped_fixture], max_examples=50) |
| 141 | + @given(body=st.binary(min_size=0, max_size=256)) |
| 142 | + def test_random_body_after_magic_doesnt_crash(self, body: bytes, tmp_path: Path) -> None: |
| 143 | + """Even with valid magic, garbage body → typed error, no crash.""" |
| 144 | + path = tmp_path / "fuzz.gguf" |
| 145 | + path.write_bytes(b"GGUF" + body) |
| 146 | + with pytest.raises(PreflightError): |
| 147 | + read_gguf_vocab_size(path) |