@@ -24,6 +24,7 @@ via `pytest -m "slow and vl"` on a provisioned host. |
| 24 | 24 | |
| 25 | 25 | from __future__ import annotations |
| 26 | 26 | |
| 27 | +import json |
| 27 | 28 | import shutil |
| 28 | 29 | import subprocess |
| 29 | 30 | from pathlib import Path |
@@ -113,6 +114,7 @@ def test_vl_gguf_roundtrip( |
| 113 | 114 | arch: str, |
| 114 | 115 | hf_id: str, |
| 115 | 116 | tmp_path: Path, |
| 117 | + run_heavy_vl: bool, |
| 116 | 118 | ) -> None: |
| 117 | 119 | """GGUF emission for a VL arch — filled body on SUPPORTED, skip otherwise. |
| 118 | 120 | |
@@ -156,23 +158,109 @@ def test_vl_gguf_roundtrip( |
| 156 | 158 | "provisioned host." |
| 157 | 159 | ) |
| 158 | 160 | |
| 159 | | - # With the cache + SUPPORTED gate both satisfied, the full |
| 160 | | - # train→merge→convert→quantize chain can land here. That chain |
| 161 | | - # writes ~4-8 GB of intermediate fp16 GGUFs and takes several |
| 162 | | - # minutes even on a provisioned host, so the assertion list stays |
| 163 | | - # tight and focused: what we actually want to pin is that the |
| 164 | | - # emitter produces a quantized GGUF + a Modelfile with `FROM |
| 165 | | - # ./base.Q4_K_M.gguf` and no ADAPTER line (merged path), plus a |
| 166 | | - # vl_gguf.json sidecar capturing the arch verdict. |
| 167 | | - # |
| 168 | | - # The body below is the skeleton; a CI environment with enough |
| 169 | | - # resources + matching tokenizer fingerprint fills it in. |
| 170 | | - # (See docs/cookbook/vl-base.md for the manual priming recipe.) |
| 171 | 161 | assert cached_base.exists(), cached_base |
| 172 | | - pytest.skip( |
| 173 | | - "VL GGUF round-trip body requires ~8 GB intermediate storage + " |
| 174 | | - "several minutes of training; run manually via " |
| 175 | | - "`pytest -m 'slow and vl' --run-heavy-vl` once that opt-in " |
| 176 | | - "flag lands. The emitter itself is covered by " |
| 177 | | - "tests/unit/export/test_vl_gguf.py." |
| 162 | + |
| 163 | + if not run_heavy_vl: |
| 164 | + pytest.skip( |
| 165 | + "VL GGUF round-trip body requires ~8 GB intermediate storage + " |
| 166 | + "several minutes of training. Opt in with " |
| 167 | + "`pytest -m 'slow and vl' --run-heavy-vl` to execute. " |
| 168 | + "The emitter itself is covered by tests/unit/export/test_vl_gguf.py." |
| 169 | + ) |
| 170 | + |
| 171 | + # Heavy body: train a 1-step LoRA → export --merged --quant Q4_K_M → |
| 172 | + # verify the GGUF + Modelfile + manifest land. Any subprocess failure |
| 173 | + # surfaces here as a test error, which is the desired signal for |
| 174 | + # "upstream flipped support on this arch and our emitter broke." |
| 175 | + from typer.testing import CliRunner |
| 176 | + |
| 177 | + from dlm.base_models import BASE_MODELS |
| 178 | + from dlm.cli.app import app |
| 179 | + from dlm.doc.parser import parse_file |
| 180 | + from dlm.export.vl_gguf import run_vl_gguf_export |
| 181 | + from dlm.store.paths import for_dlm |
| 182 | + |
| 183 | + # Find the registry key whose spec.hf_id equals this case's hf_id; |
| 184 | + # the test parametrization carries the hf_id, while `dlm init --base` |
| 185 | + # wants a registry key. |
| 186 | + base_key = next( |
| 187 | + (key for key, spec in BASE_MODELS.items() if spec.hf_id == hf_id), |
| 188 | + None, |
| 189 | + ) |
| 190 | + if base_key is None: |
| 191 | + pytest.skip(f"{hf_id} is not in the registry; add a BaseModelSpec for it") |
| 192 | + |
| 193 | + tmp_home = tmp_path / "home" |
| 194 | + doc_path = tmp_path / "doc.dlm" |
| 195 | + runner = CliRunner() |
| 196 | + |
| 197 | + # Scaffold a multimodal doc at the target base, then train one step |
| 198 | + # so an adapter version exists. We intentionally use --max-steps 1 |
| 199 | + # (cap training cost) + --i-accept-license (gated bases). |
| 200 | + init_result = runner.invoke( |
| 201 | + app, |
| 202 | + [ |
| 203 | + "--home", |
| 204 | + str(tmp_home), |
| 205 | + "init", |
| 206 | + str(doc_path), |
| 207 | + "--multimodal", |
| 208 | + "--base", |
| 209 | + base_key, |
| 210 | + "--i-accept-license", |
| 211 | + ], |
| 178 | 212 | ) |
| 213 | + assert init_result.exit_code == 0, init_result.output |
| 214 | + |
| 215 | + train_result = runner.invoke( |
| 216 | + app, |
| 217 | + [ |
| 218 | + "--home", |
| 219 | + str(tmp_home), |
| 220 | + "train", |
| 221 | + str(doc_path), |
| 222 | + "--max-steps", |
| 223 | + "1", |
| 224 | + "--seed", |
| 225 | + "42", |
| 226 | + ], |
| 227 | + ) |
| 228 | + assert train_result.exit_code == 0, train_result.output |
| 229 | + |
| 230 | + # Now drive the emitter directly — we control plan + verdict this |
| 231 | + # way and avoid routing through the CLI dispatcher's fallback on |
| 232 | + # any unrelated refusal. |
| 233 | + parsed = parse_file(doc_path) |
| 234 | + store = for_dlm(parsed.frontmatter.dlm_id, home=tmp_home) |
| 235 | + spec = BASE_MODELS[base_key] |
| 236 | + |
| 237 | + from dlm.export.plan import ExportPlan |
| 238 | + |
| 239 | + plan = ExportPlan(merged=True, imatrix="off", quant="Q4_K_M") |
| 240 | + emit_result = run_vl_gguf_export( |
| 241 | + store, |
| 242 | + spec, |
| 243 | + plan, |
| 244 | + verdict=verdict, |
| 245 | + cached_base_dir=cached_base, |
| 246 | + source_dlm_path=doc_path, |
| 247 | + dlm_version="test", |
| 248 | + ) |
| 249 | + |
| 250 | + # Contract checks — the module's docstring + Sprint 35.4 spec pin these. |
| 251 | + assert emit_result.gguf_path.exists() |
| 252 | + assert emit_result.gguf_path.stat().st_size > 0 |
| 253 | + assert emit_result.modelfile_path.exists() |
| 254 | + modelfile_body = emit_result.modelfile_path.read_text(encoding="utf-8") |
| 255 | + assert f"FROM ./{emit_result.gguf_path.name}" in modelfile_body |
| 256 | + assert "ADAPTER" not in modelfile_body # merged-only at this upstream tag |
| 257 | + assert emit_result.mmproj_path is None # single-file contract |
| 258 | + assert emit_result.quant == "Q4_K_M" |
| 259 | + assert emit_result.llama_cpp_tag == verdict.llama_cpp_tag |
| 260 | + assert emit_result.manifest_path.exists() |
| 261 | + |
| 262 | + sidecar_path = emit_result.export_dir / "vl_gguf.json" |
| 263 | + assert sidecar_path.exists() |
| 264 | + sidecar = json.loads(sidecar_path.read_text(encoding="utf-8")) |
| 265 | + assert sidecar["arch_verdict"]["support"] == "SUPPORTED" |
| 266 | + assert sidecar["arch_verdict"]["architecture"] == arch |