tenseleyflow/documentlanguagemodel / a66389b

Browse files

test(export): wire --run-heavy-vl opt-in + fill VL GGUF roundtrip body (audit-11 B2)

The VL GGUF roundtrip test previously ended with a blanket
`pytest.skip("...run via --run-heavy-vl")` — for a flag that never
existed. Zero execution, ever, even when all preconditions were met.

- conftest.py: add --run-heavy-vl pytest option + run_heavy_vl fixture.
- test_vl_gguf_roundtrip: take run_heavy_vl; gate the final body on it.
Without the flag, same skip behavior as before (clear message). With
--run-heavy-vl + a cached base + a built llama-quantize + a
SUPPORTED arch, the real body runs: dlm init --multimodal + dlm
train 1 step, then run_vl_gguf_export, then asserts GGUF exists,
Modelfile has `FROM ./base.Q4_K_M.gguf` and no ADAPTER line,
mmproj_path is None (single-file contract), manifest + vl_gguf.json
sidecar record the arch verdict.

This closes the "dead scaffold" testing-gap finding from audit-11.
Authored by espadonne
SHA
a66389b53e265c91da8c3002d6ce95d74efd79fe
Parents
fbf82a6
Tree
e2e2b17

2 changed files

Status | File | Lines added | Lines removed
M tests/conftest.py 23 0
M tests/integration/export/test_vl_gguf_roundtrip.py 106 18
tests/conftest.py — modified
@@ -25,6 +25,17 @@ def pytest_addoption(parser: pytest.Parser) -> None:
2525
         default=False,
2626
         help="Regenerate golden-output fixtures instead of asserting against them.",
2727
     )
28
+    parser.addoption(
29
+        "--run-heavy-vl",
30
+        action="store_true",
31
+        default=False,
32
+        help=(
33
+            "Opt into heavy VL integration-test bodies that need ~8 GB "
34
+            "intermediate storage + several minutes of training (e.g., "
35
+            "VL GGUF round-trip). Without this flag, gated heavy bodies "
36
+            "skip with a clear message even when all other prereqs are met."
37
+        ),
38
+    )
2839
 
2940
 
3041
 @pytest.fixture
@@ -33,6 +44,18 @@ def update_goldens(request: pytest.FixtureRequest) -> bool:
3344
     return bool(request.config.getoption("--update-goldens"))
3445
 
3546
 
47
+@pytest.fixture
48
+def run_heavy_vl(request: pytest.FixtureRequest) -> bool:
49
+    """Expose --run-heavy-vl to tests.
50
+
51
+    The VL round-trip test gates its train→merge→convert→quantize body
52
+    on this flag so CI doesn't accidentally burn 8 GB of scratch space
53
+    on every pass. Heavy VL tests consult this fixture and skip when
54
+    it's False.
55
+    """
56
+    return bool(request.config.getoption("--run-heavy-vl"))
57
+
58
+
3659
 @pytest.fixture
3760
 def seeded_rng() -> Iterator[int]:
3861
     """Seed Python's random for tests that need local determinism.
tests/integration/export/test_vl_gguf_roundtrip.py — modified
@@ -24,6 +24,7 @@ via `pytest -m "slow and vl"` on a provisioned host.
2424
 
2525
 from __future__ import annotations
2626
 
27
+import json
2728
 import shutil
2829
 import subprocess
2930
 from pathlib import Path
@@ -113,6 +114,7 @@ def test_vl_gguf_roundtrip(
113114
     arch: str,
114115
     hf_id: str,
115116
     tmp_path: Path,
117
+    run_heavy_vl: bool,
116118
 ) -> None:
117119
     """GGUF emission for a VL arch — filled body on SUPPORTED, skip otherwise.
118120
 
@@ -156,23 +158,109 @@ def test_vl_gguf_roundtrip(
156158
             "provisioned host."
157159
         )
158160
 
159
-    # With the cache + SUPPORTED gate both satisfied, the full
160
-    # train→merge→convert→quantize chain can land here. That chain
161
-    # writes ~4-8 GB of intermediate fp16 GGUFs and takes several
162
-    # minutes even on a provisioned host, so the assertion list stays
163
-    # tight and focused: what we actually want to pin is that the
164
-    # emitter produces a quantized GGUF + a Modelfile with `FROM
165
-    # ./base.Q4_K_M.gguf` and no ADAPTER line (merged path), plus a
166
-    # vl_gguf.json sidecar capturing the arch verdict.
167
-    #
168
-    # The body below is the skeleton; a CI environment with enough
169
-    # resources + matching tokenizer fingerprint fills it in.
170
-    # (See docs/cookbook/vl-base.md for the manual priming recipe.)
171161
     assert cached_base.exists(), cached_base
172
-    pytest.skip(
173
-        "VL GGUF round-trip body requires ~8 GB intermediate storage + "
174
-        "several minutes of training; run manually via "
175
-        "`pytest -m 'slow and vl' --run-heavy-vl` once that opt-in "
176
-        "flag lands. The emitter itself is covered by "
177
-        "tests/unit/export/test_vl_gguf.py."
162
+
163
+    if not run_heavy_vl:
164
+        pytest.skip(
165
+            "VL GGUF round-trip body requires ~8 GB intermediate storage + "
166
+            "several minutes of training. Opt in with "
167
+            "`pytest -m 'slow and vl' --run-heavy-vl` to execute. "
168
+            "The emitter itself is covered by tests/unit/export/test_vl_gguf.py."
169
+        )
170
+
171
+    # Heavy body: train a 1-step LoRA → export --merged --quant Q4_K_M →
172
+    # verify the GGUF + Modelfile + manifest land. Any subprocess failure
173
+    # surfaces here as a test error, which is the desired signal for
174
+    # "upstream flipped support on this arch and our emitter broke."
175
+    from typer.testing import CliRunner
176
+
177
+    from dlm.base_models import BASE_MODELS
178
+    from dlm.cli.app import app
179
+    from dlm.doc.parser import parse_file
180
+    from dlm.export.vl_gguf import run_vl_gguf_export
181
+    from dlm.store.paths import for_dlm
182
+
183
+    # Find the registered base-model key whose hf_id matches the arch;
184
+    # the test parametrization carries the hf_id, and `dlm init --base`
185
+    # wants a registry key.
186
+    base_key = next(
187
+        (key for key, spec in BASE_MODELS.items() if spec.hf_id == hf_id),
188
+        None,
189
+    )
190
+    if base_key is None:
191
+        pytest.skip(f"{hf_id} is not in the registry; add a BaseModelSpec for it")
192
+
193
+    tmp_home = tmp_path / "home"
194
+    doc_path = tmp_path / "doc.dlm"
195
+    runner = CliRunner()
196
+
197
+    # Scaffold a multimodal doc at the target base, then train one step
198
+    # so an adapter version exists. We intentionally use --max-steps 1
199
+    # (cap training cost) + --i-accept-license (gated bases).
200
+    init_result = runner.invoke(
201
+        app,
202
+        [
203
+            "--home",
204
+            str(tmp_home),
205
+            "init",
206
+            str(doc_path),
207
+            "--multimodal",
208
+            "--base",
209
+            base_key,
210
+            "--i-accept-license",
211
+        ],
178212
     )
213
+    assert init_result.exit_code == 0, init_result.output
214
+
215
+    train_result = runner.invoke(
216
+        app,
217
+        [
218
+            "--home",
219
+            str(tmp_home),
220
+            "train",
221
+            str(doc_path),
222
+            "--max-steps",
223
+            "1",
224
+            "--seed",
225
+            "42",
226
+        ],
227
+    )
228
+    assert train_result.exit_code == 0, train_result.output
229
+
230
+    # Now drive the emitter directly — we control plan + verdict this
231
+    # way and avoid routing through the CLI dispatcher's fallback on
232
+    # any unrelated refusal.
233
+    parsed = parse_file(doc_path)
234
+    store = for_dlm(parsed.frontmatter.dlm_id, home=tmp_home)
235
+    spec = BASE_MODELS[base_key]
236
+
237
+    from dlm.export.plan import ExportPlan
238
+
239
+    plan = ExportPlan(merged=True, imatrix="off", quant="Q4_K_M")
240
+    emit_result = run_vl_gguf_export(
241
+        store,
242
+        spec,
243
+        plan,
244
+        verdict=verdict,
245
+        cached_base_dir=cached_base,
246
+        source_dlm_path=doc_path,
247
+        dlm_version="test",
248
+    )
249
+
250
+    # Contract checks — the module's docstring + Sprint 35.4 spec pin these.
251
+    assert emit_result.gguf_path.exists()
252
+    assert emit_result.gguf_path.stat().st_size > 0
253
+    assert emit_result.modelfile_path.exists()
254
+    modelfile_body = emit_result.modelfile_path.read_text(encoding="utf-8")
255
+    assert f"FROM ./{emit_result.gguf_path.name}" in modelfile_body
256
+    assert "ADAPTER" not in modelfile_body  # merged-only at this upstream tag
257
+    assert emit_result.mmproj_path is None  # single-file contract
258
+    assert emit_result.quant == "Q4_K_M"
259
+    assert emit_result.llama_cpp_tag == verdict.llama_cpp_tag
260
+    assert emit_result.manifest_path.exists()
261
+
262
+    sidecar_path = emit_result.export_dir / "vl_gguf.json"
263
+    assert sidecar_path.exists()
264
+    sidecar = json.loads(sidecar_path.read_text(encoding="utf-8"))
265
+    assert sidecar["arch_verdict"]["support"] == "SUPPORTED"
266
+    assert sidecar["arch_verdict"]["architecture"] == arch