@@ -0,0 +1,116 @@ |
| 1 | +"""End-to-end VL GGUF export round-trip (Sprint 35.4 T7). |
| 2 | + |
| 3 | +Tests the full SUPPORTED path: train a PaliGemma adapter → `dlm export` |
| 4 | +emits GGUF + Modelfile → `ollama create` + `ollama run` returns a |
| 5 | +coherent response to an image prompt. |
| 6 | + |
| 7 | +**Current status: auto-skips.** The vendored llama.cpp tag (b8816) |
| 8 | +doesn't know about `PaliGemmaForConditionalGeneration` — the arch |
| 9 | +probe returns UNSUPPORTED, so this test skips without running the |
| 10 | +expensive training/export pipeline. It stays in the tree so a |
| 11 | +llama.cpp bump that flips the probe verdict surfaces the GGUF path |
| 12 | +immediately; the day that happens the test either passes (happy |
| 13 | +path) or fails with a real actionable error. |
| 14 | + |
| 15 | +Markers: `slow` + `vl` + `ollama`. Skipped by default. Run explicitly |
| 16 | +on a provisioned host (Ollama 0.4+ installed, PaliGemma cached, |
| 17 | +Gemma license accepted). |
| 18 | +""" |
| 19 | + |
| 20 | +from __future__ import annotations |
| 21 | + |
| 22 | +import shutil |
| 23 | +import subprocess |
| 24 | +from pathlib import Path |
| 25 | + |
| 26 | +import pytest |
| 27 | + |
| 28 | +from dlm.export.arch_probe import SupportLevel, probe_gguf_arch |
| 29 | + |
# Every test in this module is slow, VL-specific, and needs Ollama on
# the host; the module docstring says these are skipped by default and
# run explicitly on a provisioned machine.
pytestmark = [pytest.mark.slow, pytest.mark.vl, pytest.mark.ollama]


# HF architecture string the llama.cpp arch probe is asked about.
_PALIGEMMA_ARCH = "PaliGemmaForConditionalGeneration"
# Oldest Ollama release with the `{{ .Image }}` Modelfile directive.
_OLLAMA_MIN_VERSION = (0, 4, 0)
| 39 | + |
| 40 | + |
def _host_has_ollama() -> tuple[bool, str]:
    """Return (ok, reason). Ollama 0.4+ is required for `{{ .Image }}`.

    Best-effort probe: any failure (binary missing, exec error, timeout,
    non-zero exit, unparseable version string) returns ``(False, reason)``
    so the caller can skip instead of crashing.
    """
    ollama = shutil.which("ollama")
    if ollama is None:
        return False, "ollama not on PATH"
    try:
        proc = subprocess.run(
            [ollama, "--version"],
            check=True,
            capture_output=True,
            text=True,
            timeout=10,
        )
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError) as exc:
        # OSError covers the which()/exec race: the binary can disappear
        # or lose its execute bit between the probe above and run() here.
        return False, f"ollama --version failed: {exc}"
    version_line = proc.stdout.strip()
    # Best-effort: Ollama emits "ollama version is 0.4.x" or similar.
    # Any probe failure → assume pre-0.4 + skip rather than crash.
    # isdecimal() (not isdigit()) guarantees int() cannot raise: isdigit()
    # also accepts tokens like "²" that int() rejects with ValueError.
    parts = [int(p) for p in _extract_version(version_line) if p.isdecimal()]
    if len(parts) < 3:
        return False, f"could not parse `{version_line}`"
    if tuple(parts[:3]) < _OLLAMA_MIN_VERSION:
        return False, (
            f"ollama {'.'.join(str(p) for p in parts[:3])} < "
            f"{'.'.join(str(p) for p in _OLLAMA_MIN_VERSION)} "
            "(required for {{ .Image }} directive)"
        )
    return True, ""
| 69 | + |
| 70 | + |
| 71 | +def _extract_version(line: str) -> list[str]: |
| 72 | + """Pull tokens that look like version parts from a free-form line.""" |
| 73 | + chunks: list[str] = [] |
| 74 | + for token in line.replace("-", " ").replace(".", " ").split(): |
| 75 | + chunks.append(token) |
| 76 | + return chunks |
| 77 | + |
| 78 | + |
@pytest.fixture
def paligemma_supported() -> None:
    """Skip the test cleanly when llama.cpp doesn't support PaliGemma yet."""
    verdict = probe_gguf_arch(_PALIGEMMA_ARCH)
    if verdict.support is SupportLevel.SUPPORTED:
        return
    tag = verdict.llama_cpp_tag or "?"
    pytest.skip(
        f"llama.cpp {tag} does not support "
        f"{_PALIGEMMA_ARCH} ({verdict.support.value}). "
        "Bump the vendored tag once upstream adds PaliGemma GGUF "
        "conversion, then this test runs."
    )
| 90 | + |
| 91 | + |
@pytest.fixture
def ollama_available() -> None:
    """Skip when the host lacks a usable Ollama install (see _host_has_ollama)."""
    available, why = _host_has_ollama()
    if available:
        return
    pytest.skip(f"ollama prerequisite missing: {why}")
| 97 | + |
| 98 | + |
def test_paligemma_gguf_roundtrip(
    paligemma_supported: None,
    ollama_available: None,
    tmp_path: Path,
) -> None:
    """Train tiny PaliGemma adapter → export GGUF → ollama run.

    Intentionally light on the training side (1 step, 1 image) — the
    test is about the export + ollama plumbing, not training quality.
    """
    # Placeholder body. The SUPPORTED gate in `paligemma_supported`
    # skips every invocation on the current vendored llama.cpp tag, so
    # CI never downloads PaliGemma weights. When the tag bump lands,
    # replace this skip with the real train/export/ollama pipeline.
    pytest.skip(
        "VL GGUF round-trip body awaits llama.cpp PaliGemma support. "
        "See sprint 35.4 T7."
    )