| | 1 | +"""End-to-end VL GGUF export round-trip (Sprint 35.4 T7). |
| | 2 | + |
| | 3 | +Tests the full SUPPORTED path: train a PaliGemma adapter → `dlm export` |
| | 4 | +emits GGUF + Modelfile → `ollama create` + `ollama run` returns a |
| | 5 | +coherent response to an image prompt. |
| | 6 | + |
| | 7 | +**Current status: auto-skips.** The vendored llama.cpp tag (b8816) |
| | 8 | +doesn't know about `PaliGemmaForConditionalGeneration` — the arch |
| | 9 | +probe returns UNSUPPORTED, so this test skips without running the |
| | 10 | +expensive training/export pipeline. It stays in the tree so a |
| | 11 | +llama.cpp bump that flips the probe verdict surfaces the GGUF path |
| | 12 | +immediately; the day that happens the test either passes (happy |
| | 13 | +path) or fails with a real actionable error. |
| | 14 | + |
| | 15 | +Markers: `slow` + `vl` + `ollama`. Skipped by default. Run explicitly |
| | 16 | +on a provisioned host (Ollama 0.4+ installed, PaliGemma cached, |
| | 17 | +Gemma license accepted). |
| | 18 | +""" |
| | 19 | + |
| | 20 | +from __future__ import annotations |
| | 21 | + |
| | 22 | +import shutil |
| | 23 | +import subprocess |
| | 24 | +from pathlib import Path |
| | 25 | + |
| | 26 | +import pytest |
| | 27 | + |
| | 28 | +from dlm.export.arch_probe import SupportLevel, probe_gguf_arch |
| | 29 | + |
| | 30 | +pytestmark = [ |
| | 31 | + pytest.mark.slow, |
| | 32 | + pytest.mark.vl, |
| | 33 | + pytest.mark.ollama, |
| | 34 | +] |
| | 35 | + |
| | 36 | + |
| | 37 | +_PALIGEMMA_ARCH = "PaliGemmaForConditionalGeneration" |
| | 38 | +_OLLAMA_MIN_VERSION = (0, 4, 0) |
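# Why the 0.4 floor: the exported Modelfile uses the `{{ .Image }}`
# template directive, which older Ollama releases don't accept. For
# orientation only, the generated file is roughly of this shape (an
# illustrative sketch, not the exact Modelfile `dlm export` emits):
#
#   FROM ./model.gguf
#   TEMPLATE """{{ .Image }}{{ .Prompt }}"""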


def _host_has_ollama() -> tuple[bool, str]:
    """Return (ok, reason). Ollama 0.4+ is required for `{{ .Image }}`."""
    ollama = shutil.which("ollama")
    if ollama is None:
        return False, "ollama not on PATH"
    try:
        proc = subprocess.run(
            [ollama, "--version"],
            check=True,
            capture_output=True,
            text=True,
            timeout=10,
        )
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as exc:
        return False, f"ollama --version failed: {exc}"
    version_line = proc.stdout.strip()
    # Best-effort parse: Ollama emits "ollama version is 0.4.x" or similar.
    # Any parse failure reports a skip reason rather than crashing the suite.
    parts = [int(p) for p in _extract_version(version_line) if p.isdigit()]
    if len(parts) < 3:
        return False, f"could not parse `{version_line}`"
    if tuple(parts[:3]) < _OLLAMA_MIN_VERSION:
        return False, (
            f"ollama {'.'.join(str(p) for p in parts[:3])} < "
            f"{'.'.join(str(p) for p in _OLLAMA_MIN_VERSION)} "
            "(required for {{ .Image }} directive)"
        )
    return True, ""


def _extract_version(line: str) -> list[str]:
    """Split a free-form version banner into tokens; the caller keeps the digits."""
    return line.replace("-", " ").replace(".", " ").split()
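
# Illustrative parse, assuming the banner format noted above:
#
#   >>> _extract_version("ollama version is 0.4.2")
#   ['ollama', 'version', 'is', '0', '4', '2']
#
# The digit filter in _host_has_ollama then reduces that to (0, 4, 2)
# before the comparison against _OLLAMA_MIN_VERSION.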


@pytest.fixture
def paligemma_supported() -> None:
    """Skip the test cleanly while llama.cpp doesn't yet support PaliGemma."""
    verdict = probe_gguf_arch(_PALIGEMMA_ARCH)
    if verdict.support is not SupportLevel.SUPPORTED:
        pytest.skip(
            f"llama.cpp {verdict.llama_cpp_tag or '?'} does not support "
            f"{_PALIGEMMA_ARCH} ({verdict.support.value}). "
            "Bump the vendored tag once upstream adds PaliGemma GGUF "
            "conversion; this test will then run."
        )


@pytest.fixture
def ollama_available() -> None:
    """Skip when the host has no new-enough ollama binary."""
    ok, reason = _host_has_ollama()
    if not ok:
        pytest.skip(f"ollama prerequisite missing: {reason}")


def test_paligemma_gguf_roundtrip(
    paligemma_supported: None,
    ollama_available: None,
    tmp_path: Path,
) -> None:
    """Train a tiny PaliGemma adapter → export GGUF → ollama run.

    Intentionally light on the training side (1 step, 1 image); the
    test is about the export + ollama plumbing, not training quality.
    """
    # When this test actually runs (post-llama.cpp-bump), the body below
    # will be filled in. For now the SUPPORTED gate above skips every
    # invocation on the current vendored tag, so the scaffold doesn't
    # drag PaliGemma weights into CI.
    pytest.skip(
        "VL GGUF round-trip body awaits llama.cpp PaliGemma support. "
        "See sprint 35.4 T7."
    )
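    # Hedged sketch of the eventual body, for orientation once the skip
    # above goes away. The `dlm export` → `ollama create` → `ollama run`
    # shape comes from the module docstring; every flag, model name, and
    # helper below is an assumption, not the final API:
    #
    #   adapter_dir = train_tiny_paligemma_adapter(tmp_path)  # hypothetical helper
    #   subprocess.run(
    #       ["dlm", "export", str(adapter_dir), "--out", str(tmp_path / "gguf")],
    #       check=True,
    #   )  # expected to emit the GGUF weights plus a Modelfile
    #   subprocess.run(
    #       ["ollama", "create", "dlm-vl-smoke", "-f",
    #        str(tmp_path / "gguf" / "Modelfile")],
    #       check=True,
    #   )
    #   reply = subprocess.run(
    #       ["ollama", "run", "dlm-vl-smoke", "describe the attached image"],
    #       capture_output=True, text=True, check=True, timeout=300,
    #   ).stdout
    #   assert reply.strip(), "expected a coherent, non-empty response"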