"""`dlm.export.arch_probe` — SUPPORTED / PARTIAL / UNSUPPORTED verdicts.

Covers:

- Fixture `convert_hf_to_gguf.py` with a TextModel registration → SUPPORTED.
- Fixture with MmprojModel-only registration → PARTIAL.
- Fixture without the arch at all → UNSUPPORTED.
- Live probe against the vendored tree: Qwen2-VL is SUPPORTED, PaliGemma
  and InternVL2 are UNSUPPORTED at the pinned tag (as of Sprint 35.4).
  The tests pin those expectations; a llama.cpp bump that flips them
  fails the test, forcing an explicit docs/cookbook refresh.
- Cache memoization across repeat calls.
- `VendoringError` surfaces when the script doesn't exist.
"""

from __future__ import annotations

from collections.abc import Iterator
from pathlib import Path

import pytest

import dlm.export.arch_probe as arch_probe
from dlm.export.arch_probe import (
    ArchProbeResult,
    SupportLevel,
    clear_cache,
    probe_gguf_arch,
)
from dlm.export.errors import VendoringError
from dlm.export.vendoring import llama_cpp_root


@pytest.fixture(autouse=True)
def _isolate_cache() -> Iterator[None]:
    """Each test gets a clean memoization table so fixtures don't bleed."""
    clear_cache()
    yield
    clear_cache()


def _fixture_llama_cpp(tmp_path: Path, script_body: str) -> Path:
    """Write a minimal llama.cpp layout with the given convert script body."""
    root = tmp_path / "llama.cpp"
    root.mkdir()
    (root / "convert_hf_to_gguf.py").write_text(script_body, encoding="utf-8")
    # The pinned tag is optional — omit VERSION so the probe reports None.
    return root
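

# Illustrative sketch, not one of the original cases (the test name and fixture
# body here are ours): a minimal end-to-end pass over `_fixture_llama_cpp`
# showing the `ArchProbeResult` fields the classes below rely on.
def test_fixture_probe_smoke(tmp_path: Path) -> None:
    root = _fixture_llama_cpp(
        tmp_path,
        '@ModelBase.register("LlamaForCausalLM")\nclass LlamaModel(TextModel):\n pass\n',
    )
    result = probe_gguf_arch("LlamaForCausalLM", llama_cpp_root=root)
    # A TextModel binding is the fully supported case.
    assert result.support is SupportLevel.SUPPORTED
    assert result.arch_class == "LlamaForCausalLM"
    # No VERSION file was written, so no pinned tag is reported.
    assert result.llama_cpp_tag is None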


class TestUnsupportedVerdict:
    def test_arch_absent_from_script(self, tmp_path: Path) -> None:
        root = _fixture_llama_cpp(
            tmp_path,
            "# Some other model only.\n"
            '@ModelBase.register("LlamaForCausalLM")\n'
            "class LlamaModel(TextModel):\n"
            " pass\n",
        )
        result = probe_gguf_arch(
            "PaliGemmaForConditionalGeneration",
            llama_cpp_root=root,
        )
        assert isinstance(result, ArchProbeResult)
        assert result.arch_class == "PaliGemmaForConditionalGeneration"
        assert result.support is SupportLevel.UNSUPPORTED
        assert "not found" in result.reason.lower()

    def test_unsupported_reason_names_arch_and_tag(self, tmp_path: Path) -> None:
        root = _fixture_llama_cpp(tmp_path, "# empty script\n")
        (root / "VERSION").write_text("b4321\n", encoding="utf-8")
        result = probe_gguf_arch(
            "InternVLChatModel",
            llama_cpp_root=root,
        )
        assert "InternVLChatModel" in result.reason
        assert "b4321" in result.reason
        assert result.llama_cpp_tag == "b4321"


class TestSupportedVerdict:
    def test_textmodel_binding_is_supported(self, tmp_path: Path) -> None:
        root = _fixture_llama_cpp(
            tmp_path,
            '@ModelBase.register("Qwen2VLModel", "Qwen2VLForConditionalGeneration")\n'
            "class Qwen2VLModel(TextModel):\n"
            " pass\n",
        )
        result = probe_gguf_arch(
            "Qwen2VLForConditionalGeneration",
            llama_cpp_root=root,
        )
        assert result.support is SupportLevel.SUPPORTED
        assert "Qwen2VLModel" in result.reason

    def test_dual_registration_prefers_textmodel(self, tmp_path: Path) -> None:
        """When an arch is registered on both a TextModel and an MmprojModel,
        the TextModel binding wins → SUPPORTED. This mirrors the actual
        Qwen2-VL layout in upstream llama.cpp."""
        root = _fixture_llama_cpp(
            tmp_path,
            '@ModelBase.register("Qwen2VLForConditionalGeneration")\n'
            "class Qwen2VLTextModel(TextModel):\n"
            " pass\n"
            "\n"
            '@ModelBase.register("Qwen2VLForConditionalGeneration")\n'
            "class Qwen2VLVisionModel(MmprojModel):\n"
            " pass\n",
        )
        result = probe_gguf_arch(
            "Qwen2VLForConditionalGeneration",
            llama_cpp_root=root,
        )
        assert result.support is SupportLevel.SUPPORTED


class TestPartialVerdict:
    def test_mmproj_only_binding_is_partial(self, tmp_path: Path) -> None:
        root = _fixture_llama_cpp(
            tmp_path,
            '@ModelBase.register("SomeVLArch")\nclass SomeVisionTower(MmprojModel):\n pass\n',
        )
        result = probe_gguf_arch("SomeVLArch", llama_cpp_root=root)
        assert result.support is SupportLevel.PARTIAL
        assert "MmprojModel" in result.reason

    def test_partial_reason_names_mmproj_class(self, tmp_path: Path) -> None:
        root = _fixture_llama_cpp(
            tmp_path,
            '@ModelBase.register("FooForConditionalGeneration")\n'
            "class FooVisionTower(MmprojModel):\n"
            " pass\n",
        )
        result = probe_gguf_arch(
            "FooForConditionalGeneration",
            llama_cpp_root=root,
        )
        assert "FooVisionTower" in result.reason


class TestGrammarEdgeCases:
    def test_single_quoted_arch_name(self, tmp_path: Path) -> None:
        """Register decorators sometimes use single quotes; still match."""
        root = _fixture_llama_cpp(
            tmp_path,
            "@ModelBase.register('FooForCausalLM')\nclass FooModel(TextModel):\n pass\n",
        )
        result = probe_gguf_arch("FooForCausalLM", llama_cpp_root=root)
        assert result.support is SupportLevel.SUPPORTED

    def test_multiline_decorator_args(self, tmp_path: Path) -> None:
        """Decorators with arg lists wrapped across lines still parse."""
        root = _fixture_llama_cpp(
            tmp_path,
            "@ModelBase.register(\n"
            ' "Qwen2VLModel",\n'
            ' "Qwen2VLForConditionalGeneration",\n'
            ")\n"
            "class Qwen2VLModel(TextModel):\n"
            " pass\n",
        )
        result = probe_gguf_arch(
            "Qwen2VLForConditionalGeneration",
            llama_cpp_root=root,
        )
        assert result.support is SupportLevel.SUPPORTED

    def test_substring_match_does_not_fire(self, tmp_path: Path) -> None:
        """`"Gemma3..."` should not match `"Gemma"` — use full quoted name."""
        root = _fixture_llama_cpp(
            tmp_path,
            '@ModelBase.register("Gemma3ForCausalLM")\nclass Gemma3Model(TextModel):\n pass\n',
        )
        result = probe_gguf_arch("GemmaForCausalLM", llama_cpp_root=root)
        # "GemmaForCausalLM" (without the 3) isn't registered.
        assert result.support is SupportLevel.UNSUPPORTED

    def test_decorator_without_following_class_is_ignored(self, tmp_path: Path) -> None:
        root = _fixture_llama_cpp(
            tmp_path,
            '@ModelBase.register("FooForCausalLM")\n# no class follows\n',
        )
        result = probe_gguf_arch("FooForCausalLM", llama_cpp_root=root)
        assert result.support is SupportLevel.UNSUPPORTED

    def test_unextractable_class_name_is_ignored(self, monkeypatch: pytest.MonkeyPatch) -> None:
        text = '@ModelBase.register("FooForCausalLM")\nclass FooModel(TextModel):\n pass\n'
        monkeypatch.setattr(arch_probe, "_extract_class_name", lambda _text, _start: None)
        assert arch_probe._find_arch_bindings(text, "FooForCausalLM") == []

    def test_extract_class_name_returns_none_without_open_paren(self) -> None:
        assert arch_probe._extract_class_name("class FooModel:\n pass\n", 0) is None


class TestMemoization:
    def test_repeat_calls_hit_cache(self, tmp_path: Path) -> None:
        """The second call must not re-read the script — proven by
        swapping the file contents and confirming the cached verdict
        persists."""
        root = _fixture_llama_cpp(
            tmp_path,
            '@ModelBase.register("Arch1")\nclass Arch1Model(TextModel):\n pass\n',
        )
        (root / "VERSION").write_text("tag-v1\n", encoding="utf-8")
        first = probe_gguf_arch("Arch1", llama_cpp_root=root)
        assert first.support is SupportLevel.SUPPORTED

        # Rewrite the script so a re-read would flip the verdict to
        # UNSUPPORTED — the cache must defeat this.
        (root / "convert_hf_to_gguf.py").write_text("# No registrations now.\n", encoding="utf-8")
        second = probe_gguf_arch("Arch1", llama_cpp_root=root)
        assert second is first

    def test_tag_bump_invalidates_cache(self, tmp_path: Path) -> None:
        """Changing the VERSION file (a llama.cpp bump) produces a
        distinct cache key, so the probe re-reads and may return a
        different verdict."""
        root = _fixture_llama_cpp(
            tmp_path,
            "# No registrations.\n",
        )
        (root / "VERSION").write_text("tag-v1\n", encoding="utf-8")
        first = probe_gguf_arch("Arch1", llama_cpp_root=root)
        assert first.support is SupportLevel.UNSUPPORTED

        # Bump the tag AND add the registration.
        (root / "VERSION").write_text("tag-v2\n", encoding="utf-8")
        (root / "convert_hf_to_gguf.py").write_text(
            '@ModelBase.register("Arch1")\nclass Arch1Model(TextModel):\n pass\n',
            encoding="utf-8",
        )
        second = probe_gguf_arch("Arch1", llama_cpp_root=root)
        assert second.support is SupportLevel.SUPPORTED
        assert second.llama_cpp_tag == "tag-v2"
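
    # Illustrative sketch, not one of the original cases: the explicit
    # `clear_cache()` escape hatch (the same call the autouse fixture uses)
    # should force a re-read, the inverse of test_repeat_calls_hit_cache.
    def test_clear_cache_forces_reread(self, tmp_path: Path) -> None:
        root = _fixture_llama_cpp(
            tmp_path,
            '@ModelBase.register("Arch1")\nclass Arch1Model(TextModel):\n pass\n',
        )
        first = probe_gguf_arch("Arch1", llama_cpp_root=root)
        assert first.support is SupportLevel.SUPPORTED

        # Swap the script, then clear the memoization table: the next probe
        # must re-read the file and flip the verdict.
        (root / "convert_hf_to_gguf.py").write_text("# No registrations now.\n", encoding="utf-8")
        clear_cache()
        second = probe_gguf_arch("Arch1", llama_cpp_root=root)
        assert second.support is SupportLevel.UNSUPPORTED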


class TestMissingScript:
    def test_missing_convert_script_raises(self, tmp_path: Path) -> None:
        """Directory exists + has other files but not convert_hf_to_gguf.py.

        (An empty directory hits the earlier "source tree is empty" guard
        in `llama_cpp_root`; we want the convert-script-missing path here.)
        """
        root = tmp_path / "partial-llama.cpp"
        root.mkdir()
        (root / "README.md").write_text("", encoding="utf-8")
        with pytest.raises(VendoringError, match="convert_hf_to_gguf.py"):
            probe_gguf_arch("AnyArch", llama_cpp_root=root)


# --- Live-tree assertions -----------------------------------------------
# These test the actual pinned vendored llama.cpp — flag vendor bumps
# that change support levels for the three registered VL bases.


class TestLiveVendoredTree:
    """Verdicts against the current vendored llama.cpp.

    A llama.cpp bump that changes these expectations is a meaningful
    event — it flips users from the HF-snapshot fallback to the GGUF
    path (or vice versa). Failing here forces the cookbook + vl-memory
    docs to be refreshed in the same commit.
    """

    def test_paligemma_unsupported(self) -> None:
        _require_live_vendored_tree()
        result = probe_gguf_arch("PaliGemmaForConditionalGeneration")
        assert result.support is SupportLevel.UNSUPPORTED

    def test_qwen2vl_supported(self) -> None:
        _require_live_vendored_tree()
        result = probe_gguf_arch("Qwen2VLForConditionalGeneration")
        assert result.support is SupportLevel.SUPPORTED

    def test_internvl2_unsupported(self) -> None:
        _require_live_vendored_tree()
        result = probe_gguf_arch("InternVLChatModel")
        assert result.support is SupportLevel.UNSUPPORTED


def _require_live_vendored_tree() -> None:
    try:
        llama_cpp_root()
    except VendoringError as exc:
        pytest.skip(f"live vendored llama.cpp tree unavailable: {exc}")