Python · 7653 bytes Raw Blame History
1 """Audio-base registry + preprocessor plan + audio probe (Sprint 35.2).
2
3 Mirrors `test_vl_registry.py` for the audio-language modality. Covers:
4
5 - `qwen2-audio-7b-instruct` is present and has `modality="audio-language"`.
6 - Its `AudioPreprocessorPlan` is pinned (16 kHz, 30 s, `<|AUDIO|>`, 750).
7 - License is Apache-2.0 and the current HF row is no longer gated, so
8 the spec stays redistributable.
9 - `modality="audio-language"` without a plan rejects at validate time;
10 text bases cannot carry an audio plan; VL bases cannot carry an audio
11 plan; audio bases cannot carry a VL plan.
12 - `run_all` on an audio spec skips the llama.cpp-converter probes (no
13 audio-arch support on any llama.cpp roadmap).
14 """
15
16 from __future__ import annotations
17
18 import pytest
19 from pydantic import ValidationError
20
21 from dlm.base_models import BASE_MODELS
22 from dlm.base_models.probes import run_all
23 from dlm.base_models.schema import (
24 AudioPreprocessorPlan,
25 BaseModelSpec,
26 VlPreprocessorPlan,
27 )
28
29
class TestQwen2AudioRegistryEntry:
    """Pin the registry row for the `qwen2-audio-7b-instruct` base."""

    # Registry key shared by every check in this class.
    _KEY = "qwen2-audio-7b-instruct"

    def test_entry_present(self) -> None:
        """The key is registered in `BASE_MODELS`."""
        assert self._KEY in BASE_MODELS

    def test_modality_is_audio(self) -> None:
        """The entry declares the audio-language modality."""
        entry = BASE_MODELS[self._KEY]
        assert entry.modality == "audio-language"

    def test_preprocessor_plan_pinned(self) -> None:
        """The audio plan exists and carries the pinned values."""
        audio_plan = BASE_MODELS[self._KEY].audio_preprocessor_plan
        assert audio_plan is not None
        pinned = (
            audio_plan.sample_rate,
            audio_plan.max_length_seconds,
            audio_plan.audio_token,
            audio_plan.num_audio_tokens,
        )
        assert pinned == (16_000, 30.0, "<|AUDIO|>", 750)

    def test_no_vl_plan(self) -> None:
        """The entry carries no vision-language plan."""
        entry = BASE_MODELS[self._KEY]
        assert entry.vl_preprocessor_plan is None

    def test_license_open_and_redistributable(self) -> None:
        """No acceptance step is required and the entry is redistributable."""
        entry = BASE_MODELS[self._KEY]
        needs_acceptance = entry.requires_acceptance
        may_redistribute = entry.redistributable
        assert needs_acceptance is False
        assert may_redistribute is True

    def test_architecture_is_audio_conditional_generation(self) -> None:
        """The architecture string is the Qwen2-Audio conditional-generation class."""
        entry = BASE_MODELS[self._KEY]
        assert entry.architecture == "Qwen2AudioForConditionalGeneration"

    def test_template_is_qwen2_audio(self) -> None:
        """The template name is `qwen2-audio`."""
        entry = BASE_MODELS[self._KEY]
        assert entry.template == "qwen2-audio"
63
64
class TestAudioPreprocessorPlan:
    """Field validation and immutability checks for `AudioPreprocessorPlan`."""

    def _make(
        self,
        *,
        sample_rate: int = 16_000,
        max_length_seconds: float = 30.0,
        audio_token: str = "<|AUDIO|>",
        num_audio_tokens: int = 750,
    ) -> AudioPreprocessorPlan:
        """Build a plan from the pinned values; each test overrides one field."""
        return AudioPreprocessorPlan(
            sample_rate=sample_rate,
            max_length_seconds=max_length_seconds,
            audio_token=audio_token,
            num_audio_tokens=num_audio_tokens,
        )

    def test_rejects_non_positive_sample_rate(self) -> None:
        """A sample rate of zero fails validation."""
        with pytest.raises(ValidationError):
            self._make(sample_rate=0)

    def test_rejects_non_positive_max_length(self) -> None:
        """A maximum length of zero seconds fails validation."""
        with pytest.raises(ValidationError):
            self._make(max_length_seconds=0.0)

    def test_rejects_empty_audio_token(self) -> None:
        """An empty audio token string fails validation."""
        with pytest.raises(ValidationError):
            self._make(audio_token="")

    def test_rejects_non_positive_num_audio_tokens(self) -> None:
        """An audio-token count of zero fails validation."""
        with pytest.raises(ValidationError):
            self._make(num_audio_tokens=0)

    def test_frozen(self) -> None:
        """Assigning to a field of a constructed plan raises `ValidationError`."""
        frozen_plan = self._make()
        with pytest.raises(ValidationError):
            frozen_plan.num_audio_tokens = 1500  # type: ignore[misc]
111
112
class TestSpecModalityInvariants:
    """Modality / preprocessor-plan combinations that `BaseModelSpec` must reject."""

    def _base_kwargs(self) -> dict[str, object]:
        """Return fresh spec kwargs, leaving modality and plans to the caller."""
        fields: dict[str, object] = {
            # Identity
            "key": "test-entry",
            "hf_id": "test/entry",
            "revision": 40 * "a",
            # Model shape
            "architecture": "LlamaForCausalLM",
            "params": 1_000_000,
            "target_modules": ["q_proj"],
            "template": "chatml",
            # Export labels
            "gguf_arch": "llama",
            "tokenizer_pre": "llama-bpe",
            # Licensing
            "license_spdx": "Apache-2.0",
            "redistributable": True,
            # Sizing
            "size_gb_fp16": 0.5,
            "context_length": 4096,
            "recommended_seq_len": 1024,
        }
        return fields

    def _audio_plan(self) -> AudioPreprocessorPlan:
        """Return a valid audio plan (16 kHz, 30 s, `<|AUDIO|>`, 750 tokens)."""
        settings = {"sample_rate": 16_000, "num_audio_tokens": 750}
        return AudioPreprocessorPlan(
            sample_rate=settings["sample_rate"],
            max_length_seconds=30.0,
            audio_token="<|AUDIO|>",
            num_audio_tokens=settings["num_audio_tokens"],
        )

    def _vl_plan(self) -> VlPreprocessorPlan:
        """Return a valid vision-language plan (224x224, `<image>`, 256 tokens)."""
        side = 224
        return VlPreprocessorPlan(
            target_size=(side, side),
            image_token="<image>",
            num_image_tokens=256,
        )

    def _spec(self, **overrides: object) -> BaseModelSpec:
        """Build a spec from the base kwargs followed by `overrides`, in order."""
        return BaseModelSpec(**self._base_kwargs(), **overrides)  # type: ignore[arg-type]

    def test_audio_without_plan_rejected(self) -> None:
        """An audio-language spec with no audio plan fails validation."""
        expected = "requires an audio_preprocessor_plan"
        with pytest.raises(ValidationError, match=expected):
            self._spec(modality="audio-language")

    def test_text_with_audio_plan_rejected(self) -> None:
        """A text spec carrying an audio plan fails validation."""
        expected = "only valid with"
        with pytest.raises(ValidationError, match=expected):
            self._spec(
                modality="text",
                audio_preprocessor_plan=self._audio_plan(),
            )

    def test_vl_with_audio_plan_rejected(self) -> None:
        """A vision-language spec carrying an audio plan fails validation."""
        expected = "audio_preprocessor_plan is invalid"
        with pytest.raises(ValidationError, match=expected):
            self._spec(
                modality="vision-language",
                vl_preprocessor_plan=self._vl_plan(),
                audio_preprocessor_plan=self._audio_plan(),
            )

    def test_audio_with_vl_plan_rejected(self) -> None:
        """An audio-language spec carrying a VL plan fails validation."""
        expected = "vl_preprocessor_plan is invalid"
        with pytest.raises(ValidationError, match=expected):
            self._spec(
                modality="audio-language",
                audio_preprocessor_plan=self._audio_plan(),
                vl_preprocessor_plan=self._vl_plan(),
            )
176
177
class TestRunAllSkipsExportProbesForAudio:
    """`run_all` on an audio spec leaves out the llama.cpp-converter probes.

    Audio architectures aren't on any llama.cpp roadmap, so GGUF export
    refuses cleanly and an HF snapshot is emitted instead. The dispatcher
    drops the export probes so the report stays focused.
    """

    def _probe_names(self) -> set[str]:
        """Run every probe on the Qwen2-Audio entry and collect the result names."""
        report = run_all(BASE_MODELS["qwen2-audio-7b-instruct"])
        return {result.name for result in report.results}

    def test_audio_spec_yields_two_probes(self) -> None:
        """Export and chat-template probes are absent; audio probes are present.

        Despite the name, this checks membership only, not an exact count.
        """
        names = self._probe_names()
        for export_probe in ("gguf_arch", "pretokenizer_label", "pretokenizer_hash"):
            assert export_probe not in names
        # audio_token is the audio-specific probe; it may skip if
        # transformers/processor isn't cached locally.
        for expected_probe in ("audio_token", "architecture"):
            assert expected_probe in names
        # Chat-template probe does not apply to audio bases.
        assert "chat_template" not in names

    def test_audio_spec_skips_vl_probe(self) -> None:
        """The vision-language image-token probe is not run for an audio spec."""
        names = self._probe_names()
        assert "vl_image_token" not in names
204 assert "vl_image_token" not in probe_names