"""Audio-base registry + preprocessor plan + audio probe (Sprint 35.2).

Mirrors `test_vl_registry.py` for the audio-language modality. Covers:

- `qwen2-audio-7b-instruct` is present and has `modality="audio-language"`.
- Its `AudioPreprocessorPlan` is pinned (16 kHz, 30 s, `<|AUDIO|>`, 750).
- License is Apache-2.0 and the current HF row is no longer gated, so
  the spec stays redistributable.
- `modality="audio-language"` without a plan rejects at validate time;
  text bases cannot carry an audio plan; VL bases cannot carry an audio
  plan; audio bases cannot carry a VL plan.
- `run_all` on an audio spec skips the llama.cpp-converter probes (no
  audio-arch support on any llama.cpp roadmap).
"""

from __future__ import annotations

import pytest
from pydantic import ValidationError

from dlm.base_models import BASE_MODELS
from dlm.base_models.probes import run_all
from dlm.base_models.schema import (
    AudioPreprocessorPlan,
    BaseModelSpec,
    VlPreprocessorPlan,
)
| 30 |
class TestQwen2AudioRegistryEntry:
    """Pinned-registry checks for the `qwen2-audio-7b-instruct` row."""

    @staticmethod
    def _spec() -> BaseModelSpec:
        # Single lookup point so every test reads the same registry row.
        return BASE_MODELS["qwen2-audio-7b-instruct"]

    def test_entry_present(self) -> None:
        assert "qwen2-audio-7b-instruct" in BASE_MODELS

    def test_modality_is_audio(self) -> None:
        assert self._spec().modality == "audio-language"

    def test_preprocessor_plan_pinned(self) -> None:
        plan = self._spec().audio_preprocessor_plan
        assert plan is not None
        # All four knobs are pinned; a drift in any one breaks reproducibility.
        assert plan.sample_rate == 16_000
        assert plan.max_length_seconds == 30.0
        assert plan.audio_token == "<|AUDIO|>"
        assert plan.num_audio_tokens == 750

    def test_no_vl_plan(self) -> None:
        assert self._spec().vl_preprocessor_plan is None

    def test_license_open_and_redistributable(self) -> None:
        spec = self._spec()
        # Identity checks on purpose: the fields must be literal booleans.
        assert spec.requires_acceptance is False
        assert spec.redistributable is True

    def test_architecture_is_audio_conditional_generation(self) -> None:
        assert self._spec().architecture == "Qwen2AudioForConditionalGeneration"

    def test_template_is_qwen2_audio(self) -> None:
        assert self._spec().template == "qwen2-audio"
| 65 |
class TestAudioPreprocessorPlan:
    """Field validation and immutability of `AudioPreprocessorPlan`."""

    # Known-good field values; each rejection test overrides exactly one.
    _VALID = {
        "sample_rate": 16_000,
        "max_length_seconds": 30.0,
        "audio_token": "<|AUDIO|>",
        "num_audio_tokens": 750,
    }

    def _make(self, **overrides: object) -> AudioPreprocessorPlan:
        # Build a plan from the valid baseline plus any per-test override.
        return AudioPreprocessorPlan(**{**self._VALID, **overrides})

    def test_rejects_non_positive_sample_rate(self) -> None:
        with pytest.raises(ValidationError):
            self._make(sample_rate=0)

    def test_rejects_non_positive_max_length(self) -> None:
        with pytest.raises(ValidationError):
            self._make(max_length_seconds=0.0)

    def test_rejects_empty_audio_token(self) -> None:
        with pytest.raises(ValidationError):
            self._make(audio_token="")

    def test_rejects_non_positive_num_audio_tokens(self) -> None:
        with pytest.raises(ValidationError):
            self._make(num_audio_tokens=0)

    def test_frozen(self) -> None:
        plan = self._make()
        # Frozen model: attribute assignment must raise, not mutate.
        with pytest.raises(ValidationError):
            plan.num_audio_tokens = 1500  # type: ignore[misc]
| 113 |
class TestSpecModalityInvariants:
    """Cross-field modality/plan rules enforced by `BaseModelSpec` validation."""

    def _base_kwargs(self) -> dict[str, object]:
        # Minimal valid payload; each test layers modality/plan fields on top.
        return dict(
            key="test-entry",
            hf_id="test/entry",
            revision="a" * 40,
            architecture="LlamaForCausalLM",
            params=1_000_000,
            target_modules=["q_proj"],
            template="chatml",
            gguf_arch="llama",
            tokenizer_pre="llama-bpe",
            license_spdx="Apache-2.0",
            redistributable=True,
            size_gb_fp16=0.5,
            context_length=4096,
            recommended_seq_len=1024,
        )

    def _audio_plan(self) -> AudioPreprocessorPlan:
        return AudioPreprocessorPlan(
            sample_rate=16_000,
            max_length_seconds=30.0,
            audio_token="<|AUDIO|>",
            num_audio_tokens=750,
        )

    def _vl_plan(self) -> VlPreprocessorPlan:
        return VlPreprocessorPlan(
            target_size=(224, 224),
            image_token="<image>",
            num_image_tokens=256,
        )

    def test_audio_without_plan_rejected(self) -> None:
        kwargs = self._base_kwargs()
        kwargs["modality"] = "audio-language"
        with pytest.raises(ValidationError, match="requires an audio_preprocessor_plan"):
            BaseModelSpec(**kwargs)  # type: ignore[arg-type]

    def test_text_with_audio_plan_rejected(self) -> None:
        kwargs = self._base_kwargs()
        kwargs["modality"] = "text"
        kwargs["audio_preprocessor_plan"] = self._audio_plan()
        with pytest.raises(ValidationError, match="only valid with"):
            BaseModelSpec(**kwargs)  # type: ignore[arg-type]

    def test_vl_with_audio_plan_rejected(self) -> None:
        kwargs = self._base_kwargs()
        kwargs["modality"] = "vision-language"
        kwargs["vl_preprocessor_plan"] = self._vl_plan()
        kwargs["audio_preprocessor_plan"] = self._audio_plan()
        with pytest.raises(ValidationError, match="audio_preprocessor_plan is invalid"):
            BaseModelSpec(**kwargs)  # type: ignore[arg-type]

    def test_audio_with_vl_plan_rejected(self) -> None:
        kwargs = self._base_kwargs()
        kwargs["modality"] = "audio-language"
        kwargs["audio_preprocessor_plan"] = self._audio_plan()
        kwargs["vl_preprocessor_plan"] = self._vl_plan()
        with pytest.raises(ValidationError, match="vl_preprocessor_plan is invalid"):
            BaseModelSpec(**kwargs)  # type: ignore[arg-type]
| 178 |
class TestRunAllSkipsExportProbesForAudio:
    """On audio specs `run_all` omits every llama.cpp-converter probe.

    No audio architecture is on the llama.cpp roadmap, so GGUF export
    refuses cleanly (an HF snapshot is emitted instead) and the
    dispatcher silently drops the export probes from the report.
    """

    @staticmethod
    def _probe_names() -> set[str]:
        # Run the full probe dispatch on the audio spec and collect names.
        report = run_all(BASE_MODELS["qwen2-audio-7b-instruct"])
        return {result.name for result in report.results}

    def test_audio_spec_yields_two_probes(self) -> None:
        names = self._probe_names()
        # None of the GGUF/tokenizer export probes may appear for audio.
        assert names.isdisjoint(
            {"gguf_arch", "pretokenizer_label", "pretokenizer_hash"}
        )
        # audio_token is the audio-specific probe; it may skip if
        # transformers/processor isn't cached locally.
        assert "audio_token" in names
        assert "architecture" in names
        # Chat-template probe does not apply to audio bases.
        assert "chat_template" not in names

    def test_audio_spec_skips_vl_probe(self) -> None:
        assert "vl_image_token" not in self._probe_names()