| 1 |
"""Curated launch registry of supported base models. |
| 2 |
|
| 3 |
Every entry pins an exact HuggingFace commit SHA. Refreshed by |
| 4 |
`scripts/refresh-registry.py`; weekly CI opens a PR on drift. |
| 5 |
|
| 6 |
Notes on individual entries: |
| 7 |
|
| 8 |
- `qwen2.5-3b` ships under the Qwen Research License (free for entities |
| 9 |
with <100M MAU). We record it as `license_spdx="Other"` and surface |
| 10 |
the URL via `license_url`; it remains `redistributable=True` because |
| 11 |
the license permits bundling + redistribution with attribution. |
| 12 |
**Caveat:** the boolean `redistributable` field does not express the |
| 13 |
MAU threshold or attribution requirement. A |
| 14 |
`redistributable_conditions: str | None` field on `BaseModelSpec` |
| 15 |
plus a pack-time attestation checkbox would encode this properly — |
| 16 |
deferred follow-up work. Until then, users at the scale threshold |
| 17 |
must consult the license text themselves. |
| 18 |
- Llama-3.2 models are gated on HuggingFace. Llama-3.3 8B currently |
| 19 |
needs a mirror-backed fetch path because Meta exposes it through the |
| 20 |
Llama API but not a first-party HF repo. DLM still keeps the same |
| 21 |
acceptance + non-redistribution policy surface for the whole Llama |
| 22 |
family (`requires_acceptance=True`, `redistributable=False`) — |
| 23 |
enforced by the pack gate and share-protocol refusal. |
| 24 |
- SmolLM2 / SmolLM3 and Phi-3.5-mini are permissive (Apache-2.0 / MIT). |
| 25 |
- `size_gb_fp16` is approximate; the hardware doctor uses it to seed |
| 26 |
VRAM estimates, which then get refined by runtime checks. |
| 27 |
""" |
| 28 |
|
| 29 |
from __future__ import annotations |
| 30 |
|
| 31 |
from typing import Final |
| 32 |
|
| 33 |
from dlm.base_models.schema import AudioPreprocessorPlan, BaseModelSpec, VlPreprocessorPlan |
| 34 |
|
| 35 |
# Ordered tuple of every curated base model, one `BaseModelSpec` per row.
# `BASE_MODELS` is built from this tuple, so the order here is the order that
# lookups and key listings expose. Each row pins a `revision` alongside its
# `hf_id` and carries the license flags (`requires_acceptance`,
# `redistributable`) described in the module docstring.
_ENTRIES: tuple[BaseModelSpec, ...] = (
    # --- Text bases ---------------------------------------------------------
    # Qwen 2.5 instruct rows.
    BaseModelSpec(
        key="qwen2.5-0.5b",
        hf_id="Qwen/Qwen2.5-0.5B-Instruct",
        revision="7ae557604adf67be50417f59c2c2f167def9a775",
        architecture="Qwen2ForCausalLM",
        params=500_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="chatml",
        gguf_arch="qwen2",
        tokenizer_pre="qwen2",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct/blob/main/LICENSE",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=1.0,
        context_length=32_768,
        recommended_seq_len=2048,
    ),
    BaseModelSpec(
        key="qwen2.5-1.5b",
        hf_id="Qwen/Qwen2.5-1.5B-Instruct",
        revision="989aa7980e4cf806f80c7fef2b1adb7bc71aa306",
        architecture="Qwen2ForCausalLM",
        params=1_500_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="chatml",
        gguf_arch="qwen2",
        tokenizer_pre="qwen2",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct/blob/main/LICENSE",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=3.1,
        context_length=32_768,
        recommended_seq_len=2048,
    ),
    # The 3B row is the one Qwen 2.5 entry that is not Apache-2.0: it ships
    # under the Qwen Research License, recorded as `license_spdx="Other"`.
    # See the module docstring for the caveat on `redistributable=True`.
    BaseModelSpec(
        key="qwen2.5-3b",
        hf_id="Qwen/Qwen2.5-3B-Instruct",
        revision="aa8e72537993ba99e69dfaafa59ed015b17504d1",
        architecture="Qwen2ForCausalLM",
        params=3_000_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="chatml",
        gguf_arch="qwen2",
        tokenizer_pre="qwen2",
        license_spdx="Other",
        license_url="https://huggingface.co/Qwen/Qwen2.5-3B-Instruct/blob/main/LICENSE",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=6.2,
        context_length=32_768,
        recommended_seq_len=2048,
    ),
    BaseModelSpec(
        key="qwen2.5-coder-1.5b",
        hf_id="Qwen/Qwen2.5-Coder-1.5B-Instruct",
        revision="2e1fd397ee46e1388853d2af2c993145b0f1098a",
        architecture="Qwen2ForCausalLM",
        params=1_500_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="chatml",
        gguf_arch="qwen2",
        tokenizer_pre="qwen2",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct/blob/main/LICENSE",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=3.1,
        context_length=32_768,
        recommended_seq_len=2048,
    ),
    # Qwen 3 rows. All are flagged `reasoning_tuned=True`.
    BaseModelSpec(
        key="qwen3-1.7b",
        hf_id="Qwen/Qwen3-1.7B",
        revision="70d244cc86ccca08cf5af4e1e306ecf908b1ad5e",
        architecture="Qwen3ForCausalLM",
        params=1_700_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="chatml",
        gguf_arch="qwen3",
        tokenizer_pre="qwen2",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/Qwen/Qwen3-1.7B/blob/main/LICENSE",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=3.4,
        context_length=32_768,
        recommended_seq_len=2048,
        reasoning_tuned=True,
    ),
    # Same checkpoint and pinned revision as `qwen3-1.7b`; only `key` and
    # `template` ("qwen3thinking" instead of "chatml") differ.
    BaseModelSpec(
        key="qwen3-1.7b-thinking",
        hf_id="Qwen/Qwen3-1.7B",
        revision="70d244cc86ccca08cf5af4e1e306ecf908b1ad5e",
        architecture="Qwen3ForCausalLM",
        params=1_700_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="qwen3thinking",
        gguf_arch="qwen3",
        tokenizer_pre="qwen2",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/Qwen/Qwen3-1.7B/blob/main/LICENSE",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=3.4,
        context_length=32_768,
        recommended_seq_len=2048,
        reasoning_tuned=True,
    ),
    BaseModelSpec(
        key="qwen3-4b",
        hf_id="Qwen/Qwen3-4B",
        revision="1cfa9a7208912126459214e8b04321603b3df60c",
        architecture="Qwen3ForCausalLM",
        params=4_000_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="chatml",
        gguf_arch="qwen3",
        tokenizer_pre="qwen2",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/Qwen/Qwen3-4B/blob/main/LICENSE",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=8.0,
        context_length=32_768,
        recommended_seq_len=2048,
        reasoning_tuned=True,
    ),
    BaseModelSpec(
        key="qwen3-8b",
        hf_id="Qwen/Qwen3-8B",
        revision="b968826d9c46dd6066d109eabc6255188de91218",
        architecture="Qwen3ForCausalLM",
        params=8_000_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="chatml",
        gguf_arch="qwen3",
        tokenizer_pre="qwen2",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/Qwen/Qwen3-8B/blob/main/LICENSE",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=16.0,
        context_length=32_768,
        recommended_seq_len=2048,
        reasoning_tuned=True,
    ),
    # Llama family: every row requires license acceptance and is marked
    # non-redistributable (see the module docstring).
    BaseModelSpec(
        key="llama-3.2-1b",
        hf_id="meta-llama/Llama-3.2-1B-Instruct",
        revision="9213176726f574b556790deb65791e0c5aa438b6",
        architecture="LlamaForCausalLM",
        params=1_000_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="llama3",
        gguf_arch="llama",
        tokenizer_pre="llama-bpe",
        license_spdx="Other",
        license_url="https://www.llama.com/llama3_2/license/",
        requires_acceptance=True,
        redistributable=False,
        size_gb_fp16=2.5,
        context_length=131_072,
        recommended_seq_len=4096,
    ),
    BaseModelSpec(
        key="llama-3.2-3b",
        hf_id="meta-llama/Llama-3.2-3B-Instruct",
        revision="0cb88a4f764b7a12671c53f0838cd831a0843b95",
        architecture="LlamaForCausalLM",
        params=3_000_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="llama3",
        gguf_arch="llama",
        tokenizer_pre="llama-bpe",
        license_spdx="Other",
        license_url="https://www.llama.com/llama3_2/license/",
        requires_acceptance=True,
        redistributable=False,
        size_gb_fp16=6.5,
        context_length=131_072,
        recommended_seq_len=4096,
    ),
    BaseModelSpec(
        key="llama-3.3-8b-instruct",
        # Meta's first-party LlamaCon announcement explicitly says the
        # Llama API can fine-tune "o novo modelo Llama 3.3 8B" (Portuguese:
        # "the new Llama 3.3 8B model"), but there is still no first-party
        # HF repo. DLM therefore fetches weights from the community mirror
        # below while refresh-registry separately probes Meta's newsroom
        # article for provenance (`provenance_url` / `provenance_match_text`).
        #
        # NOTE(review): `license_url` here points at the llama3 license page
        # while the Llama-3.2 rows point at llama3_2 — confirm which license
        # text applies to this checkpoint.
        # NOTE(review): `refresh_check_hf_gating=False` presumably skips the
        # HF gating probe because the mirror repo is not Meta's gated repo —
        # confirm against `scripts/refresh-registry.py`.
        hf_id="allura-forge/Llama-3.3-8B-Instruct",
        revision="df95224cf87c32d9f4958dd284a07ded620aa4fc",
        architecture="LlamaForCausalLM",
        params=8_000_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="llama3",
        gguf_arch="llama",
        tokenizer_pre="llama-bpe",
        license_spdx="Other",
        license_url="https://llama.meta.com/llama3/license",
        requires_acceptance=True,
        redistributable=False,
        size_gb_fp16=16.5,
        context_length=131_072,
        context_length_effective=8_192,
        recommended_seq_len=4096,
        refresh_check_hf_gating=False,
        provenance_url=(
            "https://about.fb.com/br/news/2025/04/tudo-o-que-anunciamos-no-nosso-primeiro-llamacon/"
        ),
        provenance_match_text="novo modelo Llama 3.3 8B",
    ),
    BaseModelSpec(
        key="smollm3-3b",
        hf_id="HuggingFaceTB/SmolLM3-3B",
        revision="a07cc9a04f16550a088caea529712d1d335b0ac1",
        architecture="SmolLM3ForCausalLM",
        params=3_000_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="smollm3",
        gguf_arch="llama",
        tokenizer_pre="smollm",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/HuggingFaceTB/SmolLM3-3B",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=6.2,
        context_length=65_536,
        recommended_seq_len=4096,
        reasoning_tuned=True,
    ),
    BaseModelSpec(
        key="olmo-2-7b-instruct",
        hf_id="allenai/OLMo-2-1124-7B-Instruct",
        revision="470b1fba1ae01581f270116362ee4aa1b97f4c84",
        architecture="Olmo2ForCausalLM",
        params=7_000_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="olmo2",
        gguf_arch="olmo2",
        tokenizer_pre="superbpe",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/allenai/OLMo-2-1124-7B-Instruct",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=14.6,
        context_length=4096,
        recommended_seq_len=2048,
    ),
    # Gemma 2 rows: acceptance is required and the weights are marked
    # non-redistributable under the Gemma terms linked in `license_url`.
    BaseModelSpec(
        key="gemma-2-2b-it",
        hf_id="google/gemma-2-2b-it",
        revision="299a8560bedf22ed1c72a8a11e7dce4a7f9f51f8",
        architecture="Gemma2ForCausalLM",
        params=2_600_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="gemma2",
        gguf_arch="gemma2",
        tokenizer_pre="gemma",
        license_spdx="Gemma",
        license_url="https://ai.google.dev/gemma/terms",
        requires_acceptance=True,
        redistributable=False,
        size_gb_fp16=5.2,
        context_length=8192,
        recommended_seq_len=2048,
    ),
    BaseModelSpec(
        key="gemma-2-9b-it",
        hf_id="google/gemma-2-9b-it",
        revision="11c9b309abf73637e4b6f9a3fa1e92e615547819",
        architecture="Gemma2ForCausalLM",
        params=9_000_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="gemma2",
        gguf_arch="gemma2",
        tokenizer_pre="gemma",
        license_spdx="Gemma",
        license_url="https://ai.google.dev/gemma/terms",
        requires_acceptance=True,
        redistributable=False,
        size_gb_fp16=18.0,
        context_length=8192,
        recommended_seq_len=2048,
    ),
    # SmolLM2 rows: Apache-2.0, loaded through the Llama architecture class.
    BaseModelSpec(
        key="smollm2-135m",
        hf_id="HuggingFaceTB/SmolLM2-135M-Instruct",
        revision="12fd25f77366fa6b3b4b768ec3050bf629380bac",
        architecture="LlamaForCausalLM",
        params=135_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="chatml",
        gguf_arch="llama",
        tokenizer_pre="smollm",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/HuggingFaceTB/SmolLM2-135M-Instruct",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=0.27,
        context_length=8_192,
        recommended_seq_len=1024,
    ),
    BaseModelSpec(
        key="smollm2-360m",
        hf_id="HuggingFaceTB/SmolLM2-360M-Instruct",
        revision="a10cc1512eabd3dde888204e902eca88bddb4951",
        architecture="LlamaForCausalLM",
        params=360_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="chatml",
        gguf_arch="llama",
        tokenizer_pre="smollm",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/HuggingFaceTB/SmolLM2-360M-Instruct",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=0.72,
        context_length=8_192,
        recommended_seq_len=1024,
    ),
    BaseModelSpec(
        key="smollm2-1.7b",
        hf_id="HuggingFaceTB/SmolLM2-1.7B-Instruct",
        revision="31b70e2e869a7173562077fd711b654946d38674",
        architecture="LlamaForCausalLM",
        params=1_700_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="chatml",
        gguf_arch="llama",
        tokenizer_pre="smollm",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=3.4,
        context_length=8_192,
        recommended_seq_len=2048,
    ),
    # Phi rows: MIT. Unlike the other text rows, `target_modules` names the
    # fused projections (`qkv_proj`, `gate_up_proj`) plus `o_proj`/`down_proj`.
    BaseModelSpec(
        key="phi-3.5-mini",
        hf_id="microsoft/Phi-3.5-mini-instruct",
        revision="2fe192450127e6a83f7441aef6e3ca586c338b77",
        architecture="Phi3ForCausalLM",
        params=3_800_000_000,
        target_modules=["qkv_proj", "o_proj", "gate_up_proj", "down_proj"],
        template="phi3",
        gguf_arch="phi3",
        tokenizer_pre="phi-2",
        license_spdx="MIT",
        license_url="https://huggingface.co/microsoft/Phi-3.5-mini-instruct/blob/main/LICENSE",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=7.6,
        context_length=131_072,
        recommended_seq_len=2048,
    ),
    BaseModelSpec(
        key="phi-4-mini-reasoning",
        hf_id="microsoft/Phi-4-mini-reasoning",
        revision="0e3b1e2d02ee478a3743abe3f629e9c0cb722e0a",
        architecture="Phi3ForCausalLM",
        params=3_800_000_000,
        target_modules=["qkv_proj", "o_proj", "gate_up_proj", "down_proj"],
        template="phi4mini",
        gguf_arch="phi3",
        tokenizer_pre="phi-2",
        license_spdx="MIT",
        license_url="https://huggingface.co/microsoft/Phi-4-mini-reasoning/blob/main/LICENSE",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=7.6,
        context_length=131_072,
        recommended_seq_len=2048,
        reasoning_tuned=True,
    ),
    # Mixtral-8x7B-Instruct-v0.1 — Apache-2.0 sparse MoE base.
    #
    # HF exposes this as `MixtralForCausalLM`, but the current vendored
    # llama.cpp converter routes it through the Llama path rather than a
    # distinct Mixtral architecture class. We therefore keep
    # `gguf_arch="llama"` while marking the modality as `text-moe` so
    # DLM's gate substrate can detect the sparse-MoE family explicitly.
    BaseModelSpec(
        key="mixtral-8x7b-instruct",
        hf_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
        revision="eba92302a2861cdc0098cc54bc9f17cb2c47eb61",
        architecture="MixtralForCausalLM",
        params=46_700_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="mistral",
        gguf_arch="llama",
        tokenizer_pre="llama-bpe",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=93.4,
        context_length=32_768,
        recommended_seq_len=2048,
        modality="text-moe",
    ),
    # Mistral Small 3.1 24B Instruct — Apache-2.0 multimodal base with
    # native vision support and 128k context.
    #
    # An earlier draft treated this as text-only; the live HF config
    # is `Mistral3ForConditionalGeneration` with both text and
    # vision towers, so we register it as vision-language. The current
    # processor config pins `[IMG]` as the image placeholder and a
    # longest edge of 1540 px. DLM's current `VlPreprocessorPlan`
    # abstraction is fixed-size only, so we conservatively pin
    # 1540×1540 here until dynamic ranges land.
    #
    # Note that this row is a vision-language entry even though it sits
    # above the "Vision-language bases" section marker below.
    BaseModelSpec(
        key="mistral-small-3.1-24b-instruct",
        hf_id="mistralai/Mistral-Small-3.1-24B-Instruct-2503",
        revision="68faf511d618ef198fef186659617cfd2eb8e33a",
        architecture="Mistral3ForConditionalGeneration",
        params=24_000_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="mistral",
        gguf_arch="mistral3",
        tokenizer_pre="tekken",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=48.0,
        context_length=131_072,
        recommended_seq_len=4096,
        modality="vision-language",
        vl_preprocessor_plan=VlPreprocessorPlan(
            target_size=(1540, 1540),
            resize_policy="fixed",
            image_token="[IMG]",
            num_image_tokens=3025,
        ),
    ),
    # --- Vision-language bases ----------------------------------------------
    # PaliGemma-3B-mix-224 — Google's instruction-tuned VL base built on
    # Gemma-2B + SigLIP-So400m. Gated under the Gemma license; cannot
    # redistribute inside a `.dlm.pack` (same pattern as Llama-3.2).
    # Training targets Gemma's transformer blocks; the vision tower is
    # trained jointly when modules_to_save expands to ["embed_tokens",
    # "lm_head"], but the current entry keeps modules_to_save empty so
    # only the LLM-side LoRA adapters move — the vision tower is frozen.
    #
    # `gguf_arch` / `tokenizer_pre` are set to tags the current vendored
    # llama.cpp doesn't recognize; the export probes surface
    # UNSUPPORTED + refuse GGUF conversion until GGUF support lands.
    # HF-snapshot export (`dlm export --hf-snapshot`) still works.
    #
    # NOTE(review): this row records `license_spdx="Other"` while the
    # Gemma-2 rows record "Gemma" for the same `license_url` — confirm
    # which tag is intended.
    BaseModelSpec(
        key="paligemma-3b-mix-224",
        hf_id="google/paligemma-3b-mix-224",
        revision="d1d8734c9c3ad0ccfeea4afc270faa356c2ba515",
        architecture="PaliGemmaForConditionalGeneration",
        params=2_900_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="paligemma",
        gguf_arch="paligemma",
        tokenizer_pre="gemma",
        license_spdx="Other",
        license_url="https://ai.google.dev/gemma/terms",
        requires_acceptance=True,
        redistributable=False,
        size_gb_fp16=6.5,
        context_length=8_192,
        recommended_seq_len=2048,
        modality="vision-language",
        vl_preprocessor_plan=VlPreprocessorPlan(
            target_size=(224, 224),
            resize_policy="fixed",
            image_token="<image>",
            num_image_tokens=256,
        ),
    ),
    # Qwen2-VL-2B-Instruct — Alibaba's Apache-2.0 VL base with dynamic-
    # resolution support in native HF. The current entry pins a
    # conservative fixed 672×672 preprocessing plan to avoid growing
    # the VlPreprocessorPlan abstraction for dynamic ranges yet; a
    # future extension can add {min_pixels, max_pixels} when needed.
    #
    # 672×672 with Qwen2-VL's 28-pixel patch-merger grid yields 24×24 =
    # 576 vision tokens per image. `<|image_pad|>` is the runtime
    # placeholder the processor expands into that window.
    #
    # Apache-2.0 (redistributable, no acceptance). `AutoModelForImageTextToText`
    # handles this arch natively since transformers ≥4.45 — same path
    # PaliGemma loads through.
    BaseModelSpec(
        key="qwen2-vl-2b-instruct",
        hf_id="Qwen/Qwen2-VL-2B-Instruct",
        revision="895c3a49bc3fa70a340399125c650a463535e71c",
        architecture="Qwen2VLForConditionalGeneration",
        params=2_200_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="qwen2-vl",
        gguf_arch="qwen2-vl",
        tokenizer_pre="qwen2",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct/blob/main/LICENSE",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=4.5,
        context_length=32_768,
        recommended_seq_len=2048,
        modality="vision-language",
        vl_preprocessor_plan=VlPreprocessorPlan(
            target_size=(672, 672),
            resize_policy="fixed",
            image_token="<|image_pad|>",
            num_image_tokens=576,
        ),
    ),
    # InternVL2-2B — OpenGVLab's MIT-licensed 2B VL model. Uses fixed
    # 448×448 input (32×32 patch grid with 2×2 pixel-shuffle → 256
    # vision tokens per image).
    #
    # **Security surface: trust_remote_code=True**. InternVL2's HF
    # integration is `InternVLChatModel`, a custom class defined in
    # `modeling_internvl_chat.py` inside the model repo — not in
    # transformers. Loading it requires executing that repo's code.
    # The loader sets `trust_remote_code=True` when this spec is
    # picked (`trust_remote_code` field below), so picking this base
    # as `base_model: internvl2-2b` in a .dlm is the user's
    # informed acknowledgment that remote code runs at load time.
    # The cookbook + vl-memory.md flag this too.
    BaseModelSpec(
        key="internvl2-2b",
        hf_id="OpenGVLab/InternVL2-2B",
        revision="e4f6747bd20f139e637642c6a058c6bd00b36919",
        architecture="InternVLChatModel",
        params=2_200_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="internvl2",
        gguf_arch="internvl2",
        tokenizer_pre="internvl2",
        license_spdx="MIT",
        license_url="https://huggingface.co/OpenGVLab/InternVL2-2B/blob/main/LICENSE",
        requires_acceptance=False,
        redistributable=True,
        trust_remote_code=True,
        size_gb_fp16=4.4,
        context_length=8_192,
        recommended_seq_len=2048,
        modality="vision-language",
        vl_preprocessor_plan=VlPreprocessorPlan(
            target_size=(448, 448),
            resize_policy="fixed",
            image_token="<IMG_CONTEXT>",
            num_image_tokens=256,
        ),
    ),
    # InternVL3-2B — Apache-2.0 row that reuses the `InternVLChatModel`
    # architecture and the `internvl2` template, and likewise sets
    # `trust_remote_code=True`, so the remote-code security surface
    # described for `internvl2-2b` applies here as well.
    #
    # NOTE(review): this is the only VL row with `resize_policy="dynamic"`;
    # every other row uses "fixed", and the Mistral Small comment states
    # that `VlPreprocessorPlan` is fixed-size only — confirm "dynamic" is
    # accepted and intended. The image token also differs from InternVL2
    # (`<image>` vs `<IMG_CONTEXT>`) — verify against the processor config.
    BaseModelSpec(
        key="internvl3-2b",
        hf_id="OpenGVLab/InternVL3-2B",
        revision="899155015275a9b7338c7f4677e19c784e0e5a21",
        architecture="InternVLChatModel",
        params=2_000_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="internvl2",
        gguf_arch="internvl3",
        tokenizer_pre="internvl3",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/OpenGVLab/InternVL3-2B",
        requires_acceptance=False,
        redistributable=True,
        trust_remote_code=True,
        size_gb_fp16=4.0,
        context_length=32_768,
        recommended_seq_len=2048,
        modality="vision-language",
        vl_preprocessor_plan=VlPreprocessorPlan(
            target_size=(448, 448),
            resize_policy="dynamic",
            image_token="<image>",
            num_image_tokens=256,
        ),
    ),
    # --- Audio-language bases -----------------------------------------------
    # Qwen2-Audio-7B-Instruct — Alibaba's open audio-text model. Uses
    # the Qwen2 LLM backbone + a dedicated audio encoder. Apache-2.0
    # and currently ungated on HF, so the registry keeps it open and
    # redistributable like the other permissive Qwen rows.
    #
    # The 16 kHz pin + 30 s max-length match the training-time
    # defaults documented in the Qwen2-Audio card. Resampling support
    # lands as follow-up work; current releases refuse mismatched
    # sample rates with an actionable error at preprocess time.
    #
    # Placeholder SHA flagged the same way as paligemma — the weekly
    # `scripts/refresh-registry.py --check` run surfaces drift and a
    # maintainer pastes in the real SHA.
    # NOTE(review): the paligemma comment in this file carries no
    # placeholder-SHA note and the `revision` below is a 40-hex value —
    # confirm whether the paragraph above is stale.
    BaseModelSpec(
        key="qwen2-audio-7b-instruct",
        hf_id="Qwen/Qwen2-Audio-7B-Instruct",
        revision="0a095220c30b7b31434169c3086508ef3ea5bf0a",
        architecture="Qwen2AudioForConditionalGeneration",
        params=8_400_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="qwen2-audio",
        gguf_arch="qwen2-audio",
        tokenizer_pre="qwen2",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/Qwen/Qwen2-Audio-7B-Instruct",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=15.5,
        context_length=8_192,
        recommended_seq_len=2048,
        modality="audio-language",
        audio_preprocessor_plan=AudioPreprocessorPlan(
            sample_rate=16_000,
            max_length_seconds=30.0,
            audio_token="<|AUDIO|>",
            num_audio_tokens=750,
        ),
    ),
)
| 656 |
|
| 657 |
|
| 658 |
# Public lookup table mapping each registry key to its spec. Built once at
# import time from `_ENTRIES`, so iteration order follows that tuple; if two
# rows ever shared a key, the later row would win.
BASE_MODELS: Final[dict[str, BaseModelSpec]] = {spec.key: spec for spec in _ENTRIES}
| 659 |
|
| 660 |
|
| 661 |
def known_keys() -> tuple[str, ...]:
    """Return every registry key in registration order.

    The order is the insertion order of `BASE_MODELS`, which keeps the result
    stable for use in error messages and CLI listings.
    """
    return tuple(BASE_MODELS)