"""Curated launch registry of supported base models. Every entry pins an exact HuggingFace commit SHA. Refreshed by `scripts/refresh-registry.py`; weekly CI opens a PR on drift. Notes on individual entries: - `qwen2.5-3b` ships under the Qwen Research License (free for entities with <100M MAU). We record it as `license_spdx="Other"` and surface the URL via `license_url`; it remains `redistributable=True` because the license permits bundling + redistribution with attribution. **Caveat:** the boolean `redistributable` field does not express the MAU threshold or attribution requirement. A `redistributable_conditions: str | None` field on `BaseModelSpec` plus a pack-time attestation checkbox would encode this properly — deferred follow-up work. Until then, users at the scale threshold must consult the license text themselves. - Llama-3.2 models are gated on HuggingFace. Llama-3.3 8B currently needs a mirror-backed fetch path because Meta exposes it through the Llama API but not a first-party HF repo. DLM still keeps the same acceptance + non-redistribution policy surface for the whole Llama family (`requires_acceptance=True`, `redistributable=False`) — enforced by the pack gate and share-protocol refusal. - SmolLM2 / SmolLM3 and Phi-3.5-mini are permissive (Apache-2.0 / MIT). - `size_gb_fp16` is approximate; the hardware doctor uses it to seed VRAM estimates, which then get refined by runtime checks. """ from __future__ import annotations from typing import Final from dlm.base_models.schema import AudioPreprocessorPlan, BaseModelSpec, VlPreprocessorPlan _ENTRIES: tuple[BaseModelSpec, ...] = ( BaseModelSpec( key="qwen2.5-0.5b", hf_id="Qwen/Qwen2.5-0.5B-Instruct", revision="7ae557604adf67be50417f59c2c2f167def9a775", architecture="Qwen2ForCausalLM", params=500_000_000, target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], template="chatml", gguf_arch="qwen2", tokenizer_pre="qwen2", license_spdx="Apache-2.0", license_url="https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct/blob/main/LICENSE", requires_acceptance=False, redistributable=True, size_gb_fp16=1.0, context_length=32_768, recommended_seq_len=2048, ), BaseModelSpec( key="qwen2.5-1.5b", hf_id="Qwen/Qwen2.5-1.5B-Instruct", revision="989aa7980e4cf806f80c7fef2b1adb7bc71aa306", architecture="Qwen2ForCausalLM", params=1_500_000_000, target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], template="chatml", gguf_arch="qwen2", tokenizer_pre="qwen2", license_spdx="Apache-2.0", license_url="https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct/blob/main/LICENSE", requires_acceptance=False, redistributable=True, size_gb_fp16=3.1, context_length=32_768, recommended_seq_len=2048, ), BaseModelSpec( key="qwen2.5-3b", hf_id="Qwen/Qwen2.5-3B-Instruct", revision="aa8e72537993ba99e69dfaafa59ed015b17504d1", architecture="Qwen2ForCausalLM", params=3_000_000_000, target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], template="chatml", gguf_arch="qwen2", tokenizer_pre="qwen2", license_spdx="Other", license_url="https://huggingface.co/Qwen/Qwen2.5-3B-Instruct/blob/main/LICENSE", requires_acceptance=False, redistributable=True, size_gb_fp16=6.2, context_length=32_768, recommended_seq_len=2048, ), BaseModelSpec( key="qwen2.5-coder-1.5b", hf_id="Qwen/Qwen2.5-Coder-1.5B-Instruct", revision="2e1fd397ee46e1388853d2af2c993145b0f1098a", architecture="Qwen2ForCausalLM", params=1_500_000_000, target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], template="chatml", gguf_arch="qwen2", tokenizer_pre="qwen2", license_spdx="Apache-2.0", 
license_url="https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct/blob/main/LICENSE", requires_acceptance=False, redistributable=True, size_gb_fp16=3.1, context_length=32_768, recommended_seq_len=2048, ), BaseModelSpec( key="qwen3-1.7b", hf_id="Qwen/Qwen3-1.7B", revision="70d244cc86ccca08cf5af4e1e306ecf908b1ad5e", architecture="Qwen3ForCausalLM", params=1_700_000_000, target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], template="chatml", gguf_arch="qwen3", tokenizer_pre="qwen2", license_spdx="Apache-2.0", license_url="https://huggingface.co/Qwen/Qwen3-1.7B/blob/main/LICENSE", requires_acceptance=False, redistributable=True, size_gb_fp16=3.4, context_length=32_768, recommended_seq_len=2048, reasoning_tuned=True, ), BaseModelSpec( key="qwen3-1.7b-thinking", hf_id="Qwen/Qwen3-1.7B", revision="70d244cc86ccca08cf5af4e1e306ecf908b1ad5e", architecture="Qwen3ForCausalLM", params=1_700_000_000, target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], template="qwen3thinking", gguf_arch="qwen3", tokenizer_pre="qwen2", license_spdx="Apache-2.0", license_url="https://huggingface.co/Qwen/Qwen3-1.7B/blob/main/LICENSE", requires_acceptance=False, redistributable=True, size_gb_fp16=3.4, context_length=32_768, recommended_seq_len=2048, reasoning_tuned=True, ), BaseModelSpec( key="qwen3-4b", hf_id="Qwen/Qwen3-4B", revision="1cfa9a7208912126459214e8b04321603b3df60c", architecture="Qwen3ForCausalLM", params=4_000_000_000, target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], template="chatml", gguf_arch="qwen3", tokenizer_pre="qwen2", license_spdx="Apache-2.0", license_url="https://huggingface.co/Qwen/Qwen3-4B/blob/main/LICENSE", requires_acceptance=False, redistributable=True, size_gb_fp16=8.0, context_length=32_768, recommended_seq_len=2048, reasoning_tuned=True, ), BaseModelSpec( key="qwen3-8b", hf_id="Qwen/Qwen3-8B", revision="b968826d9c46dd6066d109eabc6255188de91218", architecture="Qwen3ForCausalLM", params=8_000_000_000, target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], template="chatml", gguf_arch="qwen3", tokenizer_pre="qwen2", license_spdx="Apache-2.0", license_url="https://huggingface.co/Qwen/Qwen3-8B/blob/main/LICENSE", requires_acceptance=False, redistributable=True, size_gb_fp16=16.0, context_length=32_768, recommended_seq_len=2048, reasoning_tuned=True, ), BaseModelSpec( key="llama-3.2-1b", hf_id="meta-llama/Llama-3.2-1B-Instruct", revision="9213176726f574b556790deb65791e0c5aa438b6", architecture="LlamaForCausalLM", params=1_000_000_000, target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], template="llama3", gguf_arch="llama", tokenizer_pre="llama-bpe", license_spdx="Other", license_url="https://www.llama.com/llama3_2/license/", requires_acceptance=True, redistributable=False, size_gb_fp16=2.5, context_length=131_072, recommended_seq_len=4096, ), BaseModelSpec( key="llama-3.2-3b", hf_id="meta-llama/Llama-3.2-3B-Instruct", revision="0cb88a4f764b7a12671c53f0838cd831a0843b95", architecture="LlamaForCausalLM", params=3_000_000_000, target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], template="llama3", gguf_arch="llama", tokenizer_pre="llama-bpe", license_spdx="Other", license_url="https://www.llama.com/llama3_2/license/", requires_acceptance=True, redistributable=False, size_gb_fp16=6.5, context_length=131_072, recommended_seq_len=4096, ), BaseModelSpec( key="llama-3.3-8b-instruct", # Meta's first-party LlamaCon announcement explicitly says the # Llama API can fine-tune "o novo modelo Llama 3.3 8B", but # there is still no first-party HF repo. 
        # DLM therefore fetches weights from the community mirror below,
        # while refresh-registry separately probes Meta's newsroom article
        # for provenance.
        hf_id="allura-forge/Llama-3.3-8B-Instruct",
        revision="df95224cf87c32d9f4958dd284a07ded620aa4fc",
        architecture="LlamaForCausalLM",
        params=8_000_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="llama3",
        gguf_arch="llama",
        tokenizer_pre="llama-bpe",
        license_spdx="Other",
        license_url="https://llama.meta.com/llama3/license",
        requires_acceptance=True,
        redistributable=False,
        size_gb_fp16=16.5,
        context_length=131_072,
        context_length_effective=8_192,
        recommended_seq_len=4096,
        refresh_check_hf_gating=False,
        provenance_url=(
            "https://about.fb.com/br/news/2025/04/tudo-o-que-anunciamos-no-nosso-primeiro-llamacon/"
        ),
        provenance_match_text="novo modelo Llama 3.3 8B",
    ),
    BaseModelSpec(
        key="smollm3-3b",
        hf_id="HuggingFaceTB/SmolLM3-3B",
        revision="a07cc9a04f16550a088caea529712d1d335b0ac1",
        architecture="SmolLM3ForCausalLM",
        params=3_000_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="smollm3",
        gguf_arch="llama",
        tokenizer_pre="smollm",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/HuggingFaceTB/SmolLM3-3B",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=6.2,
        context_length=65_536,
        recommended_seq_len=4096,
        reasoning_tuned=True,
    ),
    BaseModelSpec(
        key="olmo-2-7b-instruct",
        hf_id="allenai/OLMo-2-1124-7B-Instruct",
        revision="470b1fba1ae01581f270116362ee4aa1b97f4c84",
        architecture="Olmo2ForCausalLM",
        params=7_000_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="olmo2",
        gguf_arch="olmo2",
        tokenizer_pre="superbpe",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/allenai/OLMo-2-1124-7B-Instruct",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=14.6,
        context_length=4096,
        recommended_seq_len=2048,
    ),
    BaseModelSpec(
        key="gemma-2-2b-it",
        hf_id="google/gemma-2-2b-it",
        revision="299a8560bedf22ed1c72a8a11e7dce4a7f9f51f8",
        architecture="Gemma2ForCausalLM",
        params=2_600_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="gemma2",
        gguf_arch="gemma2",
        tokenizer_pre="gemma",
        license_spdx="Gemma",
        license_url="https://ai.google.dev/gemma/terms",
        requires_acceptance=True,
        redistributable=False,
        size_gb_fp16=5.2,
        context_length=8_192,
        recommended_seq_len=2048,
    ),
    BaseModelSpec(
        key="gemma-2-9b-it",
        hf_id="google/gemma-2-9b-it",
        revision="11c9b309abf73637e4b6f9a3fa1e92e615547819",
        architecture="Gemma2ForCausalLM",
        params=9_000_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="gemma2",
        gguf_arch="gemma2",
        tokenizer_pre="gemma",
        license_spdx="Gemma",
        license_url="https://ai.google.dev/gemma/terms",
        requires_acceptance=True,
        redistributable=False,
        size_gb_fp16=18.0,
        context_length=8_192,
        recommended_seq_len=2048,
    ),
    BaseModelSpec(
        key="smollm2-135m",
        hf_id="HuggingFaceTB/SmolLM2-135M-Instruct",
        revision="12fd25f77366fa6b3b4b768ec3050bf629380bac",
        architecture="LlamaForCausalLM",
        params=135_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="chatml",
        gguf_arch="llama",
        tokenizer_pre="smollm",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/HuggingFaceTB/SmolLM2-135M-Instruct",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=0.27,
        context_length=8_192,
        recommended_seq_len=1024,
    ),
    BaseModelSpec(
        key="smollm2-360m",
        hf_id="HuggingFaceTB/SmolLM2-360M-Instruct",
        revision="a10cc1512eabd3dde888204e902eca88bddb4951",
        architecture="LlamaForCausalLM",
        params=360_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="chatml",
        gguf_arch="llama",
        tokenizer_pre="smollm",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/HuggingFaceTB/SmolLM2-360M-Instruct",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=0.72,
        context_length=8_192,
        recommended_seq_len=1024,
    ),
    BaseModelSpec(
        key="smollm2-1.7b",
        hf_id="HuggingFaceTB/SmolLM2-1.7B-Instruct",
        revision="31b70e2e869a7173562077fd711b654946d38674",
        architecture="LlamaForCausalLM",
        params=1_700_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="chatml",
        gguf_arch="llama",
        tokenizer_pre="smollm",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=3.4,
        context_length=8_192,
        recommended_seq_len=2048,
    ),
    BaseModelSpec(
        key="phi-3.5-mini",
        hf_id="microsoft/Phi-3.5-mini-instruct",
        revision="2fe192450127e6a83f7441aef6e3ca586c338b77",
        architecture="Phi3ForCausalLM",
        params=3_800_000_000,
        target_modules=["qkv_proj", "o_proj", "gate_up_proj", "down_proj"],
        template="phi3",
        gguf_arch="phi3",
        tokenizer_pre="phi-2",
        license_spdx="MIT",
        license_url="https://huggingface.co/microsoft/Phi-3.5-mini-instruct/blob/main/LICENSE",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=7.6,
        context_length=131_072,
        recommended_seq_len=2048,
    ),
    BaseModelSpec(
        key="phi-4-mini-reasoning",
        hf_id="microsoft/Phi-4-mini-reasoning",
        revision="0e3b1e2d02ee478a3743abe3f629e9c0cb722e0a",
        architecture="Phi3ForCausalLM",
        params=3_800_000_000,
        target_modules=["qkv_proj", "o_proj", "gate_up_proj", "down_proj"],
        template="phi4mini",
        gguf_arch="phi3",
        tokenizer_pre="phi-2",
        license_spdx="MIT",
        license_url="https://huggingface.co/microsoft/Phi-4-mini-reasoning/blob/main/LICENSE",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=7.6,
        context_length=131_072,
        recommended_seq_len=2048,
        reasoning_tuned=True,
    ),
    # Mixtral-8x7B-Instruct-v0.1 — Apache-2.0 sparse MoE base.
    #
    # HF exposes this as `MixtralForCausalLM`, but the current vendored
    # llama.cpp converter routes it through the Llama path rather than a
    # distinct Mixtral architecture class. We therefore keep
    # `gguf_arch="llama"` while marking the modality as `text-moe` so DLM's
    # gate substrate can detect the sparse-MoE family explicitly.
    BaseModelSpec(
        key="mixtral-8x7b-instruct",
        hf_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
        revision="eba92302a2861cdc0098cc54bc9f17cb2c47eb61",
        architecture="MixtralForCausalLM",
        params=46_700_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="mistral",
        gguf_arch="llama",
        tokenizer_pre="llama-bpe",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=93.4,
        context_length=32_768,
        recommended_seq_len=2048,
        modality="text-moe",
    ),
    # Mistral Small 3.1 24B Instruct — Apache-2.0 multimodal base with
    # native vision support and 128k context.
    #
    # An earlier draft treated this as text-only; the live HF config is
    # `Mistral3ForConditionalGeneration` with both text and vision towers,
    # so we register it as vision-language. The current processor config
    # pins `[IMG]` as the image placeholder and a longest edge of 1540 px.
    # DLM's current `VlPreprocessorPlan` abstraction is fixed-size only, so
    # we conservatively pin 1540×1540 here until dynamic ranges land.
BaseModelSpec( key="mistral-small-3.1-24b-instruct", hf_id="mistralai/Mistral-Small-3.1-24B-Instruct-2503", revision="68faf511d618ef198fef186659617cfd2eb8e33a", architecture="Mistral3ForConditionalGeneration", params=24_000_000_000, target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], template="mistral", gguf_arch="mistral3", tokenizer_pre="tekken", license_spdx="Apache-2.0", license_url="https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503", requires_acceptance=False, redistributable=True, size_gb_fp16=48.0, context_length=131_072, recommended_seq_len=4096, modality="vision-language", vl_preprocessor_plan=VlPreprocessorPlan( target_size=(1540, 1540), resize_policy="fixed", image_token="[IMG]", num_image_tokens=3025, ), ), # --- Vision-language bases ---------------------------------------------- # PaliGemma-3B-mix-224 — Google's instruction-tuned VL base built on # Gemma-2B + SigLIP-So400m. Gated under the Gemma license; cannot # redistribute inside a `.dlm.pack` (same pattern as Llama-3.2). # Training targets Gemma's transformer blocks; the vision tower is # trained jointly when modules_to_save expands to ["embed_tokens", # "lm_head"], but the current entry keeps modules_to_save empty so # only the LLM-side LoRA adapters move — the vision tower is frozen. # # `gguf_arch` / `tokenizer_pre` are set to tags the current vendored # llama.cpp doesn't recognize; the export probes surface # UNSUPPORTED + refuse GGUF conversion until GGUF support lands. # HF-snapshot export (`dlm export --hf-snapshot`) still works. BaseModelSpec( key="paligemma-3b-mix-224", hf_id="google/paligemma-3b-mix-224", revision="d1d8734c9c3ad0ccfeea4afc270faa356c2ba515", architecture="PaliGemmaForConditionalGeneration", params=2_900_000_000, target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], template="paligemma", gguf_arch="paligemma", tokenizer_pre="gemma", license_spdx="Other", license_url="https://ai.google.dev/gemma/terms", requires_acceptance=True, redistributable=False, size_gb_fp16=6.5, context_length=8_192, recommended_seq_len=2048, modality="vision-language", vl_preprocessor_plan=VlPreprocessorPlan( target_size=(224, 224), resize_policy="fixed", image_token="", num_image_tokens=256, ), ), # Qwen2-VL-2B-Instruct — Alibaba's Apache-2.0 VL base with dynamic- # resolution support in native HF. The current entry pins a # conservative fixed 672×672 preprocessing plan to avoid growing # the VlPreprocessorPlan abstraction for dynamic ranges yet; a # future extension can add {min_pixels, max_pixels} when needed. # # 672×672 with Qwen2-VL's 28-pixel patch-merger grid yields 24×24 = # 576 vision tokens per image. `<|image_pad|>` is the runtime # placeholder the processor expands into that window. # # Apache-2.0 (redistributable, no acceptance). `AutoModelForImageTextToText` # handles this arch natively since transformers ≥4.45 — same path # PaliGemma loads through. 
BaseModelSpec( key="qwen2-vl-2b-instruct", hf_id="Qwen/Qwen2-VL-2B-Instruct", revision="895c3a49bc3fa70a340399125c650a463535e71c", architecture="Qwen2VLForConditionalGeneration", params=2_200_000_000, target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], template="qwen2-vl", gguf_arch="qwen2-vl", tokenizer_pre="qwen2", license_spdx="Apache-2.0", license_url="https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct/blob/main/LICENSE", requires_acceptance=False, redistributable=True, size_gb_fp16=4.5, context_length=32_768, recommended_seq_len=2048, modality="vision-language", vl_preprocessor_plan=VlPreprocessorPlan( target_size=(672, 672), resize_policy="fixed", image_token="<|image_pad|>", num_image_tokens=576, ), ), # InternVL2-2B — OpenGVLab's MIT-licensed 2B VL model. Uses fixed # 448×448 input (32×32 patch grid with 2×2 pixel-shuffle → 256 # vision tokens per image). # # **Security surface: trust_remote_code=True**. InternVL2's HF # integration is `InternVLChatModel`, a custom class defined in # `modeling_internvl_chat.py` inside the model repo — not in # transformers. Loading it requires executing that repo's code. # The loader sets `trust_remote_code=True` when this spec is # picked (`trust_remote_code` field below), so picking this base # as `base_model: internvl2-2b` in a .dlm is the user's # informed acknowledgment that remote code runs at load time. # The cookbook + vl-memory.md flag this too. BaseModelSpec( key="internvl2-2b", hf_id="OpenGVLab/InternVL2-2B", revision="e4f6747bd20f139e637642c6a058c6bd00b36919", architecture="InternVLChatModel", params=2_200_000_000, target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], template="internvl2", gguf_arch="internvl2", tokenizer_pre="internvl2", license_spdx="MIT", license_url="https://huggingface.co/OpenGVLab/InternVL2-2B/blob/main/LICENSE", requires_acceptance=False, redistributable=True, trust_remote_code=True, size_gb_fp16=4.4, context_length=8_192, recommended_seq_len=2048, modality="vision-language", vl_preprocessor_plan=VlPreprocessorPlan( target_size=(448, 448), resize_policy="fixed", image_token="", num_image_tokens=256, ), ), BaseModelSpec( key="internvl3-2b", hf_id="OpenGVLab/InternVL3-2B", revision="899155015275a9b7338c7f4677e19c784e0e5a21", architecture="InternVLChatModel", params=2_000_000_000, target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], template="internvl2", gguf_arch="internvl3", tokenizer_pre="internvl3", license_spdx="Apache-2.0", license_url="https://huggingface.co/OpenGVLab/InternVL3-2B", requires_acceptance=False, redistributable=True, trust_remote_code=True, size_gb_fp16=4.0, context_length=32_768, recommended_seq_len=2048, modality="vision-language", vl_preprocessor_plan=VlPreprocessorPlan( target_size=(448, 448), resize_policy="dynamic", image_token="", num_image_tokens=256, ), ), # --- Audio-language bases ----------------------------------------------- # Qwen2-Audio-7B-Instruct — Alibaba's open audio-text model. Uses # the Qwen2 LLM backbone + a dedicated audio encoder. Apache-2.0 # and currently ungated on HF, so the registry keeps it open and # redistributable like the other permissive Qwen rows. # # The 16 kHz pin + 30 s max-length match the training-time # defaults documented in the Qwen2-Audio card. Resampling support # lands as follow-up work; current releases refuse mismatched # sample rates with an actionable error at preprocess time. 
    #
    # If the pinned SHA here is ever a placeholder or goes stale, the weekly
    # `scripts/refresh-registry.py --check` run surfaces the drift and a
    # maintainer pastes in the real SHA.
    BaseModelSpec(
        key="qwen2-audio-7b-instruct",
        hf_id="Qwen/Qwen2-Audio-7B-Instruct",
        revision="0a095220c30b7b31434169c3086508ef3ea5bf0a",
        architecture="Qwen2AudioForConditionalGeneration",
        params=8_400_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="qwen2-audio",
        gguf_arch="qwen2-audio",
        tokenizer_pre="qwen2",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/Qwen/Qwen2-Audio-7B-Instruct",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=15.5,
        context_length=8_192,
        recommended_seq_len=2048,
        modality="audio-language",
        audio_preprocessor_plan=AudioPreprocessorPlan(
            sample_rate=16_000,
            max_length_seconds=30.0,
            audio_token="<|AUDIO|>",
            num_audio_tokens=750,
        ),
    ),
)

BASE_MODELS: Final[dict[str, BaseModelSpec]] = {entry.key: entry for entry in _ENTRIES}


def known_keys() -> tuple[str, ...]:
    """Stable ordering for use in error messages / CLI listings."""
    return tuple(BASE_MODELS.keys())
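

# ---------------------------------------------------------------------------
# Non-normative sketches (referenced from the module docstring). Everything
# below is illustrative documentation rather than DLM's public API: the
# helper names, signatures, and constants are assumptions made for the
# sketches; the real implementations live in their own modules.
# ---------------------------------------------------------------------------


# How the hardware doctor might seed a VRAM estimate from `size_gb_fp16`.
# The 1.2 overhead factor (KV cache, activations, CUDA context) is an
# assumed seed value here; runtime checks refine whatever the real doctor
# starts from.
def _estimate_vram_gb_sketch(spec: BaseModelSpec, bits: int = 16) -> float:
    weight_gb = spec.size_gb_fp16 * (bits / 16)
    return round(weight_gb * 1.2, 1)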
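

# How the pack gate described in the docstring could combine the two policy
# booleans. Hypothetical signature; the real gate (and the share-protocol
# refusal) lives elsewhere in DLM.
def _pack_gate_sketch(spec: BaseModelSpec, license_accepted: bool) -> None:
    if spec.requires_acceptance and not license_accepted:
        raise PermissionError(
            f"{spec.key}: accept the license first ({spec.license_url})"
        )
    if not spec.redistributable:
        # Local fine-tuning is fine; bundling base weights into a shareable
        # .dlm.pack is not (the Llama family and Gemma rows above).
        raise PermissionError(f"{spec.key}: base weights are not redistributable")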
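

# Worked arithmetic behind the pinned `num_image_tokens` values. The single
# `effective_patch_px` argument folds the patch size and any merge or
# pixel-shuffle factor into one number, a simplification made for this
# sketch: Qwen2-VL 672/28 -> 24*24 = 576; Mistral Small 1540/28 ->
# 55*55 = 3025; PaliGemma 224/14 -> 16*16 = 256; InternVL 448/(14*2) ->
# 16*16 = 256.
def _vl_tokens_per_image(edge_px: int, effective_patch_px: int) -> int:
    side = edge_px // effective_patch_px
    return side * side


assert _vl_tokens_per_image(672, 28) == 576
assert _vl_tokens_per_image(1540, 28) == 3025
assert _vl_tokens_per_image(224, 14) == 256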
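

# How a loader might honor the per-spec `trust_remote_code` opt-in.
# Hypothetical helper: only the InternVL rows above set the flag, so only
# they execute repo-shipped modeling code at load time.
def _loader_kwargs_sketch(spec: BaseModelSpec) -> dict[str, object]:
    kwargs: dict[str, object] = {"revision": spec.revision}
    if getattr(spec, "trust_remote_code", False):
        # Selecting such a base in a .dlm is the user's informed
        # acknowledgment that remote code runs at load time.
        kwargs["trust_remote_code"] = True
    return kwargs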
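

# Assumed derivation of Qwen2-Audio's 750-token budget: 30 s at a 10 ms mel
# hop gives 3000 frames, and stride-4 encoder pooling (an assumption stated
# here, not read from the model config) leaves 750, matching
# `num_audio_tokens` above. The second helper sketches the preprocess-time
# refusal of mismatched sample rates.
def _audio_tokens_sketch(plan: AudioPreprocessorPlan) -> int:
    frames = int(plan.max_length_seconds * 100)  # 10 ms hop
    return frames // 4


def _require_sample_rate(plan: AudioPreprocessorPlan, observed_hz: int) -> None:
    if observed_hz != plan.sample_rate:
        raise ValueError(
            f"clip is {observed_hz} Hz but this base expects "
            f"{plan.sample_rate} Hz; resample before training "
            "(automatic resampling is follow-up work)"
        )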
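

# How the weekly drift probe could compare a pinned revision against the
# repo's current head. Assumes `huggingface_hub` is installed and that
# `HfApi.model_info(...).sha` reports the current main-branch commit; the
# real logic lives in `scripts/refresh-registry.py`.
def _sha_drifted_sketch(spec: BaseModelSpec) -> bool:
    from huggingface_hub import HfApi  # local import: illustration only

    return HfApi().model_info(spec.hf_id).sha != spec.revision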