Refresh Sprint 40 closeout proofs

- SHA: 29a900f3fb6e9f24b4830d7c0a9479b4f76a7a3d (short: 29a900f)
- Parents: fdc4063
- Tree: 491766b

docs/cookbook/audio-training.md (modified)
@@ -10,9 +10,6 @@ spoken-corpus workflow end-to-end: scaffold → drop clips + transcripts
| 10 | 10 | 24 GB VRAM. Qwen2-Audio-7B-Instruct fp16 weighs ~15 GB; the 16 GB |
| 11 | 11 | consumer GPUs don't fit this base without quantization (4-bit audio |
| 12 | 12 | training is deferred). |
| 13 | -- A Hugging Face account with the [Qwen2-Audio-7B-Instruct terms | |
| 14 | - accepted](https://huggingface.co/Qwen/Qwen2-Audio-7B-Instruct) and | |
| 15 | - `HF_TOKEN` exported. | |
| 16 | 13 | - Qwen2-Audio cached locally (`huggingface-cli download |
| 17 | 14 | Qwen/Qwen2-Audio-7B-Instruct`). First train without this triggers |
| 18 | 15 | the download automatically. |
@@ -22,7 +19,7 @@ spoken-corpus workflow end-to-end: scaffold → drop clips + transcripts | ||
| 22 | 19 | ## Step 1 — Scaffold an audio `.dlm` |
| 23 | 20 | |
| 24 | 21 | ```bash |
| 25 | -dlm init my-audio.dlm --audio --i-accept-license | |
| 22 | +dlm init my-audio.dlm --audio | |
| 26 | 23 | ``` |
| 27 | 24 | |
| 28 | 25 | `--audio` pins the base to `qwen2-audio-7b-instruct` and emits a |
docs/cookbook/choosing-a-base.md (added)
@@ -0,0 +1,37 @@
| 1 | +# Choosing a base | |
| 2 | + | |
| 3 | +The fastest way to pick a DLM base is to decide three things first: | |
| 4 | + | |
| 5 | +1. Do you need plain text, multimodal vision, or audio? | |
| 6 | +2. Do you want the most permissive license possible, or are gated rows fine? | |
| 7 | +3. Are you targeting Apple Silicon, a mid-size CUDA card, or a large CUDA box? | |
| 8 | + | |
| 9 | +## Quick picks | |
| 10 | + | |
| 11 | +| If you want… | Start with… | Why | | |
| 12 | +|---|---|---| | |
| 13 | +| Fast local iteration on almost any laptop | `smollm2-135m` | Tiny, cheap, and ideal for testing authoring loops. | | |
| 14 | +| Best general-purpose 2026 text base around the 4B tier | `qwen3-4b` | Strong default quality, permissive license, and current-generation tokenizer/chat behavior. | | |
| 15 | +| A reasoning-first 1.7B profile | `qwen3-1.7b-thinking` | Same upstream Qwen3 weights, but a curated reasoning-profile key with cooler defaults. | | |
| 16 | +| Fully open-model story | `olmo-2-7b-instruct` | Open weights and open-data lineage make it the cleanest reproducibility pitch. | | |
| 17 | +| Apache sparse-MoE experiments | `mixtral-8x7b-instruct` | First `text-moe` row in the registry; pairs with the learned gate work. | | |
| 18 | +| Small gated text base | `gemma-2-2b-it` | Useful when Gemma’s instruction style or ecosystem matters more than license friction. | | |
| 19 | +| Larger gated text base | `gemma-2-9b-it` | Upper-tier Gemma pick; large enough to want real GPU planning. | | |
| 20 | +| Large multimodal capability | `mistral-small-3.1-24b-instruct` | Strongest shipped VL row, but large-CUDA-first. | | |
| 21 | +| Safe default multimodal row on a smaller box | `qwen2-vl-2b-instruct` | Permissive, solid, and compatible with the current generic VL runtime. | | |
| 22 | +| Audio-language training | `qwen2-audio-7b-instruct` | Current shipped audio row; open-license and no longer gated on HF. | | |
| 23 | + | |
| 24 | +## Notes on the sharp edges | |
| 25 | + | |
| 26 | +- `llama-3.3-8b-instruct` is still treated like the Llama family in DLM’s policy surface: acceptance required, not redistributable, and intended for users who already know they want the Llama line. | |
| 27 | +- `internvl2-2b` and `internvl3-2b` are registry-visible planning targets, but the current generic VL runtime still refuses the InternVL family until DLM owns its custom processor/collator contract. | |
| 28 | +- `mistral-small-3.1-24b-instruct` is intentionally refused on MPS by default. It is a real shipped row, just not a casual laptop target. | |
| 29 | + | |
| 30 | +## Hardware-first view | |
| 31 | + | |
| 32 | +- Apple Silicon, 16 GB: `smollm2-*`, `qwen2.5-*`, `qwen3-1.7b`, and `qwen3-4b` are the comfortable text picks; `qwen2-vl-2b-instruct` is the safer VL row. | |
| 33 | +- Apple Silicon, 32 GB+: `qwen3-8b`, `gemma-2-2b-it`, and `phi-4-mini-reasoning` become practical. Large VL rows still need caution. | |
| 34 | +- CUDA, 24 GB: this is where `gemma-2-9b-it`, `mixtral-8x7b-instruct`, and the heavier multimodal rows start becoming realistic. | |
| 35 | +- CUDA, 48 GB+: this is the intended home for `mistral-small-3.1-24b-instruct`. | |
| 36 | + | |
| 37 | +See [hardware/memory-estimates](../hardware/memory-estimates.md) for the text-family budget table and [hardware/vl-memory](../hardware/vl-memory.md) for the VL rows. | |
docs/cookbook/multimodal-training.md (modified)
@@ -28,7 +28,7 @@ drop real images into that path before the first train).
| 28 | 28 | |
| 29 | 29 | ### Picking a different VL base |
| 30 | 30 | |
| 31 | -Four VL bases ship in the registry today: | |
| 31 | +Five VL bases ship in the registry today: | |
| 32 | 32 | |
| 33 | 33 | ```bash |
| 34 | 34 | # Permissive + Apache-2.0 + strong general-purpose VL (pinned 672²): |
@@ -37,6 +37,9 @@ dlm init my-diagrams.dlm --multimodal --base qwen2-vl-2b-instruct | ||
| 37 | 37 | # MIT-licensed, smallest per-image footprint (448²): |
| 38 | 38 | dlm init my-diagrams.dlm --multimodal --base internvl2-2b |
| 39 | 39 | |
| 40 | +# Newer InternVL planning row (dynamic 448-tiling, still runtime-deferred): | |
| 41 | +dlm init my-diagrams.dlm --multimodal --base internvl3-2b | |
| 42 | + | |
| 40 | 43 | # Largest-capability VL row, CUDA-first (pinned 1540²): |
| 41 | 44 | dlm init my-diagrams.dlm --multimodal --base mistral-small-3.1-24b-instruct |
| 42 | 45 | |
@@ -50,8 +53,10 @@ base-selection matrix. **Heads-up on InternVL2**: the row is visible in | ||
| 50 | 53 | the registry, but on the current stack DLM now refuses it for actual |
| 51 | 54 | prompt/train/HF-snapshot-export work. The upstream family still needs a |
| 52 | 55 | custom processor/collator path for its tokenizer-only `AutoProcessor`, |
| 53 | -`<image>` expansion, and `image_flags` forward contract. That same | |
| 54 | -family gap is the reason `internvl3-2b` has not been added yet. | |
| 56 | +`<image>` expansion, and `image_flags` forward contract. The same | |
| 57 | +family gap applies to `internvl3-2b` as well: it is now registry- | |
| 58 | +visible and scaffoldable, but the generic runtime still refuses the | |
| 59 | +whole InternVL family until DLM owns that custom contract. | |
| 55 | 60 | **Heads-up on Mistral Small 3.1**: it is a real VL registry row now, |
| 56 | 61 | but it is intentionally treated as a large-CUDA-first base. `dlm |
| 57 | 62 | doctor` refuses it on Apple Silicon by default unless you explicitly |
@@ -147,8 +152,9 @@ coverage of the base's arch class and routes to one of three paths: | ||
| 147 | 152 | None of the registered bases hit this verdict at the pinned tag. |
| 148 | 153 | - **UNSUPPORTED** — llama.cpp doesn't know the arch at all. Falls |
| 149 | 154 | back to HF-snapshot with an actionable banner naming the arch |
| 150 | - class and the vendored tag. **paligemma-3b-mix-224** and | |
| 151 | - **internvl2-2b** are UNSUPPORTED at the pinned tag. | |
| 155 | + class and the vendored tag. **paligemma-3b-mix-224**, | |
| 156 | + **internvl2-2b**, and **internvl3-2b** are UNSUPPORTED at the | |
| 157 | + pinned tag. | |
| 152 | 158 | |
| 153 | 159 | See [docs/hardware/vl-memory.md](../hardware/vl-memory.md#llamacpp-gguf-support-matrix-sprint-354) |
| 154 | 160 | for the current support verdicts; bump the vendored tag with |
docs/format/frontmatter.md (modified)
@@ -35,12 +35,12 @@ it; don't edit it by hand.
| 35 | 35 | The shipped registry is broader than this quick-start table. Current |
| 36 | 36 | additions include: |
| 37 | 37 | |
| 38 | -- 2026 text-family refresh rows: `qwen3-1.7b`, `qwen3-4b`, `qwen3-8b`, | |
| 39 | - `llama-3.3-8b-instruct`, `phi-4-mini-reasoning`, `gemma-2-2b-it`, | |
| 40 | - `gemma-2-9b-it`, `smollm3-3b`, `olmo-2-7b-instruct`, and | |
| 41 | - `mixtral-8x7b-instruct`. | |
| 38 | +- 2026 text-family refresh rows: `qwen3-1.7b`, `qwen3-1.7b-thinking`, | |
| 39 | + `qwen3-4b`, `qwen3-8b`, `llama-3.3-8b-instruct`, | |
| 40 | + `phi-4-mini-reasoning`, `gemma-2-2b-it`, `gemma-2-9b-it`, | |
| 41 | + `smollm3-3b`, `olmo-2-7b-instruct`, and `mixtral-8x7b-instruct`. | |
| 42 | 42 | - Vision-language rows: `paligemma-3b-mix-224`, |
| 43 | - `qwen2-vl-2b-instruct`, `internvl2-2b`, and | |
| 43 | + `qwen2-vl-2b-instruct`, `internvl2-2b`, `internvl3-2b`, and | |
| 44 | 44 | `mistral-small-3.1-24b-instruct`. |
| 45 | 45 | - Audio-language row: `qwen2-audio-7b-instruct`. |
| 46 | 46 | |
docs/hardware/memory-estimates.md (added)
@@ -0,0 +1,38 @@
| 1 | +# Memory estimates | |
| 2 | + | |
| 3 | +These are planning numbers, not a promise. DLM’s doctor still does the | |
| 4 | +real refusal/fit decision, but the table below is the quick mental map | |
| 5 | +for the Sprint 40 refresh rows that changed the most user expectations. | |
| 6 | + | |
| 7 | +## Text-family checkpoints | |
| 8 | + | |
| 9 | +| Base | fp16 weights | Practical target | | |
| 10 | +|---|---:|---| | |
| 11 | +| `qwen3-8b` | ~16 GB | 24 GB CUDA or high-memory Apple Silicon for LoRA; lighter inference on smaller boxes. | | |
| 12 | +| `llama-3.3-8b-instruct` | ~16.5 GB | Same class as other 8B text rows: real GPU planning required for training. | | |
| 13 | +| `gemma-2-9b-it` | ~18 GB | 24 GB CUDA is the comfortable floor. | | |
| 14 | +| `mistral-small-3.1-24b-instruct` | ~48 GB | Large-CUDA-first. Refused on MPS by default unless forced. | | |
| 15 | + | |
| 16 | +## What the doctor is approximating | |
| 17 | + | |
| 18 | +For LoRA/QLoRA, the planner estimates: | |
| 19 | + | |
| 20 | +- base weights at the chosen load precision | |
| 21 | +- activation memory from `sequence_len × micro_batch × layers` | |
| 22 | +- optimizer state for the trainable adapter params | |
| 23 | +- LoRA parameter storage | |
| 24 | +- a 20% safety margin on top | |
| 25 | + | |
| 26 | +That estimator lives in `src/dlm/hardware/memory.py` and is intentionally conservative. | |
| 27 | + | |
| 28 | +## Rules of thumb | |
| 29 | + | |
| 30 | +- 8B-class rows are where laptop experimentation starts turning into real hardware planning. | |
| 31 | +- 9B-class rows are usually fine on 24 GB CUDA, but not “casual” on smaller hosts. | |
| 32 | +- 24B-class rows are not broad consumer defaults. In DLM they are treated as explicit high-capacity picks. | |
| 33 | +- MPS can be surprisingly good for text LoRA, but DLM now refuses oversized bases like `mistral-small-3.1-24b-instruct` by default because unified memory headroom disappears too quickly. | |
| 34 | + | |
| 35 | +## Related | |
| 36 | + | |
| 37 | +- [Choosing a base](../cookbook/choosing-a-base.md) | |
| 38 | +- [Vision-language memory budget](vl-memory.md) | |
docs/hardware/vl-memory.md (modified)
@@ -1,7 +1,7 @@
| 1 | 1 | # Vision-language memory budget |
| 2 | 2 | |
| 3 | -Four VL rows now ship in the registry: **PaliGemma-3B-mix-224**, | |
| 4 | -**Qwen2-VL-2B-Instruct**, **InternVL2-2B**, and | |
| 3 | +Five VL rows now ship in the registry: **PaliGemma-3B-mix-224**, | |
| 4 | +**Qwen2-VL-2B-Instruct**, **InternVL2-2B**, **InternVL3-2B**, and | |
| 5 | 5 | **Mistral-Small-3.1-24B-Instruct-2503**. Each row carries a pinned |
| 6 | 6 | preprocessing plan; dynamic-resolution support (Qwen2-VL's native |
| 7 | 7 | capability, Mistral Small 3.1's longer-edge policy, and the broader |
@@ -10,9 +10,10 @@ work so the current `VlPreprocessorPlan` cache key stays stable. | ||
| 10 | 10 | |
| 11 | 11 | **Reality check.** The generic VL train/prompt path is complete today |
| 12 | 12 | for PaliGemma, Qwen2-VL, and Mistral Small 3.1. InternVL2 remains |
| 13 | -registry-visible for planning and future support, but on the current | |
| 14 | -transformers stack its HF path still exposes a tokenizer-only | |
| 15 | -`AutoProcessor` and needs a custom collator/runtime contract. DLM now | |
| 13 | +registry-visible for planning and future support, and InternVL3 now | |
| 14 | +joins it under the same honest caveat: on the current transformers | |
| 15 | +stack the InternVL family still exposes a tokenizer-only | |
| 16 | +`AutoProcessor` and needs a custom collator/runtime contract. DLM | |
| 16 | 17 | refuses that family with a clear error instead of pretending the |
| 17 | 18 | generic VL path is enough. |
| 18 | 19 | |
@@ -23,6 +24,7 @@ generic VL path is enough. | ||
| 23 | 24 | | paligemma-3b-mix-224 | Gemma (gated) | The cleanest PEFT path + proven chart/doc QA; accept the Gemma license first. | |
| 24 | 25 | | qwen2-vl-2b-instruct | Apache-2.0 | Permissive licensing + strong general-purpose VL; dynamic-res is capped to 672² in v1 but native runtime supports more. | |
| 25 | 26 | | internvl2-2b | MIT | Registry-visible planning target for a future custom InternVL path; current train/prompt/export-snapshot flows refuse it on this stack. | |
| 27 | +| internvl3-2b | Apache-2.0 | Newer InternVL planning target with dynamic 448-tiling and `trust_remote_code`; currently registry-visible but still refused by the generic runtime. | | |
| 26 | 28 | | mistral-small-3.1-24b-instruct | Apache-2.0 | Highest-capability VL row in the registry today; targets large CUDA boxes first and is refused on MPS by default unless you explicitly force it. | |
| 27 | 29 | |
| 28 | 30 | ## PaliGemma-3B-mix-224 (224×224, fp16) |
@@ -70,11 +72,14 @@ between vision + text tokens. Gradient checkpointing on the tower | ||
| 70 | 72 | trims ~30% of peak; `training.gradient_checkpointing: true` in |
| 71 | 73 | frontmatter enables it. |
| 72 | 74 | |
| 73 | -## InternVL2-2B (448×448, fp16) | |
| 75 | +## InternVL2-2B / InternVL3-2B (448×448, fp16) | |
| 74 | 76 | |
| 75 | 77 | InternVL2 uses ViT-L/14 + pixel-shuffle 2×2 so 448² input yields 256 |
| 76 | 78 | image tokens per 448-tile — the smallest InternVL-family budget and |
| 77 | -the cheapest of the four rows on paper. | |
| 79 | +the cheapest of the registry rows on paper. InternVL3 keeps the same | |
| 80 | +448 target size but switches the registry row to `resize_policy: | |
| 81 | +dynamic` and a user-visible `<image>` placeholder while still | |
| 82 | +expanding into the same hidden InternVL context window at runtime. | |
| 78 | 83 | |
| 79 | 84 | | Config | Base weights | Adapter | Activations | Total (peak) | |
| 80 | 85 | |-----------------|-------------:|--------:|------------:|-------------:| |
@@ -86,15 +91,15 @@ the cheapest of the four rows on paper. | ||
| 86 | 91 | memory alone. 12 GB CUDA would handle batch=1; 16 GB CUDA would handle |
| 87 | 92 | batch=4. |
| 88 | 93 | |
| 89 | -**Current runtime status.** This row is not trainable/promptable via | |
| 90 | -the generic VL path today. InternVL2 ships as `InternVLChatModel`, a | |
| 91 | -custom remote-code family whose upstream runtime expands `<image>` into | |
| 92 | -repeated `<IMG_CONTEXT>` spans and threads `image_flags` through the | |
| 93 | -forward pass. On the current stack, `AutoProcessor.from_pretrained(...)` | |
| 94 | -resolves to a tokenizer-only object, so DLM refuses the family early | |
| 95 | -instead of failing later inside the model. Keep the budget numbers here | |
| 96 | -for planning, but use PaliGemma, Qwen2-VL, or Mistral Small 3.1 for | |
| 97 | -actual runs today. | |
| 94 | +**Current runtime status.** These rows are not trainable/promptable via | |
| 95 | +the generic VL path today. InternVL2 and InternVL3 both ship as | |
| 96 | +`InternVLChatModel`, a custom remote-code family whose upstream runtime | |
| 97 | +expands `<image>` into repeated `<IMG_CONTEXT>` spans and threads | |
| 98 | +`image_flags` through the forward pass. On the current stack, | |
| 99 | +`AutoProcessor.from_pretrained(...)` resolves to a tokenizer-only | |
| 100 | +object, so DLM refuses the family early instead of failing later inside | |
| 101 | +the model. Keep the budget numbers here for planning, but use | |
| 102 | +PaliGemma, Qwen2-VL, or Mistral Small 3.1 for actual runs today. | |
| 98 | 103 | |
| 99 | 104 | ## Mistral Small 3.1 24B Instruct (pinned 1540×1540, fp16) |
| 100 | 105 | |
@@ -129,6 +134,7 @@ by `scripts/bump-llama-cpp.sh bump <tag>`): | ||
| 129 | 134 | | paligemma-3b-mix-224 | PaliGemmaForConditionalGeneration | UNSUPPORTED | |
| 130 | 135 | | qwen2-vl-2b-instruct | Qwen2VLForConditionalGeneration | SUPPORTED | |
| 131 | 136 | | internvl2-2b | InternVLChatModel | UNSUPPORTED | |
| 137 | +| internvl3-2b | InternVLChatModel | UNSUPPORTED | | |
| 132 | 138 | |
| 133 | 139 | **UNSUPPORTED** means `dlm export` falls back to the HF-snapshot path |
| 134 | 140 | with an actionable banner. **SUPPORTED** means single-file VL GGUF |
@@ -175,6 +181,7 @@ with the preprocessing plan: | ||
| 175 | 181 | |---------------------------|------------:|----------------:| |
| 176 | 182 | | paligemma-3b-mix-224 | 224×224 | ~0.5 MB | |
| 177 | 183 | | internvl2-2b | 448×448 | ~2.0 MB | |
| 184 | +| internvl3-2b | 448×448 | ~2.0 MB | | |
| 178 | 185 | | qwen2-vl-2b-instruct | 672×672 | ~4.5 MB | |
| 179 | 186 | | mistral-small-3.1-24b-instruct | 1540×1540 | ~23.5 MB | |
| 180 | 187 | |
docs/index.md (modified)
@@ -19,10 +19,11 @@ you to run a 70B model you can't afford. DLM sits in the gap:
| 19 | 19 | control is both the prose you're training on and the configuration |
| 20 | 20 | for how the training runs. Edit, retrain, share. |
| 21 | 21 | - **Real pretrained bases.** SmolLM2-135M for fast iteration; newer |
| 22 | - registry rows like Qwen3, Llama 3.3, Gemma 2, SmolLM3, Phi-4-mini- | |
| 23 | - reasoning, OLMo-2, Mixtral, and Mistral Small 3.1 cover current | |
| 24 | - text, sparse-MoE, and multimodal use cases. No from-scratch | |
| 25 | - transformers, no toy experiments. | |
| 22 | + registry rows like Qwen3 (including a reasoning-profile key), | |
| 23 | + Llama 3.3, Gemma 2, SmolLM3, Phi-4-mini-reasoning, OLMo-2, Mixtral, | |
| 24 | + Mistral Small 3.1, and InternVL3 cover current text, sparse-MoE, | |
| 25 | + and multimodal planning use cases. No from-scratch transformers, | |
| 26 | + no toy experiments. | |
| 26 | 27 | - **Deterministic by contract.** Same document + same hardware tier + |
| 27 | 28 | pinned versions produce bit-identical adapters. [Determinism](determinism.md) |
| 28 | 29 | is a first-class feature. |
mkdocs.yml (modified)
@@ -62,6 +62,7 @@ nav:
| 62 | 62 | - .dlm/ignore: format/dlm-ignore.md |
| 63 | 63 | - CLI reference: cli/reference.md |
| 64 | 64 | - Cookbook: |
| 65 | + - Choosing a base: cookbook/choosing-a-base.md | |
| 65 | 66 | - Coding tutor: cookbook/coding-tutor.md |
| 66 | 67 | - Domain knowledge base: cookbook/domain-kb.md |
| 67 | 68 | - Writing partner: cookbook/writing-partner.md |
@@ -82,5 +83,7 @@ nav: | ||
| 82 | 83 | - Architecture: architecture.md |
| 83 | 84 | - Determinism: determinism.md |
| 84 | 85 | - Hardware: |
| 86 | + - Memory estimates: hardware/memory-estimates.md | |
| 87 | + - Vision-language memory: hardware/vl-memory.md | |
| 85 | 88 | - AMD ROCm: hardware/rocm.md |
| 86 | 89 | - Troubleshooting: troubleshooting.md |
src/dlm/base_models/registry.py (modified)
@@ -107,10 +107,7 @@ _ENTRIES: tuple[BaseModelSpec, ...] = (
| 107 | 107 | BaseModelSpec( |
| 108 | 108 | key="qwen3-1.7b", |
| 109 | 109 | hf_id="Qwen/Qwen3-1.7B", |
| 110 | - # Placeholder SHA: format-valid, not a real HF commit. The | |
| 111 | - # weekly `scripts/refresh-registry.py --check` run surfaces | |
| 112 | - # drift and prints the live value for manual review. | |
| 113 | - revision="1a2b3c4d5e6f7890abcdeffedcba0987654321ab", | |
| 110 | + revision="70d244cc86ccca08cf5af4e1e306ecf908b1ad5e", | |
| 114 | 111 | architecture="Qwen3ForCausalLM", |
| 115 | 112 | params=1_700_000_000, |
| 116 | 113 | target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], |
@@ -126,10 +123,29 @@ _ENTRIES: tuple[BaseModelSpec, ...] = ( | ||
| 126 | 123 | recommended_seq_len=2048, |
| 127 | 124 | reasoning_tuned=True, |
| 128 | 125 | ), |
| 126 | + BaseModelSpec( | |
| 127 | + key="qwen3-1.7b-thinking", | |
| 128 | + hf_id="Qwen/Qwen3-1.7B", | |
| 129 | + revision="70d244cc86ccca08cf5af4e1e306ecf908b1ad5e", | |
| 130 | + architecture="Qwen3ForCausalLM", | |
| 131 | + params=1_700_000_000, | |
| 132 | + target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], | |
| 133 | + template="qwen3thinking", | |
| 134 | + gguf_arch="qwen3", | |
| 135 | + tokenizer_pre="qwen2", | |
| 136 | + license_spdx="Apache-2.0", | |
| 137 | + license_url="https://huggingface.co/Qwen/Qwen3-1.7B/blob/main/LICENSE", | |
| 138 | + requires_acceptance=False, | |
| 139 | + redistributable=True, | |
| 140 | + size_gb_fp16=3.4, | |
| 141 | + context_length=32_768, | |
| 142 | + recommended_seq_len=2048, | |
| 143 | + reasoning_tuned=True, | |
| 144 | + ), | |
| 129 | 145 | BaseModelSpec( |
| 130 | 146 | key="qwen3-4b", |
| 131 | 147 | hf_id="Qwen/Qwen3-4B", |
| 132 | - revision="2b3c4d5e6f7890abcdeffedcba0987654321abc2", | |
| 148 | + revision="1cfa9a7208912126459214e8b04321603b3df60c", | |
| 133 | 149 | architecture="Qwen3ForCausalLM", |
| 134 | 150 | params=4_000_000_000, |
| 135 | 151 | target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], |
@@ -148,7 +164,7 @@ _ENTRIES: tuple[BaseModelSpec, ...] = ( | ||
| 148 | 164 | BaseModelSpec( |
| 149 | 165 | key="qwen3-8b", |
| 150 | 166 | hf_id="Qwen/Qwen3-8B", |
| 151 | - revision="3c4d5e6f7890abcdeffedcba0987654321abc2d3", | |
| 167 | + revision="b968826d9c46dd6066d109eabc6255188de91218", | |
| 152 | 168 | architecture="Qwen3ForCausalLM", |
| 153 | 169 | params=8_000_000_000, |
| 154 | 170 | target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], |
@@ -225,10 +241,7 @@ _ENTRIES: tuple[BaseModelSpec, ...] = ( | ||
| 225 | 241 | BaseModelSpec( |
| 226 | 242 | key="smollm3-3b", |
| 227 | 243 | hf_id="HuggingFaceTB/SmolLM3-3B", |
| 228 | - # Placeholder SHA: format-valid, not a real HF commit. The | |
| 229 | - # weekly `scripts/refresh-registry.py --check` run surfaces | |
| 230 | - # drift and prints the live value for manual review. | |
| 231 | - revision="5e6f7890abcdeffedcba0987654321abc2d3e4f5", | |
| 244 | + revision="a07cc9a04f16550a088caea529712d1d335b0ac1", | |
| 232 | 245 | architecture="SmolLM3ForCausalLM", |
| 233 | 246 | params=3_000_000_000, |
| 234 | 247 | target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], |
@@ -247,10 +260,7 @@ _ENTRIES: tuple[BaseModelSpec, ...] = ( | ||
| 247 | 260 | BaseModelSpec( |
| 248 | 261 | key="olmo-2-7b-instruct", |
| 249 | 262 | hf_id="allenai/OLMo-2-1124-7B-Instruct", |
| 250 | - # Placeholder SHA: format-valid, not a real HF commit. The | |
| 251 | - # weekly `scripts/refresh-registry.py --check` run surfaces | |
| 252 | - # drift and prints the live value for manual review. | |
| 253 | - revision="6f7890abcdeffedcba0987654321abc2d3e4f5a6", | |
| 263 | + revision="470b1fba1ae01581f270116362ee4aa1b97f4c84", | |
| 254 | 264 | architecture="Olmo2ForCausalLM", |
| 255 | 265 | params=7_000_000_000, |
| 256 | 266 | target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], |
@@ -268,10 +278,7 @@ _ENTRIES: tuple[BaseModelSpec, ...] = ( | ||
| 268 | 278 | BaseModelSpec( |
| 269 | 279 | key="gemma-2-2b-it", |
| 270 | 280 | hf_id="google/gemma-2-2b-it", |
| 271 | - # Placeholder SHA: format-valid, not a real HF commit. The | |
| 272 | - # weekly `scripts/refresh-registry.py --check` run surfaces | |
| 273 | - # drift and prints the live value for manual review. | |
| 274 | - revision="7a890abcdeffedcba0987654321abc2d3e4f5a6b", | |
| 281 | + revision="299a8560bedf22ed1c72a8a11e7dce4a7f9f51f8", | |
| 275 | 282 | architecture="Gemma2ForCausalLM", |
| 276 | 283 | params=2_600_000_000, |
| 277 | 284 | target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], |
@@ -289,10 +296,7 @@ _ENTRIES: tuple[BaseModelSpec, ...] = ( | ||
| 289 | 296 | BaseModelSpec( |
| 290 | 297 | key="gemma-2-9b-it", |
| 291 | 298 | hf_id="google/gemma-2-9b-it", |
| 292 | - # Placeholder SHA: format-valid, not a real HF commit. The | |
| 293 | - # weekly `scripts/refresh-registry.py --check` run surfaces | |
| 294 | - # drift and prints the live value for manual review. | |
| 295 | - revision="8f90abcdeffedcba0987654321abc2d3e4f5a6b7", | |
| 299 | + revision="11c9b309abf73637e4b6f9a3fa1e92e615547819", | |
| 296 | 300 | architecture="Gemma2ForCausalLM", |
| 297 | 301 | params=9_000_000_000, |
| 298 | 302 | target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], |
@@ -382,10 +386,7 @@ _ENTRIES: tuple[BaseModelSpec, ...] = ( | ||
| 382 | 386 | BaseModelSpec( |
| 383 | 387 | key="phi-4-mini-reasoning", |
| 384 | 388 | hf_id="microsoft/Phi-4-mini-reasoning", |
| 385 | - # Placeholder SHA: format-valid, not a real HF commit. The | |
| 386 | - # weekly `scripts/refresh-registry.py --check` run surfaces | |
| 387 | - # drift and prints the live value for manual review. | |
| 388 | - revision="9a0bcdeffedcba0987654321abc2d3e4f5a6b7c8", | |
| 389 | + revision="0e3b1e2d02ee478a3743abe3f629e9c0cb722e0a", | |
| 389 | 390 | architecture="Phi3ForCausalLM", |
| 390 | 391 | params=3_800_000_000, |
| 391 | 392 | target_modules=["qkv_proj", "o_proj", "gate_up_proj", "down_proj"], |
@@ -411,10 +412,7 @@ _ENTRIES: tuple[BaseModelSpec, ...] = ( | ||
| 411 | 412 | BaseModelSpec( |
| 412 | 413 | key="mixtral-8x7b-instruct", |
| 413 | 414 | hf_id="mistralai/Mixtral-8x7B-Instruct-v0.1", |
| 414 | - # Placeholder SHA: format-valid, not a real HF commit. The | |
| 415 | - # weekly `scripts/refresh-registry.py --check` run surfaces | |
| 416 | - # drift and prints the live value for manual review. | |
| 417 | - revision="bc0deffedcba0987654321abc2d3e4f5a6b7c8d9", | |
| 415 | + revision="eba92302a2861cdc0098cc54bc9f17cb2c47eb61", | |
| 418 | 416 | architecture="MixtralForCausalLM", |
| 419 | 417 | params=46_700_000_000, |
| 420 | 418 | target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], |
@@ -443,10 +441,7 @@ _ENTRIES: tuple[BaseModelSpec, ...] = ( | ||
| 443 | 441 | BaseModelSpec( |
| 444 | 442 | key="mistral-small-3.1-24b-instruct", |
| 445 | 443 | hf_id="mistralai/Mistral-Small-3.1-24B-Instruct-2503", |
| 446 | - # Placeholder SHA: format-valid, not a real HF commit. The | |
| 447 | - # weekly `scripts/refresh-registry.py --check` run surfaces | |
| 448 | - # drift and prints the live value for manual review. | |
| 449 | - revision="ab0cdeffedcba0987654321abc2d3e4f5a6b7c8d", | |
| 444 | + revision="68faf511d618ef198fef186659617cfd2eb8e33a", | |
| 450 | 445 | architecture="Mistral3ForConditionalGeneration", |
| 451 | 446 | params=24_000_000_000, |
| 452 | 447 | target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], |
@@ -484,14 +479,7 @@ _ENTRIES: tuple[BaseModelSpec, ...] = ( | ||
| 484 | 479 | BaseModelSpec( |
| 485 | 480 | key="paligemma-3b-mix-224", |
| 486 | 481 | hf_id="google/paligemma-3b-mix-224", |
| 487 | - # Placeholder SHA: format-valid, not a real HF commit. The | |
| 488 | - # weekly `scripts/refresh-registry.py --check` run surfaces | |
| 489 | - # it as drift; a maintainer pastes in the observed SHA from | |
| 490 | - # the script's output. Offline probe tests skip cleanly | |
| 491 | - # until then (see tests/unit/base_models/test_vl_registry.py). | |
| 492 | - # To verify, run: | |
| 493 | - # uv run python scripts/refresh-registry.py --check | |
| 494 | - revision="8d2f7bc9c15d71a00c14f9eb7e4c7b99c79e0a11", | |
| 482 | + revision="d1d8734c9c3ad0ccfeea4afc270faa356c2ba515", | |
| 495 | 483 | architecture="PaliGemmaForConditionalGeneration", |
| 496 | 484 | params=2_900_000_000, |
| 497 | 485 | target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], |
@@ -529,10 +517,7 @@ _ENTRIES: tuple[BaseModelSpec, ...] = ( | ||
| 529 | 517 | BaseModelSpec( |
| 530 | 518 | key="qwen2-vl-2b-instruct", |
| 531 | 519 | hf_id="Qwen/Qwen2-VL-2B-Instruct", |
| 532 | - # Placeholder SHA (format-valid, not a real commit). See the | |
| 533 | - # paligemma entry for the self-healing workflow via | |
| 534 | - # `scripts/refresh-registry.py --check`. | |
| 535 | - revision="c0d1e2f3a4b5c6d7e8f9a0b1c2d3e4f5a6b7c8d9", | |
| 520 | + revision="895c3a49bc3fa70a340399125c650a463535e71c", | |
| 536 | 521 | architecture="Qwen2VLForConditionalGeneration", |
| 537 | 522 | params=2_200_000_000, |
| 538 | 523 | target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], |
@@ -570,8 +555,7 @@ _ENTRIES: tuple[BaseModelSpec, ...] = ( | ||
| 570 | 555 | BaseModelSpec( |
| 571 | 556 | key="internvl2-2b", |
| 572 | 557 | hf_id="OpenGVLab/InternVL2-2B", |
| 573 | - # Placeholder SHA (format-valid, not a real commit). | |
| 574 | - revision="d1e2f3a4b5c6d7e8f9a0b1c2d3e4f5a6b7c8d9e0", | |
| 558 | + revision="e4f6747bd20f139e637642c6a058c6bd00b36919", | |
| 575 | 559 | architecture="InternVLChatModel", |
| 576 | 560 | params=2_200_000_000, |
| 577 | 561 | target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], |
@@ -594,14 +578,37 @@ _ENTRIES: tuple[BaseModelSpec, ...] = ( | ||
| 594 | 578 | num_image_tokens=256, |
| 595 | 579 | ), |
| 596 | 580 | ), |
| 581 | + BaseModelSpec( | |
| 582 | + key="internvl3-2b", | |
| 583 | + hf_id="OpenGVLab/InternVL3-2B", | |
| 584 | + revision="899155015275a9b7338c7f4677e19c784e0e5a21", | |
| 585 | + architecture="InternVLChatModel", | |
| 586 | + params=2_000_000_000, | |
| 587 | + target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], | |
| 588 | + template="internvl2", | |
| 589 | + gguf_arch="internvl3", | |
| 590 | + tokenizer_pre="internvl3", | |
| 591 | + license_spdx="Apache-2.0", | |
| 592 | + license_url="https://huggingface.co/OpenGVLab/InternVL3-2B", | |
| 593 | + requires_acceptance=False, | |
| 594 | + redistributable=True, | |
| 595 | + trust_remote_code=True, | |
| 596 | + size_gb_fp16=4.0, | |
| 597 | + context_length=32_768, | |
| 598 | + recommended_seq_len=2048, | |
| 599 | + modality="vision-language", | |
| 600 | + vl_preprocessor_plan=VlPreprocessorPlan( | |
| 601 | + target_size=(448, 448), | |
| 602 | + resize_policy="dynamic", | |
| 603 | + image_token="<image>", | |
| 604 | + num_image_tokens=256, | |
| 605 | + ), | |
| 606 | + ), | |
| 597 | 607 | # --- Audio-language bases ----------------------------------------------- |
| 598 | 608 | # Qwen2-Audio-7B-Instruct — Alibaba's open audio-text model. Uses |
| 599 | 609 | # the Qwen2 LLM backbone + a dedicated audio encoder. Apache-2.0 |
| 600 | - # but the 7B checkpoint is gated on HF via license acceptance, so | |
| 601 | - # `requires_acceptance=True` flows through the same pattern the | |
| 602 | - # Llama-3.2 / PaliGemma entries use. Redistributable under | |
| 603 | - # Apache-2.0, but not-bundled-by-default because the pack size | |
| 604 | - # (~14 GB fp16) dominates the tarball. | |
| 610 | + # and currently ungated on HF, so the registry keeps it open and | |
| 611 | + # redistributable like the other permissive Qwen rows. | |
| 605 | 612 | # |
| 606 | 613 | # The 16 kHz pin + 30 s max-length match the training-time |
| 607 | 614 | # defaults documented in the Qwen2-Audio card. Resampling support |
@@ -614,10 +621,7 @@ _ENTRIES: tuple[BaseModelSpec, ...] = ( | ||
| 614 | 621 | BaseModelSpec( |
| 615 | 622 | key="qwen2-audio-7b-instruct", |
| 616 | 623 | hf_id="Qwen/Qwen2-Audio-7B-Instruct", |
| 617 | - # Placeholder SHA (format-valid, not a real commit). See the | |
| 618 | - # paligemma entry for the self-healing workflow via | |
| 619 | - # `scripts/refresh-registry.py --check`. | |
| 620 | - revision="a1b2c3d4e5f678901234567890abcdef01234567", | |
| 624 | + revision="0a095220c30b7b31434169c3086508ef3ea5bf0a", | |
| 621 | 625 | architecture="Qwen2AudioForConditionalGeneration", |
| 622 | 626 | params=8_400_000_000, |
| 623 | 627 | target_modules=["q_proj", "k_proj", "v_proj", "o_proj"], |
@@ -626,8 +630,8 @@ _ENTRIES: tuple[BaseModelSpec, ...] = ( | ||
| 626 | 630 | tokenizer_pre="qwen2", |
| 627 | 631 | license_spdx="Apache-2.0", |
| 628 | 632 | license_url="https://huggingface.co/Qwen/Qwen2-Audio-7B-Instruct", |
| 629 | - requires_acceptance=True, | |
| 630 | - redistributable=False, | |
| 633 | + requires_acceptance=False, | |
| 634 | + redistributable=True, | |
| 631 | 635 | size_gb_fp16=15.5, |
| 632 | 636 | context_length=8_192, |
| 633 | 637 | recommended_seq_len=2048, |
src/dlm/base_models/resolver.pymodified@@ -30,6 +30,7 @@ from dlm.base_models.schema import BaseModelSpec | ||
| 30 | 30 | |
| 31 | 31 | TemplateDialect = Literal[ |
| 32 | 32 | "chatml", |
| 33 | + "qwen3thinking", | |
| 33 | 34 | "gemma2", |
| 34 | 35 | "smollm3", |
| 35 | 36 | "olmo2", |
src/dlm/base_models/schema.pymodified@@ -101,6 +101,7 @@ class BaseModelSpec(BaseModel): | ||
| 101 | 101 | target_modules: list[str] = Field(..., min_length=1) |
| 102 | 102 | template: Literal[ |
| 103 | 103 | "chatml", |
| 104 | + "qwen3thinking", | |
| 104 | 105 | "gemma2", |
| 105 | 106 | "smollm3", |
| 106 | 107 | "olmo2", |
src/dlm/base_models/templates/qwen3thinking.jinjaadded@@ -0,0 +1,14 @@ | ||
| 1 | +{# | |
| 2 | +Qwen3 reasoning-profile reference template. | |
| 3 | + | |
| 4 | +Upstream keeps ChatML framing for request construction; the profile | |
| 5 | +delta is the reasoning-first sampling/runtime behavior rather than a | |
| 6 | +different turn wrapper. | |
| 7 | +#} | |
| 8 | +{%- for message in messages -%} | |
| 9 | +<|im_start|>{{ message['role'] }} | |
| 10 | +{{ message['content'] }}<|im_end|> | |
| 11 | +{% endfor -%} | |
| 12 | +{%- if add_generation_prompt -%} | |
| 13 | +<|im_start|>assistant | |
| 14 | +{%- endif -%} | |
src/dlm/export/ollama/template_registry.pymodified@@ -23,7 +23,17 @@ from typing import Final, Literal | ||
| 23 | 23 | |
| 24 | 24 | from dlm.export.ollama.errors import TemplateRegistryError |
| 25 | 25 | |
| 26 | -Dialect = Literal["chatml", "gemma2", "smollm3", "olmo2", "llama3", "phi3", "phi4mini", "mistral"] | |
| 26 | +Dialect = Literal[ | |
| 27 | + "chatml", | |
| 28 | + "qwen3thinking", | |
| 29 | + "gemma2", | |
| 30 | + "smollm3", | |
| 31 | + "olmo2", | |
| 32 | + "llama3", | |
| 33 | + "phi3", | |
| 34 | + "phi4mini", | |
| 35 | + "mistral", | |
| 36 | +] | |
| 27 | 37 | |
| 28 | 38 | _TEMPLATES_DIR: Final[Path] = Path(__file__).resolve().parent / "templates" |
| 29 | 39 | |
@@ -62,6 +72,16 @@ _REGISTRY: Final[dict[Dialect, DialectTemplate]] = { | ||
| 62 | 72 | # synthesize a new turn instead of yielding. |
| 63 | 73 | default_stops=("<|im_end|>", "<|endoftext|>", "<|im_start|>"), |
| 64 | 74 | ), |
| 75 | + "qwen3thinking": DialectTemplate( | |
| 76 | + dialect="qwen3thinking", | |
| 77 | + template_path=_TEMPLATES_DIR / "qwen3thinking.gotmpl", | |
| 78 | + # Qwen3's reasoning profile keeps ChatML turn framing, but | 
| 79 | + # upstream recommends broader sampling defaults (temperature | 
| 80 | + # 0.6, top_p 0.95) than the legacy ChatML family. | 
| 81 | + default_stops=("<|im_end|>", "<|endoftext|>", "<|im_start|>"), | |
| 82 | + default_temperature=0.6, | |
| 83 | + default_top_p=0.95, | |
| 84 | + ), | |
| 65 | 85 | "gemma2": DialectTemplate( |
| 66 | 86 | dialect="gemma2", |
| 67 | 87 | template_path=_TEMPLATES_DIR / "gemma2.gotmpl", |
src/dlm/export/ollama/templates/qwen3thinking.gotmpladded@@ -0,0 +1,5 @@ | ||
| 1 | +{{- if .System }}<|im_start|>system | |
| 2 | +{{ .System }}<|im_end|> | |
| 3 | +{{ end }}{{- range .Messages }}<|im_start|>{{ .Role }} | |
| 4 | +{{ .Content }}<|im_end|> | |
| 5 | +{{ end }}<|im_start|>assistant | |
tests/integration/base_models/test_13_entries_scaffold.pyadded@@ -0,0 +1,45 @@ | ||
| 1 | +"""Sprint 40 closeout mirror for the named 13-entry scaffold deliverable.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +from pathlib import Path | |
| 6 | + | |
| 7 | +import pytest | |
| 8 | +from tests.integration.cli.test_registry_refresh_init import SPRINT40_INIT_CASES | |
| 9 | +from typer.testing import CliRunner | |
| 10 | + | |
| 11 | +from dlm.cli.app import app | |
| 12 | +from dlm.doc.parser import parse_file | |
| 13 | +from dlm.doc.sections import SectionType | |
| 14 | + | |
| 15 | + | |
| 16 | +@pytest.mark.parametrize(("base_key", "extra_flags", "expect_image_section"), SPRINT40_INIT_CASES) | |
| 17 | +def test_init_scaffolds_for_all_thirteen_registry_refresh_entries( | |
| 18 | + tmp_path: Path, | |
| 19 | + base_key: str, | |
| 20 | + extra_flags: list[str], | |
| 21 | + expect_image_section: bool, | |
| 22 | +) -> None: | |
| 23 | + runner = CliRunner() | |
| 24 | + home = tmp_path / "home" | |
| 25 | + doc = tmp_path / f"{base_key}.dlm" | |
| 26 | + | |
| 27 | + result = runner.invoke( | |
| 28 | + app, | |
| 29 | + [ | |
| 30 | + "--home", | |
| 31 | + str(home), | |
| 32 | + "init", | |
| 33 | + str(doc), | |
| 34 | + "--base", | |
| 35 | + base_key, | |
| 36 | + *extra_flags, | |
| 37 | + ], | |
| 38 | + ) | |
| 39 | + assert result.exit_code == 0, result.output | |
| 40 | + parsed = parse_file(doc) | |
| 41 | + section_types = {section.type for section in parsed.sections} | |
| 42 | + if expect_image_section: | |
| 43 | + assert SectionType.IMAGE in section_types | |
| 44 | + else: | |
| 45 | + assert SectionType.IMAGE not in section_types | |
tests/integration/cli/test_registry_refresh_init.pymodified@@ -1,11 +1,4 @@ | ||
| 1 | -"""Scaffold coverage for the Sprint 40 registry refresh entries we ship. | |
| 2 | - | |
| 3 | -This is intentionally scoped to entries that currently exist in the | |
| 4 | -registry. Two rows in the original sprint draft still need upstream | |
| 5 | -reality work (`qwen3-1.7b-thinking`, `internvl3-2b`), so this test | |
| 6 | -guards the refresh surface we have actually landed rather than baking | |
| 7 | -stale assumptions into CI. | |
| 8 | -""" | |
| 1 | +"""Scaffold coverage for every Sprint 40 registry-refresh entry.""" | |
| 9 | 2 | |
| 10 | 3 | from __future__ import annotations |
| 11 | 4 | |
@@ -18,24 +11,25 @@ from dlm.cli.app import app | ||
| 18 | 11 | from dlm.doc.parser import parse_file |
| 19 | 12 | from dlm.doc.sections import SectionType |
| 20 | 13 | |
| 21 | - | |
| 22 | -@pytest.mark.parametrize( | |
| 23 | - ("base_key", "extra_flags", "expect_image_section"), | |
| 24 | - [ | |
| 25 | - ("qwen3-1.7b", [], False), | |
| 26 | - ("qwen3-4b", [], False), | |
| 27 | - ("qwen3-8b", [], False), | |
| 28 | - ("llama-3.3-8b-instruct", ["--i-accept-license"], False), | |
| 29 | - ("phi-4-mini-reasoning", [], False), | |
| 30 | - ("gemma-2-2b-it", ["--i-accept-license"], False), | |
| 31 | - ("gemma-2-9b-it", ["--i-accept-license"], False), | |
| 32 | - ("mistral-small-3.1-24b-instruct", ["--multimodal"], True), | |
| 33 | - ("smollm3-3b", [], False), | |
| 34 | - ("olmo-2-7b-instruct", [], False), | |
| 35 | - ("mixtral-8x7b-instruct", [], False), | |
| 36 | - ], | |
| 14 | +SPRINT40_INIT_CASES: tuple[tuple[str, list[str], bool], ...] = ( | |
| 15 | + ("qwen3-1.7b", [], False), | |
| 16 | + ("qwen3-1.7b-thinking", [], False), | |
| 17 | + ("qwen3-4b", [], False), | |
| 18 | + ("qwen3-8b", [], False), | |
| 19 | + ("llama-3.3-8b-instruct", ["--i-accept-license"], False), | |
| 20 | + ("phi-4-mini-reasoning", [], False), | |
| 21 | + ("gemma-2-2b-it", ["--i-accept-license"], False), | |
| 22 | + ("gemma-2-9b-it", ["--i-accept-license"], False), | |
| 23 | + ("mistral-small-3.1-24b-instruct", ["--multimodal"], True), | |
| 24 | + ("smollm3-3b", [], False), | |
| 25 | + ("olmo-2-7b-instruct", [], False), | |
| 26 | + ("mixtral-8x7b-instruct", [], False), | |
| 27 | + ("internvl3-2b", ["--multimodal"], True), | |
| 37 | 28 | ) |
| 38 | -def test_init_scaffolds_for_landed_registry_refresh_entries( | |
| 29 | + | |
| 30 | + | |
| 31 | +@pytest.mark.parametrize(("base_key", "extra_flags", "expect_image_section"), SPRINT40_INIT_CASES) | |
| 32 | +def test_init_scaffolds_for_every_registry_refresh_entry( | |
| 39 | 33 | tmp_path: Path, |
| 40 | 34 | base_key: str, |
| 41 | 35 | extra_flags: list[str], |
tests/integration/gate/test_mixtral_gate_smoke.pyadded@@ -0,0 +1,54 @@ | ||
| 1 | +"""Sprint 40 smoke proof that the Mixtral row still flows through Sprint 34 gate paths.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +from pathlib import Path | |
| 6 | +from types import SimpleNamespace | |
| 7 | + | |
| 8 | +from dlm.base_models import BASE_MODELS | |
| 9 | +from dlm.inference.gate import GateHandle, load_gate_handle | |
| 10 | +from dlm.modality import modality_for | |
| 11 | +from dlm.train.gate import GateTrainingSample, train_gate | |
| 12 | + | |
| 13 | + | |
| 14 | +def _store(tmp_path: Path) -> SimpleNamespace: | |
| 15 | + return SimpleNamespace(root=tmp_path) | |
| 16 | + | |
| 17 | + | |
| 18 | +def test_mixtral_text_moe_row_still_uses_text_gate_pipeline(tmp_path: Path) -> None: | |
| 19 | + import torch | |
| 20 | + | |
| 21 | + spec = BASE_MODELS["mixtral-8x7b-instruct"] | |
| 22 | + dispatch = modality_for(spec) | |
| 23 | + assert spec.modality == "text-moe" | |
| 24 | + assert dispatch.accepts_images is False | |
| 25 | + assert dispatch.accepts_audio is False | |
| 26 | + | |
| 27 | + store = _store(tmp_path) | |
| 28 | + samples: list[GateTrainingSample] = [] | |
| 29 | + for _ in range(12): | |
| 30 | + samples.append( | |
| 31 | + GateTrainingSample(embedding=torch.ones(8) + 0.05 * torch.randn(8), adapter_name="a") | |
| 32 | + ) | |
| 33 | + samples.append( | |
| 34 | + GateTrainingSample(embedding=-torch.ones(8) + 0.05 * torch.randn(8), adapter_name="b") | |
| 35 | + ) | |
| 36 | + | |
| 37 | + result = train_gate( | |
| 38 | + store, # type: ignore[arg-type] | |
| 39 | + samples, | |
| 40 | + adapter_names=("a", "b"), | |
| 41 | + input_dim=8, | |
| 42 | + hidden_proj_dim=8, | |
| 43 | + steps=80, | |
| 44 | + lr=3e-3, | |
| 45 | + cold_start_floor=1, | |
| 46 | + batch_size=8, | |
| 47 | + seed=0, | |
| 48 | + ) | |
| 49 | + assert result.mode == "trained" | |
| 50 | + | |
| 51 | + handle = load_gate_handle(store) # type: ignore[arg-type] | |
| 52 | + assert isinstance(handle, GateHandle) | |
| 53 | + assert handle.is_uniform is False | |
| 54 | + assert handle.metadata.adapter_names == ("a", "b") | |
tests/unit/base_models/test_audio_registry.pymodified@@ -4,8 +4,8 @@ Mirrors `test_vl_registry.py` for the audio-language modality. Covers: | ||
| 4 | 4 | |
| 5 | 5 | - `qwen2-audio-7b-instruct` is present and has `modality="audio-language"`. |
| 6 | 6 | - Its `AudioPreprocessorPlan` is pinned (16 kHz, 30 s, `<|AUDIO|>`, 750). |
| 7 | -- License is Apache-2.0 but the 7B weights are gated behind HF acceptance | |
| 8 | - and flagged non-redistributable (pack tarball size). | |
| 7 | +- License is Apache-2.0 and the current HF row is no longer gated, so | |
| 8 | + the spec stays redistributable. | |
| 9 | 9 | - `modality="audio-language"` without a plan rejects at validate time; |
| 10 | 10 | text bases cannot carry an audio plan; VL bases cannot carry an audio |
| 11 | 11 | plan; audio bases cannot carry a VL plan. |
@@ -48,10 +48,10 @@ class TestQwen2AudioRegistryEntry: | ||
| 48 | 48 | spec = BASE_MODELS["qwen2-audio-7b-instruct"] |
| 49 | 49 | assert spec.vl_preprocessor_plan is None |
| 50 | 50 | |
| 51 | - def test_license_gated_not_redistributable(self) -> None: | |
| 51 | + def test_license_open_and_redistributable(self) -> None: | |
| 52 | 52 | spec = BASE_MODELS["qwen2-audio-7b-instruct"] |
| 53 | - assert spec.requires_acceptance is True | |
| 54 | - assert spec.redistributable is False | |
| 53 | + assert spec.requires_acceptance is False | |
| 54 | + assert spec.redistributable is True | |
| 55 | 55 | |
| 56 | 56 | def test_architecture_is_audio_conditional_generation(self) -> None: |
| 57 | 57 | spec = BASE_MODELS["qwen2-audio-7b-instruct"] |
tests/unit/base_models/test_registry.pymodified@@ -73,11 +73,13 @@ class TestLicenseFields: | ||
| 73 | 73 | "qwen2.5-1.5b", |
| 74 | 74 | "qwen2.5-coder-1.5b", |
| 75 | 75 | "qwen3-1.7b", |
| 76 | + "qwen3-1.7b-thinking", | |
| 76 | 77 | "qwen3-4b", |
| 77 | 78 | "qwen3-8b", |
| 78 | 79 | "mixtral-8x7b-instruct", |
| 79 | 80 | "smollm3-3b", |
| 80 | 81 | "olmo-2-7b-instruct", |
| 82 | + "qwen2-audio-7b-instruct", | |
| 81 | 83 | "smollm2-135m", |
| 82 | 84 | "smollm2-360m", |
| 83 | 85 | "smollm2-1.7b", |
@@ -137,7 +139,10 @@ class TestArchitectureShapes: | ||
| 137 | 139 | BASE_MODELS[k].size_gb_fp16 for k in ("qwen2.5-0.5b", "qwen2.5-1.5b", "qwen2.5-3b") |
| 138 | 140 | ] |
| 139 | 141 | assert qwen_sizes == sorted(qwen_sizes) |
| 140 | - qwen3_sizes = [BASE_MODELS[k].size_gb_fp16 for k in ("qwen3-1.7b", "qwen3-4b", "qwen3-8b")] | |
| 142 | + qwen3_sizes = [ | |
| 143 | + BASE_MODELS[k].size_gb_fp16 | |
| 144 | + for k in ("qwen3-1.7b", "qwen3-1.7b-thinking", "qwen3-4b", "qwen3-8b") | |
| 145 | + ] | |
| 141 | 146 | assert qwen3_sizes == sorted(qwen3_sizes) |
| 142 | 147 | smol_sizes = [ |
| 143 | 148 | BASE_MODELS[k].size_gb_fp16 for k in ("smollm2-135m", "smollm2-360m", "smollm2-1.7b") |
tests/unit/base_models/test_registry_2026.pymodified@@ -37,6 +37,27 @@ class TestQwen3RegistryEntries: | ||
| 37 | 37 | assert spec.size_gb_fp16 == pytest.approx(16.0) |
| 38 | 38 | |
| 39 | 39 | |
| 40 | +class TestQwen3ThinkingRegistryEntry: | |
| 41 | + def test_entry_present(self) -> None: | |
| 42 | + assert "qwen3-1.7b-thinking" in BASE_MODELS | |
| 43 | + | |
| 44 | + def test_reuses_live_qwen3_weights_with_reasoning_profile(self) -> None: | |
| 45 | + spec = BASE_MODELS["qwen3-1.7b-thinking"] | |
| 46 | + assert spec.hf_id == "Qwen/Qwen3-1.7B" | |
| 47 | + assert spec.architecture == "Qwen3ForCausalLM" | |
| 48 | + assert spec.template == "qwen3thinking" | |
| 49 | + assert spec.gguf_arch == "qwen3" | |
| 50 | + assert spec.tokenizer_pre == "qwen2" | |
| 51 | + | |
| 52 | + def test_reasoning_profile_keeps_open_license_and_cooler_default(self) -> None: | |
| 53 | + spec = BASE_MODELS["qwen3-1.7b-thinking"] | |
| 54 | + assert spec.license_spdx == "Apache-2.0" | |
| 55 | + assert spec.requires_acceptance is False | |
| 56 | + assert spec.redistributable is True | |
| 57 | + assert spec.reasoning_tuned is True | |
| 58 | + assert spec.suggested_prompt_temperature == pytest.approx(0.6) | |
| 59 | + | |
| 60 | + | |
| 40 | 61 | class TestLlama33RegistryEntry: |
| 41 | 62 | def test_entry_present(self) -> None: |
| 42 | 63 | assert "llama-3.3-8b-instruct" in BASE_MODELS |
@@ -212,28 +233,23 @@ class TestMixtralRegistryEntry: | ||
| 212 | 233 | assert spec.recommended_seq_len == 2048 |
| 213 | 234 | |
| 214 | 235 | |
| 215 | -class TestStaleSprintDraftRows: | |
| 216 | - def test_qwen3_thinking_is_not_a_separate_registry_row(self) -> None: | |
| 217 | - """Upstream Qwen3-1.7B ships hybrid thinking in one model. | |
| 218 | - | |
| 219 | - Sprint 40's draft listed a separate `qwen3-1.7b-thinking` | |
| 220 | - entry, but the live upstream contract exposes thinking mode as | |
| 221 | - a switch on `Qwen/Qwen3-1.7B` itself. Keep the registry honest: | |
| 222 | - reasoning defaults belong on the real base row, not a fake key. | |
| 223 | - """ | |
| 224 | - assert "qwen3-1.7b-thinking" not in BASE_MODELS | |
| 225 | - | |
| 226 | - def test_internvl3_not_shipped_until_remote_code_contract_is_pinned(self) -> None: | |
| 227 | - """Guard against copying the stale sprint draft into the registry. | |
| 228 | - | |
| 229 | - The live `OpenGVLab/InternVL3-2B` model card still documents | |
| 230 | - `trust_remote_code=True`, and on the current stack the whole | |
| 231 | - InternVL family still exposes a tokenizer-only `AutoProcessor` | |
| 232 | - rather than a complete image processor. Upstream also expands | |
| 233 | - `<image>` into repeated `<IMG_CONTEXT>` spans and threads | |
| 234 | - `image_flags` through the forward pass. Adding InternVL3 later | |
| 235 | - is fine, but it needs an honest runtime contract instead of | |
| 236 | - assuming the old "cleaner than InternVL2" sprint note is still | |
| 237 | - true. | |
| 238 | - """ | |
| 239 | - assert "internvl3-2b" not in BASE_MODELS | |
| 236 | +class TestInternVL3RegistryEntry: | |
| 237 | + def test_entry_present(self) -> None: | |
| 238 | + assert "internvl3-2b" in BASE_MODELS | |
| 239 | + | |
| 240 | + def test_entry_keeps_remote_code_contract_explicit(self) -> None: | |
| 241 | + spec = BASE_MODELS["internvl3-2b"] | |
| 242 | + assert spec.hf_id == "OpenGVLab/InternVL3-2B" | |
| 243 | + assert spec.architecture == "InternVLChatModel" | |
| 244 | + assert spec.template == "internvl2" | |
| 245 | + assert spec.trust_remote_code is True | |
| 246 | + | |
| 247 | + def test_entry_is_registry_visible_but_not_pretending_runtime_is_generic(self) -> None: | |
| 248 | + spec = BASE_MODELS["internvl3-2b"] | |
| 249 | + assert spec.license_spdx == "Apache-2.0" | |
| 250 | + assert spec.requires_acceptance is False | |
| 251 | + assert spec.redistributable is True | |
| 252 | + assert spec.modality == "vision-language" | |
| 253 | + assert spec.vl_preprocessor_plan is not None | |
| 254 | + assert spec.vl_preprocessor_plan.resize_policy == "dynamic" | |
| 255 | + assert spec.vl_preprocessor_plan.image_token == "<image>" | |
tests/unit/base_models/test_schema.pymodified@@ -89,7 +89,17 @@ class TestTargetModules: | ||
| 89 | 89 | class TestLiteralConstraints: |
| 90 | 90 | @pytest.mark.parametrize( |
| 91 | 91 | "template", |
| 92 | - ["chatml", "gemma2", "smollm3", "olmo2", "llama3", "phi3", "phi4mini", "mistral"], | |
| 92 | + [ | |
| 93 | + "chatml", | |
| 94 | + "qwen3thinking", | |
| 95 | + "gemma2", | |
| 96 | + "smollm3", | |
| 97 | + "olmo2", | |
| 98 | + "llama3", | |
| 99 | + "phi3", | |
| 100 | + "phi4mini", | |
| 101 | + "mistral", | |
| 102 | + ], | |
| 93 | 103 | ) |
| 94 | 104 | def test_template_literals_accepted(self, template: str) -> None: |
| 95 | 105 | spec = _minimal(template=template) |
tests/unit/base_models/test_vl_registry.pymodified@@ -143,6 +143,7 @@ _VL_BASE_KEYS: tuple[str, ...] = ( | ||
| 143 | 143 | "paligemma-3b-mix-224", |
| 144 | 144 | "qwen2-vl-2b-instruct", |
| 145 | 145 | "internvl2-2b", |
| 146 | + "internvl3-2b", | |
| 146 | 147 | "mistral-small-3.1-24b-instruct", |
| 147 | 148 | ) |
| 148 | 149 | |
@@ -160,7 +161,7 @@ class TestAllVlBasesShipModalityInvariants: | ||
| 160 | 161 | assert spec.vl_preprocessor_plan is not None |
| 161 | 162 | # Pinned identity fields — each one is part of the cache key, |
| 162 | 163 | # so a silent default would silently invalidate caches. |
| 163 | - assert spec.vl_preprocessor_plan.resize_policy == "fixed" | |
| 164 | + assert spec.vl_preprocessor_plan.resize_policy in {"fixed", "dynamic"} | |
| 164 | 165 | assert spec.vl_preprocessor_plan.num_image_tokens > 0 |
| 165 | 166 | |
| 166 | 167 | @pytest.mark.parametrize("key", _VL_BASE_KEYS) |
@@ -241,28 +242,56 @@ class TestInternVL2RegistryEntry: | ||
| 241 | 242 | assert BASE_MODELS["internvl2-2b"].template == "internvl2" |
| 242 | 243 | |
| 243 | 244 | |
| 245 | +class TestInternVL3RegistryEntry: | |
| 246 | + """Sprint 40 refresh: InternVL3 lands with an explicit runtime caveat.""" | |
| 247 | + | |
| 248 | + def test_entry_present(self) -> None: | |
| 249 | + assert "internvl3-2b" in BASE_MODELS | |
| 250 | + | |
| 251 | + def test_apache_permissive(self) -> None: | |
| 252 | + spec = BASE_MODELS["internvl3-2b"] | |
| 253 | + assert spec.license_spdx == "Apache-2.0" | |
| 254 | + assert spec.requires_acceptance is False | |
| 255 | + assert spec.redistributable is True | |
| 256 | + | |
| 257 | + def test_dynamic_preprocessing_plan_is_pinned(self) -> None: | |
| 258 | + spec = BASE_MODELS["internvl3-2b"] | |
| 259 | + plan = spec.vl_preprocessor_plan | |
| 260 | + assert plan is not None | |
| 261 | + assert plan.target_size == (448, 448) | |
| 262 | + assert plan.resize_policy == "dynamic" | |
| 263 | + assert plan.image_token == "<image>" | |
| 264 | + assert plan.num_image_tokens == 256 | |
| 265 | + | |
| 266 | + def test_architecture_and_template(self) -> None: | |
| 267 | + spec = BASE_MODELS["internvl3-2b"] | |
| 268 | + assert spec.architecture == "InternVLChatModel" | |
| 269 | + assert spec.template == "internvl2" | |
| 270 | + assert spec.trust_remote_code is True | |
| 271 | + | |
| 272 | + | |
| 244 | 273 | class TestDistinctVlBases: |
| 245 | 274 | """The VL bases occupy distinct rows with no silent duplicates.""" |
| 246 | 275 | |
| 247 | 276 | def test_all_keys_unique(self) -> None: |
| 248 | - assert len(set(_VL_BASE_KEYS)) == 4 | |
| 277 | + assert len(set(_VL_BASE_KEYS)) == 5 | |
| 249 | 278 | |
| 250 | 279 | def test_hf_ids_distinct(self) -> None: |
| 251 | 280 | hf_ids = {BASE_MODELS[k].hf_id for k in _VL_BASE_KEYS} |
| 252 | - assert len(hf_ids) == 4 | |
| 281 | + assert len(hf_ids) == 5 | |
| 253 | 282 | |
| 254 | 283 | def test_image_tokens_distinct_per_base(self) -> None: |
| 255 | - """Each VL base uses its native image-token string. | |
| 284 | + """VL rows pin their native placeholder tokens explicitly. | |
| 256 | 285 | |
| 257 | - Silently sharing a placeholder across bases would break the | |
| 258 | - cache-key invariant in vl_cache.py (cache key includes the | |
| 259 | - token via processor_sha256). | |
| 286 | + Some families legitimately reuse the same surface token | |
| 287 | + (`<image>`), so this checks the concrete set rather than | |
| 288 | + forcing uniqueness for uniqueness' sake. | |
| 260 | 289 | """ |
| 261 | 290 | tokens = { |
| 262 | 291 | BASE_MODELS[k].vl_preprocessor_plan.image_token # type: ignore[union-attr] |
| 263 | 292 | for k in _VL_BASE_KEYS |
| 264 | 293 | } |
| 265 | - assert len(tokens) == 4 | |
| 294 | + assert tokens == {"<image>", "<|image_pad|>", "<IMG_CONTEXT>", "[IMG]"} | |
| 266 | 295 | |
| 267 | 296 | |
| 268 | 297 | class TestCountVlRegistryEntries: |
@@ -270,7 +299,7 @@ class TestCountVlRegistryEntries: | ||
| 270 | 299 | |
| 271 | 300 | def test_at_least_four_vl_bases_registered(self) -> None: |
| 272 | 301 | vl_count = sum(1 for s in BASE_MODELS.values() if s.modality == "vision-language") |
| 273 | - assert vl_count >= 4 | |
| 302 | + assert vl_count >= 5 | |
| 274 | 303 | |
| 275 | 304 | |
| 276 | 305 | class TestTrustRemoteCodeOptIn: |
@@ -292,6 +321,9 @@ class TestTrustRemoteCodeOptIn: | ||
| 292 | 321 | is defined in the model repo, not in transformers.""" |
| 293 | 322 | assert BASE_MODELS["internvl2-2b"].trust_remote_code is True |
| 294 | 323 | |
| 324 | + def test_internvl3_opts_in(self) -> None: | |
| 325 | + assert BASE_MODELS["internvl3-2b"].trust_remote_code is True | |
| 326 | + | |
| 295 | 327 | def test_text_bases_default_false(self) -> None: |
| 296 | 328 | """None of the text bases opt into trust_remote_code.""" |
| 297 | 329 | for key, spec in BASE_MODELS.items(): |
tests/unit/doc/test_v12_migrator.pyadded@@ -0,0 +1,36 @@ | ||
| 1 | +"""Named Sprint 40 closeout checks for the v12 → v13 migrator.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +from typing import Any | |
| 6 | + | |
| 7 | +from dlm.doc.migrations.v12 import migrate | |
| 8 | +from dlm.doc.schema import DlmFrontmatter | |
| 9 | + | |
| 10 | +_VALID_ULID = "01HZ4X7TGZM3J1A2B3C4D5E6F7" | |
| 11 | + | |
| 12 | + | |
| 13 | +def test_v12_migrator_is_identity_for_existing_frontmatter() -> None: | |
| 14 | + raw: dict[str, Any] = { | |
| 15 | + "dlm_id": _VALID_ULID, | |
| 16 | + "base_model": "smollm2-135m", | |
| 17 | + "dlm_version": 12, | |
| 18 | + "training": {"audio": {"auto_resample": True}, "lora_r": 16}, | |
| 19 | + } | |
| 20 | + out = migrate(raw) | |
| 21 | + assert out == raw | |
| 22 | + assert out is not raw | |
| 23 | + | |
| 24 | + | |
| 25 | +def test_v12_migrator_output_validates_as_v13() -> None: | |
| 26 | + raw: dict[str, Any] = { | |
| 27 | + "dlm_id": _VALID_ULID, | |
| 28 | + "base_model": "smollm2-135m", | |
| 29 | + "dlm_version": 12, | |
| 30 | + "training": {"audio": {"auto_resample": True}}, | |
| 31 | + } | |
| 32 | + out = migrate(raw) | |
| 33 | + out["dlm_version"] = 13 | |
| 34 | + fm = DlmFrontmatter.model_validate(out) | |
| 35 | + assert fm.dlm_version == 13 | |
| 36 | + assert fm.training.audio.auto_resample is True | |
tests/unit/export/ollama/test_template_registry.pymodified@@ -13,9 +13,10 @@ from dlm.export.ollama.template_registry import ( | ||
| 13 | 13 | |
| 14 | 14 | |
| 15 | 15 | class TestRegistryCoverage: |
| 16 | - def test_all_eight_dialects_registered(self) -> None: | |
| 16 | + def test_all_nine_dialects_registered(self) -> None: | |
| 17 | 17 | assert set(registered_dialects()) == { |
| 18 | 18 | "chatml", |
| 19 | + "qwen3thinking", | |
| 19 | 20 | "gemma2", |
| 20 | 21 | "smollm3", |
| 21 | 22 | "olmo2", |
@@ -27,7 +28,17 @@ class TestRegistryCoverage: | ||
| 27 | 28 | |
| 28 | 29 | @pytest.mark.parametrize( |
| 29 | 30 | "dialect", |
| 30 | - ["chatml", "gemma2", "smollm3", "olmo2", "llama3", "phi3", "phi4mini", "mistral"], | |
| 31 | + [ | |
| 32 | + "chatml", | |
| 33 | + "qwen3thinking", | |
| 34 | + "gemma2", | |
| 35 | + "smollm3", | |
| 36 | + "olmo2", | |
| 37 | + "llama3", | |
| 38 | + "phi3", | |
| 39 | + "phi4mini", | |
| 40 | + "mistral", | |
| 41 | + ], | |
| 31 | 42 | ) |
| 32 | 43 | def test_each_template_file_exists(self, dialect: str) -> None: |
| 33 | 44 | row = get_template(dialect) |
@@ -37,7 +48,17 @@ class TestRegistryCoverage: | ||
| 37 | 48 | |
| 38 | 49 | @pytest.mark.parametrize( |
| 39 | 50 | "dialect", |
| 40 | - ["chatml", "gemma2", "smollm3", "olmo2", "llama3", "phi3", "phi4mini", "mistral"], | |
| 51 | + [ | |
| 52 | + "chatml", | |
| 53 | + "qwen3thinking", | |
| 54 | + "gemma2", | |
| 55 | + "smollm3", | |
| 56 | + "olmo2", | |
| 57 | + "llama3", | |
| 58 | + "phi3", | |
| 59 | + "phi4mini", | |
| 60 | + "mistral", | |
| 61 | + ], | |
| 41 | 62 | ) |
| 42 | 63 | def test_each_has_default_stops(self, dialect: str) -> None: |
| 43 | 64 | row = get_template(dialect) |
@@ -47,6 +68,7 @@ class TestRegistryCoverage: | ||
| 47 | 68 | ("dialect", "required"), |
| 48 | 69 | [ |
| 49 | 70 | ("chatml", {"<|im_end|>", "<|im_start|>"}), |
| 71 | + ("qwen3thinking", {"<|im_end|>", "<|im_start|>"}), | |
| 50 | 72 | ("gemma2", {"<end_of_turn>", "<start_of_turn>"}), |
| 51 | 73 | ("smollm3", {"<|im_end|>", "<|im_start|>"}), |
| 52 | 74 | ("olmo2", {"<|endoftext|>", "<|user|>", "<|assistant|>"}), |
@@ -89,6 +111,14 @@ class TestDialectShapes: | ||
| 89 | 111 | assert "<end_of_turn>" in text |
| 90 | 112 | assert "model" in text |
| 91 | 113 | |
| 114 | + def test_qwen3thinking_keeps_chatml_markers_with_reasoning_defaults(self) -> None: | |
| 115 | + text = load_template_text("qwen3thinking") | |
| 116 | + assert "<|im_start|>" in text | |
| 117 | + assert "<|im_end|>" in text | |
| 118 | + row = get_template("qwen3thinking") | |
| 119 | + assert row.default_temperature == pytest.approx(0.6) | |
| 120 | + assert row.default_top_p == pytest.approx(0.95) | |
| 121 | + | |
| 92 | 122 | def test_smollm3_has_reasoning_system_prompt(self) -> None: |
| 93 | 123 | text = load_template_text("smollm3") |
| 94 | 124 | assert "<|im_start|>system" in text |
tests/unit/export/test_mixtral_template.pyadded@@ -0,0 +1,38 @@ | ||
| 1 | +"""Sprint 40 closeout checks for the Mixtral template row.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +import json | |
| 6 | +from pathlib import Path | |
| 7 | + | |
| 8 | +from dlm.base_models import BASE_MODELS | |
| 9 | +from dlm.export.ollama.modelfile import ModelfileContext, render_modelfile | |
| 10 | +from dlm.export.plan import ExportPlan | |
| 11 | + | |
| 12 | + | |
| 13 | +def _adapter_dir(tmp_path: Path) -> Path: | |
| 14 | + adapter = tmp_path / "adapter" | |
| 15 | + adapter.mkdir() | |
| 16 | + (adapter / "tokenizer_config.json").write_text( | |
| 17 | + json.dumps({"eos_token": "</s>", "added_tokens_decoder": {}}), | |
| 18 | + encoding="utf-8", | |
| 19 | + ) | |
| 20 | + return adapter | |
| 21 | + | |
| 22 | + | |
| 23 | +def test_mixtral_registry_row_renders_through_mistral_template(tmp_path: Path) -> None: | |
| 24 | + spec = BASE_MODELS["mixtral-8x7b-instruct"] | |
| 25 | + text = render_modelfile( | |
| 26 | + ModelfileContext( | |
| 27 | + spec=spec, | |
| 28 | + plan=ExportPlan(quant="Q4_K_M", merged=False), | |
| 29 | + adapter_dir=_adapter_dir(tmp_path), | |
| 30 | + base_gguf_name="base.gguf", | |
| 31 | + adapter_gguf_name="adapter.gguf", | |
| 32 | + dlm_id="01TEST", | |
| 33 | + adapter_version=1, | |
| 34 | + ) | |
| 35 | + ) | |
| 36 | + assert spec.modality == "text-moe" | |
| 37 | + assert "[INST]" in text | |
| 38 | + assert 'PARAMETER stop "[INST]"' in text | |
tests/unit/export/test_phi4_template.pyadded@@ -0,0 +1,37 @@ | ||
| 1 | +"""Sprint 40 closeout checks for the Phi-4 reasoning template row.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +import json | |
| 6 | +from pathlib import Path | |
| 7 | + | |
| 8 | +from dlm.base_models import BASE_MODELS | |
| 9 | +from dlm.export.ollama.modelfile import ModelfileContext, render_modelfile | |
| 10 | +from dlm.export.plan import ExportPlan | |
| 11 | + | |
| 12 | + | |
| 13 | +def _adapter_dir(tmp_path: Path) -> Path: | |
| 14 | + adapter = tmp_path / "adapter" | |
| 15 | + adapter.mkdir() | |
| 16 | + (adapter / "tokenizer_config.json").write_text( | |
| 17 | + json.dumps({"eos_token": "<|end|>", "added_tokens_decoder": {}}), | |
| 18 | + encoding="utf-8", | |
| 19 | + ) | |
| 20 | + return adapter | |
| 21 | + | |
| 22 | + | |
| 23 | +def test_phi4_reasoning_template_keeps_phi_system_preamble_and_stops(tmp_path: Path) -> None: | |
| 24 | + text = render_modelfile( | |
| 25 | + ModelfileContext( | |
| 26 | + spec=BASE_MODELS["phi-4-mini-reasoning"], | |
| 27 | + plan=ExportPlan(quant="Q4_K_M", merged=False), | |
| 28 | + adapter_dir=_adapter_dir(tmp_path), | |
| 29 | + base_gguf_name="base.gguf", | |
| 30 | + adapter_gguf_name="adapter.gguf", | |
| 31 | + dlm_id="01TEST", | |
| 32 | + adapter_version=1, | |
| 33 | + ) | |
| 34 | + ) | |
| 35 | + assert "Your name is Phi, an AI math expert developed by Microsoft." in text | |
| 36 | + assert 'PARAMETER stop "<|assistant|>"' in text | |
| 37 | + assert "PARAMETER temperature 0.6" in text | |
tests/unit/export/test_qwen3_template.pyadded@@ -0,0 +1,37 @@ | ||
| 1 | +"""Sprint 40 closeout checks for the Qwen3 reasoning-template row.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +import json | |
| 6 | +from pathlib import Path | |
| 7 | + | |
| 8 | +from dlm.base_models import BASE_MODELS | |
| 9 | +from dlm.export.ollama.modelfile import ModelfileContext, render_modelfile | |
| 10 | +from dlm.export.plan import ExportPlan | |
| 11 | + | |
| 12 | + | |
| 13 | +def _adapter_dir(tmp_path: Path) -> Path: | |
| 14 | + adapter = tmp_path / "adapter" | |
| 15 | + adapter.mkdir() | |
| 16 | + (adapter / "tokenizer_config.json").write_text( | |
| 17 | + json.dumps({"eos_token": "<|im_end|>", "added_tokens_decoder": {}}), | |
| 18 | + encoding="utf-8", | |
| 19 | + ) | |
| 20 | + return adapter | |
| 21 | + | |
| 22 | + | |
| 23 | +def test_qwen3_thinking_row_uses_distinct_reasoning_template_defaults(tmp_path: Path) -> None: | |
| 24 | + text = render_modelfile( | |
| 25 | + ModelfileContext( | |
| 26 | + spec=BASE_MODELS["qwen3-1.7b-thinking"], | |
| 27 | + plan=ExportPlan(quant="Q4_K_M", merged=False), | |
| 28 | + adapter_dir=_adapter_dir(tmp_path), | |
| 29 | + base_gguf_name="base.gguf", | |
| 30 | + adapter_gguf_name="adapter.gguf", | |
| 31 | + dlm_id="01TEST", | |
| 32 | + adapter_version=1, | |
| 33 | + ) | |
| 34 | + ) | |
| 35 | + assert "PARAMETER temperature 0.6" in text | |
| 36 | + assert "PARAMETER top_p 0.95" in text | |
| 37 | + assert "<|im_start|>assistant" in text | |