1 """Curated launch registry of supported base models.
2
3 Every entry pins an exact HuggingFace commit SHA. Refreshed by
4 `scripts/refresh-registry.py`; weekly CI opens a PR on drift.
5
6 Notes on individual entries:
7
8 - `qwen2.5-3b` ships under the Qwen Research License (free for entities
9 with <100M MAU). We record it as `license_spdx="Other"` and surface
10 the URL via `license_url`; it remains `redistributable=True` because
11 the license permits bundling + redistribution with attribution.
12 **Caveat:** the boolean `redistributable` field does not express the
13 MAU threshold or attribution requirement. A
14 `redistributable_conditions: str | None` field on `BaseModelSpec`
15 plus a pack-time attestation checkbox would encode this properly —
16 deferred follow-up work. Until then, users at the scale threshold
17 must consult the license text themselves.
18 - Llama-3.2 models are gated on HuggingFace. Llama-3.3 8B currently
19 needs a mirror-backed fetch path because Meta exposes it through the
20 Llama API but not a first-party HF repo. DLM still keeps the same
21 acceptance + non-redistribution policy surface for the whole Llama
22 family (`requires_acceptance=True`, `redistributable=False`) —
23 enforced by the pack gate and share-protocol refusal.
24 - SmolLM2 / SmolLM3 and Phi-3.5-mini are permissive (Apache-2.0 / MIT).
25 - `size_gb_fp16` is approximate; the hardware doctor uses it to seed
26 VRAM estimates, which then get refined by runtime checks.
27 """

from __future__ import annotations

from typing import Final

from dlm.base_models.schema import AudioPreprocessorPlan, BaseModelSpec, VlPreprocessorPlan

_ENTRIES: tuple[BaseModelSpec, ...] = (
    BaseModelSpec(
        key="qwen2.5-0.5b",
        hf_id="Qwen/Qwen2.5-0.5B-Instruct",
        revision="7ae557604adf67be50417f59c2c2f167def9a775",
        architecture="Qwen2ForCausalLM",
        params=500_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="chatml",
        gguf_arch="qwen2",
        tokenizer_pre="qwen2",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct/blob/main/LICENSE",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=1.0,
        context_length=32_768,
        recommended_seq_len=2048,
    ),
    BaseModelSpec(
        key="qwen2.5-1.5b",
        hf_id="Qwen/Qwen2.5-1.5B-Instruct",
        revision="989aa7980e4cf806f80c7fef2b1adb7bc71aa306",
        architecture="Qwen2ForCausalLM",
        params=1_500_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="chatml",
        gguf_arch="qwen2",
        tokenizer_pre="qwen2",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct/blob/main/LICENSE",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=3.1,
        context_length=32_768,
        recommended_seq_len=2048,
    ),
    BaseModelSpec(
        key="qwen2.5-3b",
        hf_id="Qwen/Qwen2.5-3B-Instruct",
        revision="aa8e72537993ba99e69dfaafa59ed015b17504d1",
        architecture="Qwen2ForCausalLM",
        params=3_000_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="chatml",
        gguf_arch="qwen2",
        tokenizer_pre="qwen2",
        license_spdx="Other",
        license_url="https://huggingface.co/Qwen/Qwen2.5-3B-Instruct/blob/main/LICENSE",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=6.2,
        context_length=32_768,
        recommended_seq_len=2048,
    ),
    BaseModelSpec(
        key="qwen2.5-coder-1.5b",
        hf_id="Qwen/Qwen2.5-Coder-1.5B-Instruct",
        revision="2e1fd397ee46e1388853d2af2c993145b0f1098a",
        architecture="Qwen2ForCausalLM",
        params=1_500_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="chatml",
        gguf_arch="qwen2",
        tokenizer_pre="qwen2",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct/blob/main/LICENSE",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=3.1,
        context_length=32_768,
        recommended_seq_len=2048,
    ),
    BaseModelSpec(
        key="qwen3-1.7b",
        hf_id="Qwen/Qwen3-1.7B",
        revision="70d244cc86ccca08cf5af4e1e306ecf908b1ad5e",
        architecture="Qwen3ForCausalLM",
        params=1_700_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="chatml",
        gguf_arch="qwen3",
        tokenizer_pre="qwen2",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/Qwen/Qwen3-1.7B/blob/main/LICENSE",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=3.4,
        context_length=32_768,
        recommended_seq_len=2048,
        reasoning_tuned=True,
    ),
    BaseModelSpec(
        key="qwen3-1.7b-thinking",
        hf_id="Qwen/Qwen3-1.7B",
        revision="70d244cc86ccca08cf5af4e1e306ecf908b1ad5e",
        architecture="Qwen3ForCausalLM",
        params=1_700_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="qwen3thinking",
        gguf_arch="qwen3",
        tokenizer_pre="qwen2",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/Qwen/Qwen3-1.7B/blob/main/LICENSE",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=3.4,
        context_length=32_768,
        recommended_seq_len=2048,
        reasoning_tuned=True,
    ),
    BaseModelSpec(
        key="qwen3-4b",
        hf_id="Qwen/Qwen3-4B",
        revision="1cfa9a7208912126459214e8b04321603b3df60c",
        architecture="Qwen3ForCausalLM",
        params=4_000_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="chatml",
        gguf_arch="qwen3",
        tokenizer_pre="qwen2",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/Qwen/Qwen3-4B/blob/main/LICENSE",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=8.0,
        context_length=32_768,
        recommended_seq_len=2048,
        reasoning_tuned=True,
    ),
    BaseModelSpec(
        key="qwen3-8b",
        hf_id="Qwen/Qwen3-8B",
        revision="b968826d9c46dd6066d109eabc6255188de91218",
        architecture="Qwen3ForCausalLM",
        params=8_000_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="chatml",
        gguf_arch="qwen3",
        tokenizer_pre="qwen2",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/Qwen/Qwen3-8B/blob/main/LICENSE",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=16.0,
        context_length=32_768,
        recommended_seq_len=2048,
        reasoning_tuned=True,
    ),
    BaseModelSpec(
        key="llama-3.2-1b",
        hf_id="meta-llama/Llama-3.2-1B-Instruct",
        revision="9213176726f574b556790deb65791e0c5aa438b6",
        architecture="LlamaForCausalLM",
        params=1_000_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="llama3",
        gguf_arch="llama",
        tokenizer_pre="llama-bpe",
        license_spdx="Other",
        license_url="https://www.llama.com/llama3_2/license/",
        requires_acceptance=True,
        redistributable=False,
        size_gb_fp16=2.5,
        context_length=131_072,
        recommended_seq_len=4096,
    ),
    BaseModelSpec(
        key="llama-3.2-3b",
        hf_id="meta-llama/Llama-3.2-3B-Instruct",
        revision="0cb88a4f764b7a12671c53f0838cd831a0843b95",
        architecture="LlamaForCausalLM",
        params=3_000_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="llama3",
        gguf_arch="llama",
        tokenizer_pre="llama-bpe",
        license_spdx="Other",
        license_url="https://www.llama.com/llama3_2/license/",
        requires_acceptance=True,
        redistributable=False,
        size_gb_fp16=6.5,
        context_length=131_072,
        recommended_seq_len=4096,
    ),
    BaseModelSpec(
        key="llama-3.3-8b-instruct",
        # Meta's first-party LlamaCon announcement explicitly says the
        # Llama API can fine-tune "o novo modelo Llama 3.3 8B" ("the
        # new Llama 3.3 8B model"), but there is still no first-party
        # HF repo. DLM therefore fetches weights from the community
        # mirror below, while refresh-registry separately probes Meta's
        # newsroom article for provenance (see the sketch after this
        # entry).
        hf_id="allura-forge/Llama-3.3-8B-Instruct",
        revision="df95224cf87c32d9f4958dd284a07ded620aa4fc",
        architecture="LlamaForCausalLM",
        params=8_000_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="llama3",
        gguf_arch="llama",
        tokenizer_pre="llama-bpe",
        license_spdx="Other",
        license_url="https://llama.meta.com/llama3/license",
        requires_acceptance=True,
        redistributable=False,
        size_gb_fp16=16.5,
        context_length=131_072,
        context_length_effective=8_192,
        recommended_seq_len=4096,
        refresh_check_hf_gating=False,
        provenance_url=(
            "https://about.fb.com/br/news/2025/04/tudo-o-que-anunciamos-no-nosso-primeiro-llamacon/"
        ),
        provenance_match_text="novo modelo Llama 3.3 8B",
    ),
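    # Sketch of that provenance probe (illustrative; the real check lives
    # in scripts/refresh-registry.py, and the variable names here are
    # assumptions):
    #
    #     page = httpx.get(spec.provenance_url, follow_redirects=True).text
    #     if spec.provenance_match_text not in page:
    #         ...  # drift: weekly CI opens a PR for a maintainer to review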
    BaseModelSpec(
        key="smollm3-3b",
        hf_id="HuggingFaceTB/SmolLM3-3B",
        revision="a07cc9a04f16550a088caea529712d1d335b0ac1",
        architecture="SmolLM3ForCausalLM",
        params=3_000_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="smollm3",
        gguf_arch="llama",
        tokenizer_pre="smollm",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/HuggingFaceTB/SmolLM3-3B",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=6.2,
        context_length=65_536,
        recommended_seq_len=4096,
        reasoning_tuned=True,
    ),
    BaseModelSpec(
        key="olmo-2-7b-instruct",
        hf_id="allenai/OLMo-2-1124-7B-Instruct",
        revision="470b1fba1ae01581f270116362ee4aa1b97f4c84",
        architecture="Olmo2ForCausalLM",
        params=7_000_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="olmo2",
        gguf_arch="olmo2",
        tokenizer_pre="superbpe",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/allenai/OLMo-2-1124-7B-Instruct",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=14.6,
        context_length=4096,
        recommended_seq_len=2048,
    ),
    BaseModelSpec(
        key="gemma-2-2b-it",
        hf_id="google/gemma-2-2b-it",
        revision="299a8560bedf22ed1c72a8a11e7dce4a7f9f51f8",
        architecture="Gemma2ForCausalLM",
        params=2_600_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="gemma2",
        gguf_arch="gemma2",
        tokenizer_pre="gemma",
        license_spdx="Gemma",
        license_url="https://ai.google.dev/gemma/terms",
        requires_acceptance=True,
        redistributable=False,
        size_gb_fp16=5.2,
        context_length=8192,
        recommended_seq_len=2048,
    ),
    BaseModelSpec(
        key="gemma-2-9b-it",
        hf_id="google/gemma-2-9b-it",
        revision="11c9b309abf73637e4b6f9a3fa1e92e615547819",
        architecture="Gemma2ForCausalLM",
        params=9_000_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="gemma2",
        gguf_arch="gemma2",
        tokenizer_pre="gemma",
        license_spdx="Gemma",
        license_url="https://ai.google.dev/gemma/terms",
        requires_acceptance=True,
        redistributable=False,
        size_gb_fp16=18.0,
        context_length=8192,
        recommended_seq_len=2048,
    ),
    BaseModelSpec(
        key="smollm2-135m",
        hf_id="HuggingFaceTB/SmolLM2-135M-Instruct",
        revision="12fd25f77366fa6b3b4b768ec3050bf629380bac",
        architecture="LlamaForCausalLM",
        params=135_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="chatml",
        gguf_arch="llama",
        tokenizer_pre="smollm",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/HuggingFaceTB/SmolLM2-135M-Instruct",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=0.27,
        context_length=8_192,
        recommended_seq_len=1024,
    ),
    BaseModelSpec(
        key="smollm2-360m",
        hf_id="HuggingFaceTB/SmolLM2-360M-Instruct",
        revision="a10cc1512eabd3dde888204e902eca88bddb4951",
        architecture="LlamaForCausalLM",
        params=360_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="chatml",
        gguf_arch="llama",
        tokenizer_pre="smollm",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/HuggingFaceTB/SmolLM2-360M-Instruct",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=0.72,
        context_length=8_192,
        recommended_seq_len=1024,
    ),
    BaseModelSpec(
        key="smollm2-1.7b",
        hf_id="HuggingFaceTB/SmolLM2-1.7B-Instruct",
        revision="31b70e2e869a7173562077fd711b654946d38674",
        architecture="LlamaForCausalLM",
        params=1_700_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="chatml",
        gguf_arch="llama",
        tokenizer_pre="smollm",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=3.4,
        context_length=8_192,
        recommended_seq_len=2048,
    ),
    BaseModelSpec(
        key="phi-3.5-mini",
        hf_id="microsoft/Phi-3.5-mini-instruct",
        revision="2fe192450127e6a83f7441aef6e3ca586c338b77",
        architecture="Phi3ForCausalLM",
        params=3_800_000_000,
        target_modules=["qkv_proj", "o_proj", "gate_up_proj", "down_proj"],
        template="phi3",
        gguf_arch="phi3",
        tokenizer_pre="phi-2",
        license_spdx="MIT",
        license_url="https://huggingface.co/microsoft/Phi-3.5-mini-instruct/blob/main/LICENSE",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=7.6,
        context_length=131_072,
        recommended_seq_len=2048,
    ),
    BaseModelSpec(
        key="phi-4-mini-reasoning",
        hf_id="microsoft/Phi-4-mini-reasoning",
        revision="0e3b1e2d02ee478a3743abe3f629e9c0cb722e0a",
        architecture="Phi3ForCausalLM",
        params=3_800_000_000,
        target_modules=["qkv_proj", "o_proj", "gate_up_proj", "down_proj"],
        template="phi4mini",
        gguf_arch="phi3",
        tokenizer_pre="phi-2",
        license_spdx="MIT",
        license_url="https://huggingface.co/microsoft/Phi-4-mini-reasoning/blob/main/LICENSE",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=7.6,
        context_length=131_072,
        recommended_seq_len=2048,
        reasoning_tuned=True,
    ),
    # Mixtral-8x7B-Instruct-v0.1: Apache-2.0 sparse MoE base.
    #
    # HF exposes this as `MixtralForCausalLM`, but the current vendored
    # llama.cpp converter routes it through the Llama path rather than a
    # distinct Mixtral architecture class. We therefore keep
    # `gguf_arch="llama"` while marking the modality as `text-moe` so
    # DLM's gate substrate can detect the sparse-MoE family explicitly.
    BaseModelSpec(
        key="mixtral-8x7b-instruct",
        hf_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
        revision="eba92302a2861cdc0098cc54bc9f17cb2c47eb61",
        architecture="MixtralForCausalLM",
        params=46_700_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="mistral",
        gguf_arch="llama",
        tokenizer_pre="llama-bpe",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=93.4,
        context_length=32_768,
        recommended_seq_len=2048,
        modality="text-moe",
    ),
    # Mistral Small 3.1 24B Instruct: Apache-2.0 multimodal base with
    # native vision support and 128k context.
    #
    # An earlier draft treated this as text-only; the live HF config
    # is `Mistral3ForConditionalGeneration` with both text and
    # vision towers, so we register it as vision-language. The current
    # processor config pins `[IMG]` as the image placeholder and a
    # longest edge of 1540 px. DLM's current `VlPreprocessorPlan`
    # abstraction is fixed-size only, so we conservatively pin
    # 1540×1540 here until dynamic ranges land.
    BaseModelSpec(
        key="mistral-small-3.1-24b-instruct",
        hf_id="mistralai/Mistral-Small-3.1-24B-Instruct-2503",
        revision="68faf511d618ef198fef186659617cfd2eb8e33a",
        architecture="Mistral3ForConditionalGeneration",
        params=24_000_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="mistral",
        gguf_arch="mistral3",
        tokenizer_pre="tekken",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=48.0,
        context_length=131_072,
        recommended_seq_len=4096,
        modality="vision-language",
        vl_preprocessor_plan=VlPreprocessorPlan(
            target_size=(1540, 1540),
            resize_policy="fixed",
            image_token="[IMG]",
            num_image_tokens=3025,
        ),
    ),
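    # The 3025 image tokens above are consistent with a 55×55 token grid
    # over the pinned 1540 px edge (1540 / 28 px per token = 55; 55 * 55
    # = 3025). That 28 px stride is inferred from the numbers here, not a
    # documented constant.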
    # --- Vision-language bases ----------------------------------------------
    # PaliGemma-3B-mix-224: Google's instruction-tuned VL base built on
    # Gemma-2B + SigLIP-So400m. Gated under the Gemma license; cannot
    # redistribute inside a `.dlm.pack` (same pattern as Llama-3.2).
    # Training targets Gemma's transformer blocks. The vision tower is
    # trained jointly only when modules_to_save expands to
    # ["embed_tokens", "lm_head"]; the current entry keeps
    # modules_to_save empty, so only the LLM-side LoRA adapters move and
    # the vision tower stays frozen (see the sketch after this entry).
    #
    # `gguf_arch` / `tokenizer_pre` are set to tags the current vendored
    # llama.cpp doesn't recognize; the export probes surface
    # UNSUPPORTED and refuse GGUF conversion until GGUF support lands.
    # HF-snapshot export (`dlm export --hf-snapshot`) still works.
    BaseModelSpec(
        key="paligemma-3b-mix-224",
        hf_id="google/paligemma-3b-mix-224",
        revision="d1d8734c9c3ad0ccfeea4afc270faa356c2ba515",
        architecture="PaliGemmaForConditionalGeneration",
        params=2_900_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="paligemma",
        gguf_arch="paligemma",
        tokenizer_pre="gemma",
        license_spdx="Other",
        license_url="https://ai.google.dev/gemma/terms",
        requires_acceptance=True,
        redistributable=False,
        size_gb_fp16=6.5,
        context_length=8_192,
        recommended_seq_len=2048,
        modality="vision-language",
        vl_preprocessor_plan=VlPreprocessorPlan(
            target_size=(224, 224),
            resize_policy="fixed",
            image_token="<image>",
            num_image_tokens=256,
        ),
    ),
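    # Sketch of the modules_to_save expansion mentioned above (hypothetical;
    # DLM builds its adapter config elsewhere, this is peft-style pseudocode):
    #
    #     LoraConfig(
    #         target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    #         modules_to_save=["embed_tokens", "lm_head"],  # joint training
    #     )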
    # Qwen2-VL-2B-Instruct: Alibaba's Apache-2.0 VL base with dynamic-
    # resolution support in native HF. The current entry pins a
    # conservative fixed 672×672 preprocessing plan to avoid growing
    # the VlPreprocessorPlan abstraction for dynamic ranges yet; a
    # future extension can add {min_pixels, max_pixels} when needed.
    #
    # 672×672 with Qwen2-VL's 28-pixel patch-merger grid yields 24×24 =
    # 576 vision tokens per image. `<|image_pad|>` is the runtime
    # placeholder the processor expands into that window.
    #
    # Apache-2.0 (redistributable, no acceptance). `AutoModelForImageTextToText`
    # handles this arch natively since transformers ≥4.45, the same path
    # PaliGemma loads through.
    BaseModelSpec(
        key="qwen2-vl-2b-instruct",
        hf_id="Qwen/Qwen2-VL-2B-Instruct",
        revision="895c3a49bc3fa70a340399125c650a463535e71c",
        architecture="Qwen2VLForConditionalGeneration",
        params=2_200_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="qwen2-vl",
        gguf_arch="qwen2-vl",
        tokenizer_pre="qwen2",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct/blob/main/LICENSE",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=4.5,
        context_length=32_768,
        recommended_seq_len=2048,
        modality="vision-language",
        vl_preprocessor_plan=VlPreprocessorPlan(
            target_size=(672, 672),
            resize_policy="fixed",
            image_token="<|image_pad|>",
            num_image_tokens=576,
        ),
    ),
    # InternVL2-2B: OpenGVLab's MIT-licensed 2B VL model. Uses fixed
    # 448×448 input (32×32 patch grid with 2×2 pixel-shuffle → 256
    # vision tokens per image).
    #
    # **Security surface: trust_remote_code=True.** InternVL2's HF
    # integration is `InternVLChatModel`, a custom class defined in
    # `modeling_internvl_chat.py` inside the model repo, not in
    # transformers. Loading it requires executing that repo's code.
    # The loader sets `trust_remote_code=True` when this spec is
    # picked (`trust_remote_code` field below), so selecting this base
    # as `base_model: internvl2-2b` in a .dlm is the user's informed
    # acknowledgment that remote code runs at load time (see the sketch
    # after this entry). The cookbook + vl-memory.md flag this too.
    BaseModelSpec(
        key="internvl2-2b",
        hf_id="OpenGVLab/InternVL2-2B",
        revision="e4f6747bd20f139e637642c6a058c6bd00b36919",
        architecture="InternVLChatModel",
        params=2_200_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="internvl2",
        gguf_arch="internvl2",
        tokenizer_pre="internvl2",
        license_spdx="MIT",
        license_url="https://huggingface.co/OpenGVLab/InternVL2-2B/blob/main/LICENSE",
        requires_acceptance=False,
        redistributable=True,
        trust_remote_code=True,
        size_gb_fp16=4.4,
        context_length=8_192,
        recommended_seq_len=2048,
        modality="vision-language",
        vl_preprocessor_plan=VlPreprocessorPlan(
            target_size=(448, 448),
            resize_policy="fixed",
            image_token="<IMG_CONTEXT>",
            num_image_tokens=256,
        ),
    ),
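    # What that flag means at load time (illustrative; the real loader
    # lives in DLM's runtime and this exact call shape is an assumption):
    #
    #     from transformers import AutoModel
    #     AutoModel.from_pretrained(
    #         spec.hf_id, revision=spec.revision, trust_remote_code=True,
    #     )  # executes modeling_internvl_chat.py from the pinned revision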
    BaseModelSpec(
        key="internvl3-2b",
        hf_id="OpenGVLab/InternVL3-2B",
        revision="899155015275a9b7338c7f4677e19c784e0e5a21",
        architecture="InternVLChatModel",
        params=2_000_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="internvl2",
        gguf_arch="internvl3",
        tokenizer_pre="internvl3",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/OpenGVLab/InternVL3-2B",
        requires_acceptance=False,
        redistributable=True,
        trust_remote_code=True,
        size_gb_fp16=4.0,
        context_length=32_768,
        recommended_seq_len=2048,
        modality="vision-language",
        vl_preprocessor_plan=VlPreprocessorPlan(
            target_size=(448, 448),
            resize_policy="dynamic",
            image_token="<image>",
            num_image_tokens=256,
        ),
    ),
    # --- Audio-language bases -----------------------------------------------
    # Qwen2-Audio-7B-Instruct: Alibaba's open audio-text model. Uses
    # the Qwen2 LLM backbone + a dedicated audio encoder. Apache-2.0
    # and currently ungated on HF, so the registry keeps it open and
    # redistributable like the other permissive Qwen rows.
    #
    # The 16 kHz pin + 30 s max length match the training-time
    # defaults documented in the Qwen2-Audio card. Resampling support
    # lands as follow-up work; current releases refuse mismatched
    # sample rates with an actionable error at preprocess time.
    #
    # If the pinned SHA is a stale placeholder, the weekly
    # `scripts/refresh-registry.py --check` run surfaces the drift and
    # a maintainer pastes in the real SHA.
    BaseModelSpec(
        key="qwen2-audio-7b-instruct",
        hf_id="Qwen/Qwen2-Audio-7B-Instruct",
        revision="0a095220c30b7b31434169c3086508ef3ea5bf0a",
        architecture="Qwen2AudioForConditionalGeneration",
        params=8_400_000_000,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        template="qwen2-audio",
        gguf_arch="qwen2-audio",
        tokenizer_pre="qwen2",
        license_spdx="Apache-2.0",
        license_url="https://huggingface.co/Qwen/Qwen2-Audio-7B-Instruct",
        requires_acceptance=False,
        redistributable=True,
        size_gb_fp16=15.5,
        context_length=8_192,
        recommended_seq_len=2048,
        modality="audio-language",
        audio_preprocessor_plan=AudioPreprocessorPlan(
            sample_rate=16_000,
            max_length_seconds=30.0,
            audio_token="<|AUDIO|>",
            num_audio_tokens=750,
        ),
    ),
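    # 750 audio tokens matches 30 s at 25 tokens/s (consistent with a
    # Whisper-style 50 frames/s encoder followed by 2x pooling); this is
    # inferred from the plan above, not a documented constant.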
)


BASE_MODELS: Final[dict[str, BaseModelSpec]] = {entry.key: entry for entry in _ENTRIES}


def known_keys() -> tuple[str, ...]:
    """Stable ordering for use in error messages / CLI listings."""
    return tuple(BASE_MODELS.keys())
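

# Illustration of how `size_gb_fp16` seeds a VRAM estimate (see the module
# docstring). Hypothetical arithmetic: the real hardware doctor lives
# elsewhere and refines the seed with runtime checks, and the 1.2 headroom
# factor here is an assumption for illustration only:
#
#     spec = BASE_MODELS["qwen2.5-1.5b"]
#     seed_gb = spec.size_gb_fp16 * 1.2  # 3.1 GB weights + ~20% overhead ≈ 3.7 GB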