1 """Compatibility probes run against a `BaseModelSpec`.
2
3 Each probe is an independent function returning `ProbeResult`. `run_all`
4 aggregates them into a `ProbeReport`. Probes must be non-destructive
5 (read-only) and offline-safe where possible — the refresh-registry
6 script exercises them online.
7
8 Five probes:
9
10 1. `probe_architecture` — `AutoConfig(hf_id).architectures[0]` matches
11 `spec.architecture`. Catches model-surgery mismatches and wrong
12 revisions.
13 2. `probe_chat_template` — tokenizer has a non-empty `chat_template`
14 attribute. Essential for Modelfile emission.
15 3. `probe_gguf_arch_supported` — scans the vendored
16 `convert_hf_to_gguf.py` for a `@Model.register("<arch>")` matching
17 `spec.gguf_arch`. If the vendored submodule is absent, the probe
18 skips with a clear message.
19 4. `probe_pretokenizer_label` — reads `vendor/llama_cpp_pretokenizer_hashes.json`
20 (populated by `scripts/bump-llama-cpp.sh`) and checks the spec's
21 `tokenizer_pre` is a known **label**. Silent drift here causes
22 silent GGUF export failures per findings §9; the probe catches it
23 early. This is the offline fast-check.
24 5. `probe_pretokenizer_hash` — real fingerprint check (see
25 CLAUDE.md pitfall #5). Tokenizes `_LLAMA_CPP_CHKTXT` and compares
26 the sha256 of the stringified token sequence against a vendored
27 per-label fingerprint table. Detects silent upstream tokenization
28 changes that the label probe would miss. Requires a local HF
29 cache; skipped cleanly otherwise.
30
31 Heavy imports (`transformers.AutoConfig`, `AutoTokenizer`) happen
32 inside each probe so the module loads cheaply.
33 """

from __future__ import annotations

import json
import logging
import re
from pathlib import Path
from typing import Final

from dlm.base_models.errors import GatedModelError, ProbeReport, ProbeResult
from dlm.base_models.schema import BaseModelSpec

_LOG = logging.getLogger(__name__)

# Vendored artifact locations.
_REPO_ROOT: Final[Path] = Path(__file__).resolve().parents[3]
VENDOR_LLAMA_CPP_DEFAULT: Final[Path] = _REPO_ROOT / "vendor" / "llama.cpp"
VENDOR_PRETOKENIZER_HASHES_DEFAULT: Final[Path] = (
    _REPO_ROOT / "vendor" / "llama_cpp_pretokenizer_hashes.json"
)
VENDOR_PRETOKENIZER_FINGERPRINTS_DEFAULT: Final[Path] = (
    _REPO_ROOT / "vendor" / "llama_cpp_pretokenizer_fingerprints.json"
)

# The canonical test string llama.cpp uses at `convert_hf_to_gguf.py::
# get_vocab_base_pre`. Tokenize this under the model's BPE tokenizer,
# stringify the resulting token-id list, sha256 it — that digest is
# the fingerprint llama.cpp maps to one of its pre-tokenizer types.
# Keep verbatim; any edit here desynchronizes us from llama.cpp's
# identification logic (see CLAUDE.md pitfall #5).
_LLAMA_CPP_CHKTXT: Final[str] = (
    "\n \n\n \n\n\n \t \t\t \t\n  \n   \n    \n     \n"
    "🚀 (normal) 😶\u200d🌫️ (multiple emojis concatenated) ✅ "
    "🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 "
    "កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ "
    "------======= нещо на Български '''''''```````\"\"\"\"......!!!!!!?????? "
    "I've been 'told he's there, 'RE you sure? 'M not sure I'll make it, "
    "'D you like some tea? We'Ve a'lL"
)
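
# Illustrative sketch of the fingerprint computation this constant feeds
# (mirrors `probe_pretokenizer_hash` below; the HF id is a hypothetical
# example, and the tokenizer must already be cached locally):
#
#     import hashlib
#     from transformers import AutoTokenizer
#
#     tok = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B")
#     token_ids = tok.encode(_LLAMA_CPP_CHKTXT)
#     fingerprint = hashlib.sha256(str(token_ids).encode()).hexdigest()
#
# That `fingerprint` is what llama.cpp's get_vocab_base_pre() switches on.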


# --- individual probes --------------------------------------------------------


def probe_architecture(spec: BaseModelSpec) -> ProbeResult:
    """`AutoConfig.from_pretrained(hf_id, revision).architectures[0]` matches."""
    try:
        from huggingface_hub.errors import GatedRepoError
        from transformers import AutoConfig
    except ImportError as exc:  # pragma: no cover — dev env always has transformers
        return ProbeResult(
            name="architecture",
            passed=True,
            detail=f"skipped: transformers unavailable ({exc})",
            skipped=True,
        )

    try:
        cfg = AutoConfig.from_pretrained(spec.hf_id, revision=spec.revision)
    except GatedRepoError as exc:
        raise GatedModelError(spec.hf_id, spec.license_url) from exc
    except Exception as exc:
        return ProbeResult(
            name="architecture",
            passed=False,
            detail=f"load failed: {type(exc).__name__}: {exc}",
        )

    architectures = getattr(cfg, "architectures", None)
    if not architectures:
        return ProbeResult(
            name="architecture",
            passed=False,
            detail="config.json has no `architectures` entry",
        )

    observed = architectures[0]
    if observed != spec.architecture:
        return ProbeResult(
            name="architecture",
            passed=False,
            detail=f"expected {spec.architecture!r}, got {observed!r}",
        )
    return ProbeResult(
        name="architecture",
        passed=True,
        detail=f"matched {observed!r}",
    )


def probe_chat_template(spec: BaseModelSpec) -> ProbeResult:
    """Tokenizer carries a non-empty `chat_template` attribute."""
    try:
        from huggingface_hub.errors import GatedRepoError
        from transformers import AutoTokenizer
    except ImportError as exc:  # pragma: no cover
        return ProbeResult(
            name="chat_template",
            passed=True,
            detail=f"skipped: transformers unavailable ({exc})",
            skipped=True,
        )

    try:
        tokenizer = AutoTokenizer.from_pretrained(spec.hf_id, revision=spec.revision)
    except GatedRepoError as exc:
        raise GatedModelError(spec.hf_id, spec.license_url) from exc
    except Exception as exc:
        return ProbeResult(
            name="chat_template",
            passed=False,
            detail=f"load failed: {type(exc).__name__}: {exc}",
        )

    template = getattr(tokenizer, "chat_template", None)
    if not template:
        return ProbeResult(
            name="chat_template",
            passed=False,
            detail="tokenizer has no chat_template",
        )
    return ProbeResult(
        name="chat_template",
        passed=True,
        detail=f"present ({len(template)} chars)",
    )


def probe_gguf_arch_supported(
    spec: BaseModelSpec,
    *,
    vendor_path: Path | None = None,
) -> ProbeResult:
    """Scan vendored ``convert_hf_to_gguf.py`` for a ``@Model.register(...)``
    or ``@ModelBase.register(...)`` decorator listing ``spec.architecture``.

    If the vendored converter submodule is absent, this probe skips.
    """
    script = (vendor_path or VENDOR_LLAMA_CPP_DEFAULT) / "convert_hf_to_gguf.py"
    if not script.exists():
        return ProbeResult(
            name="gguf_arch",
            passed=True,
            detail=f"skipped: {script} not present (vendor/llama.cpp missing)",
            skipped=True,
        )

    try:
        source = script.read_text(encoding="utf-8", errors="replace")
    except OSError as exc:
        return ProbeResult(
            name="gguf_arch",
            passed=False,
            detail=f"read failed: {exc}",
        )

    # llama.cpp's converter registers HF architecture class names via
    # ``@ModelBase.register("Qwen3ForCausalLM", "Qwen3Model", ...)`` (the
    # class was renamed from ``@Model.register`` mid-2024; we accept both
    # forms so this probe stays tolerant if the vendored copy is ever
    # pinned to an older tag). A single decorator may list *multiple*
    # architectures, so we capture the full parenthesized arg list and
    # then extract every quoted string from it.
    decorator_re = re.compile(r"""@(?:Model|ModelBase)\.register\(([^)]*)\)""")
    arg_string_re = re.compile(r"""["']([^"']+)["']""")
    found_archs: set[str] = set()
    for args in decorator_re.findall(source):
        found_archs.update(arg_string_re.findall(args))
    # Compare against the HF architecture (what the decorator actually
    # registers), not the short gguf label. Historically the probe
    # compared ``spec.gguf_arch`` — a silent false-negative, because
    # llama.cpp registers ``"Qwen2ForCausalLM"``, not ``"qwen2"``; the
    # probe only passed for registered models, which bypass this code
    # path entirely.
    if spec.architecture in found_archs:
        return ProbeResult(
            name="gguf_arch",
            passed=True,
            detail=f"converter registers {spec.architecture!r}",
        )
    return ProbeResult(
        name="gguf_arch",
        passed=False,
        detail=(
            f"{spec.architecture!r} not in convert_hf_to_gguf.py "
            f"(scanned {len(found_archs)} registrations)"
        ),
    )
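
# Example of what the scan matches (decorator shape as seen in upstream
# convert_hf_to_gguf.py; the exact architecture list varies by pin):
#
#     @ModelBase.register("Qwen2ForCausalLM", "Qwen2Model")
#
# `decorator_re` captures the arg list '"Qwen2ForCausalLM", "Qwen2Model"',
# and `arg_string_re` then yields both quoted names, so `found_archs`
# gains {"Qwen2ForCausalLM", "Qwen2Model"}.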


def probe_pretokenizer_label(
    spec: BaseModelSpec,
    *,
    hashes_path: Path | None = None,
) -> ProbeResult:
    """Check `spec.tokenizer_pre` is a known pre-tokenizer label.

    The vendored table is a JSON array of label strings that llama.cpp
    recognizes in `get_vocab_base_pre()`. Missing table → skip.

    NOTE: this is a *label* probe, not a hash probe.
    `probe_pretokenizer_hash` is the canonical fingerprint check; this
    probe only checks coarse compatibility via the label.
    """
    path = hashes_path or VENDOR_PRETOKENIZER_HASHES_DEFAULT
    if not path.exists():
        return ProbeResult(
            name="pretokenizer_label",
            passed=True,
            detail=f"skipped: {path} not present (bump-llama-cpp.sh maintains it)",
            skipped=True,
        )

    try:
        labels = set(json.loads(path.read_text(encoding="utf-8")))
    except (OSError, json.JSONDecodeError) as exc:
        return ProbeResult(
            name="pretokenizer_label",
            passed=False,
            detail=f"table unreadable: {exc}",
        )
    except TypeError as exc:
        return ProbeResult(
            name="pretokenizer_label",
            passed=False,
            detail=f"table has wrong shape (expected list[str]): {exc}",
        )

    if spec.tokenizer_pre in labels:
        return ProbeResult(
            name="pretokenizer_label",
            passed=True,
            detail=f"{spec.tokenizer_pre!r} known to llama.cpp",
        )
    return ProbeResult(
        name="pretokenizer_label",
        passed=False,
        detail=(
            f"{spec.tokenizer_pre!r} not in vendored label table; "
            "run scripts/bump-llama-cpp.sh or pick another base"
        ),
    )
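
# The vendored label table is a flat JSON array; illustrative entries (real
# llama.cpp pre-tokenizer labels, but not necessarily the pinned contents):
#
#     ["llama-bpe", "deepseek-llm", "falcon", "gpt-2", "qwen2"]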


def probe_pretokenizer_hash(
    spec: BaseModelSpec,
    *,
    fingerprints_path: Path | None = None,
) -> ProbeResult:
    """Compute the real llama.cpp pre-tokenizer fingerprint and compare.

    See CLAUDE.md pitfall #5. The label probe (above) only checks
    membership in a string table; llama.cpp itself identifies the
    pre-tokenizer by sha256-hashing the token-id sequence produced by
    tokenizing a stable test string (`_LLAMA_CPP_CHKTXT`). We do the
    same here — if the upstream tokenizer changes behavior (new
    revision, silently different merges), the fingerprint drifts and
    this probe fails loudly *before* a broken GGUF reaches Ollama.

    The fingerprint table at
    `vendor/llama_cpp_pretokenizer_fingerprints.json` is maintained by
    `scripts/bump-llama-cpp.sh`. Missing table or no entry for the
    spec's `tokenizer_pre` label → skip (the label probe still runs).

    Requires a local HF cache (`local_files_only=True`); skipped
    cleanly in CI environments without the tokenizer downloaded.
    """
    import hashlib

    path = fingerprints_path or VENDOR_PRETOKENIZER_FINGERPRINTS_DEFAULT
    if not path.exists():
        return ProbeResult(
            name="pretokenizer_hash",
            passed=True,
            detail=f"skipped: {path} not present (bump-llama-cpp.sh maintains it)",
            skipped=True,
        )

    try:
        table = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, json.JSONDecodeError) as exc:
        return ProbeResult(
            name="pretokenizer_hash",
            passed=False,
            detail=f"fingerprint table unreadable: {exc}",
        )
    if not isinstance(table, dict):
        return ProbeResult(
            name="pretokenizer_hash",
            passed=False,
            detail="fingerprint table has wrong shape (expected {label: sha256})",
        )
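
    # Expected table shape, label → hex digest (illustrative, not the
    # pinned data):
    #
    #     {"qwen2": "<sha256 hex of str(token_ids) for _LLAMA_CPP_CHKTXT>"}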

    expected = table.get(spec.tokenizer_pre)
    if not isinstance(expected, str):
        return ProbeResult(
            name="pretokenizer_hash",
            passed=True,
            detail=(
                f"skipped: no fingerprint recorded for {spec.tokenizer_pre!r}; "
                "run scripts/bump-llama-cpp.sh to refresh the table"
            ),
            skipped=True,
        )

    try:
        from huggingface_hub.errors import GatedRepoError
        from transformers import AutoTokenizer
    except ImportError as exc:  # pragma: no cover — dev env always has transformers
        return ProbeResult(
            name="pretokenizer_hash",
            passed=True,
            detail=f"skipped: transformers unavailable ({exc})",
            skipped=True,
        )

    try:
        tok = AutoTokenizer.from_pretrained(
            spec.hf_id, revision=spec.revision, local_files_only=True
        )
    except GatedRepoError as exc:
        raise GatedModelError(spec.hf_id, spec.license_url) from exc
    except Exception as exc:
        # Not a probe *failure* — tokenizer simply isn't cached locally.
        # Online refresh-registry runs will exercise the real check.
        return ProbeResult(
            name="pretokenizer_hash",
            passed=True,
            detail=f"skipped: cannot load tokenizer offline ({type(exc).__name__})",
            skipped=True,
        )

    try:
        tokens = tok.encode(_LLAMA_CPP_CHKTXT)
    except Exception as exc:
        return ProbeResult(
            name="pretokenizer_hash",
            passed=False,
            detail=f"tokenizer.encode failed on chktxt: {type(exc).__name__}: {exc}",
        )

    digest = hashlib.sha256(str(tokens).encode()).hexdigest()
    if digest != expected:
        return ProbeResult(
            name="pretokenizer_hash",
            passed=False,
            detail=(
                f"pre-tokenizer drifted for {spec.tokenizer_pre!r}: "
                f"expected {expected[:12]}…, got {digest[:12]}…. "
                "Upstream may have changed tokenization; re-pin revision "
                "or run scripts/bump-llama-cpp.sh to refresh the fingerprint."
            ),
        )
    return ProbeResult(
        name="pretokenizer_hash",
        passed=True,
        detail=f"fingerprint matches {spec.tokenizer_pre!r} ({digest[:12]}…)",
    )


def probe_vl_image_token(spec: BaseModelSpec) -> ProbeResult:
    """Verify the processor exposes the spec's image-placeholder token.

    For `modality="vision-language"` bases the preprocessor plan pins
    `image_token` (e.g. `"<image>"`). `AutoProcessor.from_pretrained`
    must expose it as a known additional-special token — otherwise
    mixed-row collation can't expand the placeholder into the model's
    fixed `num_image_tokens` slots and training silently runs on
    text-only rows.

    Non-VL bases skip this probe cleanly.
    """
    if spec.modality != "vision-language" or spec.vl_preprocessor_plan is None:
        return ProbeResult(
            name="vl_image_token",
            passed=True,
            detail="skipped: spec is not a vision-language base",
            skipped=True,
        )

    try:
        from huggingface_hub.errors import GatedRepoError

        from dlm.base_models._typed_shims import load_auto_processor
    except ImportError as exc:  # pragma: no cover
        return ProbeResult(
            name="vl_image_token",
            passed=True,
            detail=f"skipped: transformers unavailable ({exc})",
            skipped=True,
        )

    try:
        processor = load_auto_processor(spec.hf_id, revision=spec.revision)
    except GatedRepoError as exc:
        raise GatedModelError(spec.hf_id, spec.license_url) from exc
    except Exception as exc:
        return ProbeResult(
            name="vl_image_token",
            passed=False,
            detail=f"processor load failed: {type(exc).__name__}: {exc}",
        )

    # AutoProcessor wraps a tokenizer on `.tokenizer`. The image
    # placeholder must tokenize to a *single* known token — otherwise
    # the collator can't locate the insertion points deterministically.
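    # For example (illustrative id; the real id depends on the base's vocab):
    #     tokenizer.encode("<image>", add_special_tokens=False) == [32000]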
    placeholder = spec.vl_preprocessor_plan.image_token
    tokenizer = getattr(processor, "tokenizer", None)
    if tokenizer is None:
        return ProbeResult(
            name="vl_image_token",
            passed=False,
            detail="processor has no `.tokenizer` attribute",
        )
    try:
        token_ids = tokenizer.encode(placeholder, add_special_tokens=False)
    except Exception as exc:
        return ProbeResult(
            name="vl_image_token",
            passed=False,
            detail=f"tokenizer rejected placeholder {placeholder!r}: {exc}",
        )
    if len(token_ids) != 1:
        return ProbeResult(
            name="vl_image_token",
            passed=False,
            detail=(
                f"placeholder {placeholder!r} tokenized to "
                f"{len(token_ids)} tokens (expected 1)"
            ),
        )
    return ProbeResult(
        name="vl_image_token",
        passed=True,
        detail=f"placeholder {placeholder!r} resolves to token id {token_ids[0]}",
    )


def probe_audio_token(spec: BaseModelSpec) -> ProbeResult:
    """Verify the processor exposes the spec's audio-placeholder token.

    Parallel to `probe_vl_image_token` — for `modality="audio-language"`
    bases the preprocessor plan pins `audio_token` (e.g. `"<|AUDIO|>"`).
    `AutoProcessor.from_pretrained` must expose it as a single known
    token; otherwise the custom audio collator can't locate the
    insertion point when expanding the placeholder into the model's
    fixed audio-token window.

    Non-audio bases skip this probe cleanly.
    """
    if spec.modality != "audio-language" or spec.audio_preprocessor_plan is None:
        return ProbeResult(
            name="audio_token",
            passed=True,
            detail="skipped: spec is not an audio-language base",
            skipped=True,
        )

    try:
        from huggingface_hub.errors import GatedRepoError

        from dlm.base_models._typed_shims import load_auto_processor
    except ImportError as exc:  # pragma: no cover
        return ProbeResult(
            name="audio_token",
            passed=True,
            detail=f"skipped: transformers unavailable ({exc})",
            skipped=True,
        )

    try:
        processor = load_auto_processor(spec.hf_id, revision=spec.revision)
    except GatedRepoError as exc:
        raise GatedModelError(spec.hf_id, spec.license_url) from exc
    except Exception as exc:
        return ProbeResult(
            name="audio_token",
            passed=False,
            detail=f"processor load failed: {type(exc).__name__}: {exc}",
        )

    placeholder = spec.audio_preprocessor_plan.audio_token
    tokenizer = getattr(processor, "tokenizer", None)
    if tokenizer is None:
        return ProbeResult(
            name="audio_token",
            passed=False,
            detail="processor has no `.tokenizer` attribute",
        )
    try:
        token_ids = tokenizer.encode(placeholder, add_special_tokens=False)
    except Exception as exc:
        return ProbeResult(
            name="audio_token",
            passed=False,
            detail=f"tokenizer rejected placeholder {placeholder!r}: {exc}",
        )
    if len(token_ids) != 1:
        return ProbeResult(
            name="audio_token",
            passed=False,
            detail=(
                f"placeholder {placeholder!r} tokenized to "
                f"{len(token_ids)} tokens (expected 1)"
            ),
        )
    return ProbeResult(
        name="audio_token",
        passed=True,
        detail=f"placeholder {placeholder!r} resolves to token id {token_ids[0]}",
    )


# --- aggregate ---------------------------------------------------------------


def run_all(spec: BaseModelSpec, *, skip_export_probes: bool = False) -> ProbeReport:
    """Run every probe; aggregate into a `ProbeReport`.

    `GatedModelError` from an individual probe propagates immediately —
    it's not a "probe failure" in the registry-drift sense; it's an
    acceptance-flow signal.

    `skip_export_probes=True` drops the three llama.cpp / GGUF-conversion
    checks (`gguf_arch_supported`, `pretokenizer_label`,
    `pretokenizer_hash`). Users opt into this when they want training
    + HF inference on a base whose architecture ships faster than our
    vendored llama.cpp can absorb (e.g. brand-new Qwen3 on a llama.cpp
    pin from last month). They forfeit `dlm export` to Ollama until
    the vendored copy catches up. VL and audio bases auto-opt-out of
    export probes because current GGUF export does not support them.
    """
    from dlm.modality import modality_for

    dispatch = modality_for(spec)
    core: tuple[ProbeResult, ...] = (probe_architecture(spec),)
    if dispatch.accepts_images:
        core = (*core, probe_vl_image_token(spec))
    elif dispatch.accepts_audio:
        core = (*core, probe_audio_token(spec))
    else:
        core = (*core, probe_chat_template(spec))

    # Media bases (VL + audio) bypass the llama.cpp-converter probes.
    # The export path refuses GGUF cleanly for both and emits an HF
    # snapshot instead.
    is_media = dispatch.requires_processor
    if skip_export_probes or is_media:
        return ProbeReport(hf_id=spec.hf_id, results=core)
    results = (
        *core,
        probe_gguf_arch_supported(spec),
        probe_pretokenizer_label(spec),
        probe_pretokenizer_hash(spec),
    )
    return ProbeReport(hf_id=spec.hf_id, results=results)
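

# Usage sketch (hedged: the registry accessor below is hypothetical; adapt
# to however specs are loaded in this repo):
#
#     from dlm.base_models.registry import get_spec  # hypothetical
#
#     spec = get_spec("qwen2.5-0.5b-instruct")
#     report = run_all(spec)  # or run_all(spec, skip_export_probes=True)
#     for r in report.results:
#         status = "SKIP" if r.skipped else ("PASS" if r.passed else "FAIL")
#         print(f"{status:>4} {r.name}: {r.detail}")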