"""Pydantic models for `.dlm` frontmatter.

Every model is `extra="forbid"` and `frozen=True` — strict validation and
immutable values. Default values must match the shape produced by
`tests/fixtures/dlm_factory.py` so round-trips are stable.

Versioned schema dispatch lives in `dlm.doc.versioned`; this module
defines the current frontmatter shape.
"""

from __future__ import annotations

import re
from typing import Final, Literal

from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator

# Crockford base32 alphabet used by ULID: 0-9, A-Z minus I, L, O, U.
_ULID_RE: Final[re.Pattern[str]] = re.compile(r"^[0-9A-HJ-KM-NP-TV-Z]{26}$")

# Adapter names: lowercase alpha start, alphanumeric + underscore tail.
# Keeps store paths safe (adapter/<name>/versions/) and log lines readable.
_ADAPTER_NAME_RE: Final[re.Pattern[str]] = re.compile(r"^[a-z][a-z0-9_]{0,31}$")

CURRENT_SCHEMA_VERSION: Final[int] = 15
"""Schema version this parser implements.

New fields bump the version and register a migrator in the same
commit — enforced by `test_all_versions_have_migrator_up_to_latest`.

- v2 renamed `training.dpo` → `training.preference` to accommodate
  both DPO and ORPO under one `method`-switched config.
- v3 added the additive `training.cpt` block (DAPT schedule +
  embedding warm-up) for continued-pretraining refinements.
- v4 added the additive `training.adapters` map for named
  multi-adapter composition; flat `adapter`/`lora_*` keys remain the
  single-adapter shorthand.
- v5 added the additive `training.precision` override (opt-in
  fp16/bf16 on MPS after the NaN-adapter bug).
- v6 added the additive `training.sources` block — declarative
  file-tree directives that synthesize PROSE sections at train time,
  letting a `.dlm` act as a training plan over content stored
  elsewhere on disk.
- v10 introduced `SectionType.IMAGE` and the `::image path="..."
  alt="..."::` fence grammar for multi-modal training; the body
  schema is strictly additive and the fence extension is
  backward-compatible (images are parsed via a separate attribute
  grammar rather than changing the existing `::type#name::` form).
- v11 added `SectionType.AUDIO` with the parallel `::audio path="..."
  transcript="..."::` fence — the transcript becomes the text-side
  supervision (there is no equivalent of the optional image caption;
  audio without a transcript has no training signal).
- v12 added the additive `training.audio` block (currently one field,
  `auto_resample: bool`) — opt-in automatic resampling when audio
  files don't match the base's pinned rate. Default False preserves
  the "refuse on SR mismatch" contract.
- v13 was an identity bump paired with the 2026 base-model registry
  refresh: the frontmatter shape is unchanged, but the migration
  chain still advances so tooling can distinguish post-refresh docs
  from older ones.
- v14 added additive auto-mined preference metadata on
  `::preference::` sections; the frontmatter shape is unchanged, but
  the schema still advances so migration-aware tooling can tell
  pre-mining docs from ones that may carry mined-preference markers
  in the body.
- v15 adds additive auto-synth instruction metadata on
  `::instruction::` sections; as with v14, the frontmatter shape is
  unchanged but the schema advances so tooling can tell pre-synth
  docs from ones that may carry synthesized-instruction markers in
  the body.
"""


class PreferenceHyperparams(BaseModel):
    """Hyperparameters shared across preference methods.

    Some fields are method-specific (`beta` for DPO, `alpha` for
    ORPO); the trainer reads whichever applies. Keeping both on one
    flat block simplifies migration and lets users switch methods
    without reshaping their document.
    """

    model_config = ConfigDict(extra="forbid", frozen=True)

    beta: float = Field(0.1, ge=0.0, le=1.0)
    alpha: float = Field(0.1, ge=0.0, le=1.0)
    learning_rate: float = Field(5e-6, gt=0.0)
    num_epochs: int = Field(1, ge=1)


class PreferenceConfig(BaseModel):
    """Preference-phase knobs (DPO or ORPO).

    Additive to `TrainingConfig`; default disabled. `enabled` flips to
    `True` automatically when the document contains `::preference::`
    sections unless the user has explicitly set it to `False` — the
    phase orchestrator reads that signal.
    """

    model_config = ConfigDict(extra="forbid", frozen=True)

    enabled: bool = False
    method: Literal["dpo", "orpo"] = "dpo"
    hyperparams: PreferenceHyperparams = Field(default_factory=PreferenceHyperparams)
    # DPO-only fields — ignored for ORPO but kept on the config so a
    # user switching methods doesn't have to delete them.
    loss_type: Literal["sigmoid", "hinge", "ipo"] = "sigmoid"
    reference: Literal["base", "pre_adapter"] = "pre_adapter"


def _default_preference() -> PreferenceConfig:
    return PreferenceConfig()
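
# Illustrative frontmatter snippet (an assumption about usage, not part
# of this module): switching preference methods reuses the same block.
#
#     training:
#       preference:
#         enabled: true
#         method: orpo
#         hyperparams:
#           alpha: 0.2   # read by ORPO; `beta` would be ignored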


class CptConfig(BaseModel):
    """Continued-pretraining refinements.

    `schedule="auto"` lets the trainer pick: `dapt` when CPT rows
    dominate (>70% of training rows), otherwise the SFT default. A
    user who wants the DAPT curve regardless of the row mix pins
    `schedule="dapt"`; `schedule="sft"` opts out entirely.

    `embed_warmup_steps>0` unfreezes `embed_tokens` + `lm_head` for
    the first N steps and adds them to `modules_to_save`, which
    inflates adapter size by `vocab_size * hidden_dim`. The trainer
    warns loudly when this is enabled.
    """

    model_config = ConfigDict(extra="forbid", frozen=True)

    schedule: Literal["auto", "dapt", "sft"] = "auto"
    embed_warmup_steps: int = Field(0, ge=0)


def _default_cpt() -> CptConfig:
    return CptConfig()
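
# A minimal sketch of the `schedule="auto"` rule described above; the
# real decision lives in the trainer, and `resolve_schedule` is a
# hypothetical helper name used only for illustration:
#
#     def resolve_schedule(cfg: CptConfig, cpt_rows: int, total_rows: int) -> str:
#         if cfg.schedule != "auto":
#             return cfg.schedule  # user pinned `dapt` or `sft`
#         if total_rows and cpt_rows / total_rows > 0.7:
#             return "dapt"  # CPT rows dominate the mix
#         return "sft"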


class GateConfig(BaseModel):
    """Learned MoE-style adapter gate.

    When `enabled`, a small MLP trained post-SFT routes each prompt to
    a weighted combination of the document's named adapters. Applied
    uniformly across adapter layers (per-layer routing is the research
    follow-up).

    `cold_start_floor` is the minimum number of supervising sections
    per adapter below which gate training is skipped and inference
    defaults to uniform weights — small corpora overfit a tiny router.
    """

    model_config = ConfigDict(extra="forbid", frozen=True)

    enabled: bool = False
    hidden_proj_dim: int = Field(64, ge=8, le=2048)
    steps: int = Field(200, ge=1, le=10000)
    lr: float = Field(3e-4, gt=0.0, le=1.0)
    cold_start_floor: int = Field(4, ge=1, le=1024)
    # Entropy-regularization weight on the gate loss. Higher values
    # discourage mode collapse (one adapter takes all the weight);
    # lower values let the gate commit harder when data justifies it.
    entropy_lambda: float = Field(0.01, ge=0.0, le=1.0)


def _default_gate() -> GateConfig:
    return GateConfig()
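
# A hedged sketch of how `entropy_lambda` plausibly enters the gate
# objective (the actual loss lives in the trainer; `gate_mlp`,
# `prompt_repr`, and `task_loss` are hypothetical names): rewarding the
# entropy of the routing distribution pushes against one adapter taking
# all the weight.
#
#     weights = softmax(gate_mlp(prompt_repr))    # one weight per adapter
#     entropy = -(weights * weights.log()).sum()
#     loss = task_loss - cfg.entropy_lambda * entropy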


class CacheConfig(BaseModel):
    """Tokenized-section cache tuning.

    The cache lives at `~/.dlm/store/<dlm_id>/tokenized-cache/` and
    trades disk for tokenization wall-clock on directive-sourced runs.
    Defaults cover the typical case: cache on, 10 GiB cap, 90-day
    retention. Per-document overrides here let authors tune for their
    corpus size.

    All fields are independent — no cross-field validation. The three
    knobs map to three distinct operator concerns:
    - `enabled` is the off-switch.
    - `max_bytes` is the disk ceiling.
    - `prune_older_than_days` is the retention window.
    """

    model_config = ConfigDict(extra="forbid", frozen=True)

    enabled: bool = True
    # Default: 10 GiB (10 * 1024^3). Per-document cap that supersedes
    # the cache module's built-in default when the trainer opens the
    # cache. Lower for small personal corpora, higher for 50K+ file
    # codebases.
    max_bytes: int = Field(10 * 1024 * 1024 * 1024, ge=1)
    # Default cutoff for `dlm cache prune` when the user doesn't pass
    # `--older-than`. Overridable by the CLI flag on a per-command
    # basis.
    prune_older_than_days: int = Field(90, ge=1)


def _default_cache() -> CacheConfig:
    return CacheConfig()
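
# Illustrative per-document override (an assumption about frontmatter
# usage, not part of this module): a small personal corpus trading the
# 10 GiB default for a tighter cap and faster pruning.
#
#     training:
#       cache:
#         max_bytes: 2147483648        # 2 GiB
#         prune_older_than_days: 30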


class AudioConfig(BaseModel):
    """Audio-pipeline knobs for audio-language training (v12).

    Only meaningful when the base is an audio-language model (the
    trainer reads this block only on that branch). The default leaves
    behavior identical to v11: `auto_resample=False` preserves the
    "refuse on SR mismatch" contract, so migrating a document with no
    audio sections, or one on a non-audio base, to v12 never changes
    training output.
    """

    model_config = ConfigDict(extra="forbid", frozen=True)

    # When True, audio files at a native sample rate different from the
    # base's pinned rate (e.g. a 48 kHz clip with Qwen2-Audio's 16 kHz
    # pin) are resampled on the fly via `dlm.data.audio_resample`
    # instead of raising. Resampled waveforms cache separately from
    # native-rate ones (the cache key carries the flag), so toggling
    # auto_resample on an existing corpus doesn't serve stale entries.
    auto_resample: bool = False


def _default_audio() -> AudioConfig:
    return AudioConfig()


class AdapterConfig(BaseModel):
    """One named adapter in a multi-adapter document.

    A subset of the flat config — only the per-adapter LoRA knobs plus
    `learning_rate`. Hyperparameters that are intrinsically run-scoped
    (`sequence_len`, `num_epochs`, `seed`, `optimizer`, `lr_scheduler`,
    `warmup_ratio`, `micro_batch_size`, `grad_accum`) stay at the
    `TrainingConfig` top level because mixing them per-adapter makes
    schedules and batching incoherent.
    """

    model_config = ConfigDict(extra="forbid", frozen=True)

    adapter: Literal["lora", "qlora", "dora"] = "lora"
    lora_r: int = Field(8, ge=1, le=256)
    lora_alpha: int = Field(16, ge=1)
    lora_dropout: float = Field(0.05, ge=0.0, le=0.5)
    target_modules: Literal["auto"] | list[str] = "auto"
    learning_rate: float = Field(2e-4, gt=0.0)
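
# Illustrative multi-adapter frontmatter (an assumption about usage, not
# part of this module). Two named adapters satisfy the gate's minimum,
# and the flat `lora_*` shorthand stays at its defaults as the validator
# in `TrainingConfig` below requires.
#
#     training:
#       adapters:
#         style:
#           adapter: lora
#           lora_r: 16
#         knowledge:
#           adapter: qlora
#           learning_rate: 1.0e-4
#       gate:
#         enabled: true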


class SourceDirective(BaseModel):
    """A directive to ingest file(s) as synthetic PROSE sections at train
    time.

    Paths resolve relative to the `.dlm` file's parent directory when
    not absolute; `~` expands via `Path.expanduser()`. Under
    `training.sources_policy="strict"` the resolved path must stay
    under the `.dlm` parent dir (symlinks included — containment is
    checked after `Path.resolve()`). `permissive` lets paths point
    anywhere on disk.

    `include` / `exclude` are POSIX-glob patterns relative to each
    source root (the defaults, `("**/*",)` and `()`, match every file).
    Size caps apply per-file and per-directive; binary files (detected
    by a NUL scan of the first KiB) and non-UTF-8 bytes are skipped
    with a log warning, never a fatal error, because mixed trees are
    the common case.
    """

    model_config = ConfigDict(extra="forbid", frozen=True)

    path: str = Field(..., min_length=1)
    include: tuple[str, ...] = ("**/*",)
    exclude: tuple[str, ...] = ()
    max_bytes_per_file: int | None = Field(default=None, ge=1)
    max_files: int | None = Field(default=None, ge=1)
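
# Illustrative directive (an assumption about usage, not part of this
# module): pull markdown notes from a sibling directory, skipping
# drafts and oversized files.
#
#     training:
#       sources:
#         - path: ../notes
#           include: ["**/*.md"]
#           exclude: ["**/drafts/**"]
#           max_bytes_per_file: 262144   # 256 KiB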


class TrainingConfig(BaseModel):
    """Training-time knobs. `auto` values are resolved by the hardware doctor."""

    model_config = ConfigDict(extra="forbid", frozen=True)

    adapter: Literal["lora", "qlora", "dora"] = "lora"
    lora_r: int = Field(8, ge=1, le=256)
    lora_alpha: int = Field(16, ge=1)
    lora_dropout: float = Field(0.05, ge=0.0, le=0.5)
    target_modules: Literal["auto"] | list[str] = "auto"
    sequence_len: int = Field(2048, ge=64, le=32768)
    micro_batch_size: Literal["auto"] | int = "auto"
    grad_accum: Literal["auto"] | int = "auto"
    learning_rate: float = Field(2e-4, gt=0.0)
    num_epochs: int = Field(3, ge=1)
    optimizer: Literal[
        "adamw_torch",
        "adamw_bnb_8bit",
        "paged_adamw_8bit",
        "galore_adamw",
        "galore_adamw_8bit",
    ] = "adamw_torch"
    lr_scheduler: Literal["cosine", "linear", "constant"] = "cosine"
    warmup_ratio: float = Field(0.1, ge=0.0, le=0.5)
    # Advanced: override the hardware doctor's auto-picked precision.
    # `None` (default) lets the planner pick per backend — bf16 on
    # Ampere+, fp16 on older CUDA, fp32 on MPS (the last pin is
    # defensive: MPS fp16 attention kernels produce NaN LoRA weights
    # on tiny-data runs; see `.docs/bugs/01-nan-adapter-on-mps.md`).
    # Users who want fp16 on MPS for memory (e.g. running an 8B base
    # on a 24 GB unified-memory budget) can opt in here, accepting
    # the stability risk on small datasets.
    precision: Literal["bf16", "fp16", "fp32"] | None = None
    seed: int = 42
    preference: PreferenceConfig = Field(default_factory=_default_preference)
    cpt: CptConfig = Field(default_factory=_default_cpt)
    # Learned adapter gate. Only meaningful when `adapters` declares
    # two or more named adapters — a gate over a single adapter has
    # nothing to route between. Enforced at validate-time below.
    gate: GateConfig = Field(default_factory=_default_gate)
    # Tokenized-section cache tuning. Defaults preserve pre-v9
    # behavior: cache on, 10 GiB cap, 90-day prune window.
    cache: CacheConfig = Field(default_factory=_default_cache)
    # Audio-pipeline knobs (v12). Only read when the base is an
    # audio-language model. Default `auto_resample=False` preserves the
    # v11 contract (refuse on SR mismatch); set to True to enable
    # on-the-fly resampling to the base's pinned rate.
    audio: AudioConfig = Field(default_factory=_default_audio)
    # Named adapters for multi-adapter composition. When set, the flat
    # `adapter`/`lora_*`/`target_modules`/`learning_rate` fields must
    # stay at their defaults — mixing the two shapes creates ambiguous
    # "which config wins?" semantics. An empty/None `adapters` keeps
    # the single-adapter shorthand fully backward-compatible.
    adapters: dict[str, AdapterConfig] | None = None
    # Source directives (v6). Declarative file-tree ingestion — each
    # entry becomes a walk-and-read at train time, synthesizing PROSE
    # sections for the CPT path. `None` / empty keeps the `.dlm` as a
    # self-contained training corpus; populated lets the document
    # reference external codebases, notes directories, etc. See
    # `dlm.directives.expand_sources`.
    sources: tuple[SourceDirective, ...] | None = None
    # `permissive` (default) lets directive paths point anywhere on
    # disk. `strict` confines them to the `.dlm` parent subtree —
    # useful when a `.dlm` travels with a project and the author wants
    # training content to stay project-local regardless of where a
    # downstream user places the file.
    sources_policy: Literal["permissive", "strict"] = "permissive"

    @field_validator("micro_batch_size", "grad_accum")
    @classmethod
    def _validate_auto_or_positive(cls, v: int | str) -> int | str:
        if v == "auto":
            return v
        if not isinstance(v, int) or v < 1:
            raise ValueError(f"must be a positive int or 'auto', got {v!r}")
        return v

    @field_validator("adapters")
    @classmethod
    def _validate_adapter_names(
        cls, v: dict[str, AdapterConfig] | None
    ) -> dict[str, AdapterConfig] | None:
        if v is None:
            return v
        if not v:
            raise ValueError(
                "training.adapters: at least one adapter must be declared "
                "(omit the block entirely for the single-adapter shorthand)"
            )
        for name in v:
            if not _ADAPTER_NAME_RE.fullmatch(name):
                raise ValueError(
                    f"training.adapters: {name!r} is not a valid adapter "
                    f"name (must match {_ADAPTER_NAME_RE.pattern})"
                )
        return v

    @model_validator(mode="after")
    def _gate_requires_multiple_adapters(self) -> TrainingConfig:
        if self.gate.enabled and (self.adapters is None or len(self.adapters) < 2):
            raise ValueError(
                "training.gate.enabled=true requires training.adapters "
                "with two or more named adapters (a gate over a single "
                "adapter has nothing to route between)"
            )
        return self

    @model_validator(mode="after")
    def _flat_and_named_are_mutually_exclusive(self) -> TrainingConfig:
        if self.adapters is None:
            return self
        # A flat-adapter field set away from its default would silently
        # lose to the named block at train time. Refuse at parse time
        # instead.
        flat_defaults = {
            "adapter": "lora",
            "lora_r": 8,
            "lora_alpha": 16,
            "lora_dropout": 0.05,
            "target_modules": "auto",
            "learning_rate": 2e-4,
        }
        drift = [key for key, default in flat_defaults.items() if getattr(self, key) != default]
        if drift:
            raise ValueError(
                "training.adapters is declared; flat per-adapter fields "
                f"{drift} must stay at their defaults (move them into the "
                "per-adapter block instead)"
            )
        return self


class ExportConfig(BaseModel):
    """Export-time defaults."""

    model_config = ConfigDict(extra="forbid", frozen=True)

    default_quant: Literal["Q4_K_M", "Q5_K_M", "Q6_K", "Q8_0"] = "Q4_K_M"
    # Optional per-document sampling overrides. When set, the Modelfile
    # emits `PARAMETER temperature <v>` / `PARAMETER top_p <v>` in place
    # of the dialect default — a Q&A document prefers temperature=0.2,
    # a creative one prefers 0.9. `None` keeps the dialect default
    # from the template registry.
    default_temperature: float | None = Field(None, gt=0.0, le=2.0)
    default_top_p: float | None = Field(None, gt=0.0, le=1.0)
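
# Illustrative export override (an assumption about usage, not part of
# this module): a Q&A document pinning conservative sampling.
#
#     export:
#       default_quant: Q5_K_M
#       default_temperature: 0.2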


# Named factories so mypy can type-check the field defaults correctly.
def _default_training() -> TrainingConfig:
    return TrainingConfig()


def _default_export() -> ExportConfig:
    return ExportConfig()


class DlmFrontmatter(BaseModel):
    """Top-level frontmatter: the YAML block between `---` delimiters.

    `dlm_id` is a canonical 26-character ULID. It is assigned by
    `dlm init` and never regenerated by the parser.

    `base_model` is either a registry key (e.g. `qwen2.5-1.5b`) or an
    `hf:org/name` escape hatch — the registry validates the actual
    lookup; this module only validates that the string is non-empty.
    """

    model_config = ConfigDict(extra="forbid", frozen=True)

    dlm_id: str
    dlm_version: int = CURRENT_SCHEMA_VERSION
    base_model: str = Field(..., min_length=1)
    training: TrainingConfig = Field(default_factory=_default_training)
    export: ExportConfig = Field(default_factory=_default_export)
    system_prompt: str | None = None

    @field_validator("dlm_id")
    @classmethod
    def _validate_ulid(cls, v: str) -> str:
        if not _ULID_RE.fullmatch(v):
            raise ValueError(
                f"dlm_id must be a 26-char Crockford base32 ULID, got {v!r}",
            )
        return v

    @field_validator("dlm_version")
    @classmethod
    def _validate_version(cls, v: int) -> int:
        # Defense in depth: the `versioned` dispatcher is the intended
        # entry point, but direct `DlmFrontmatter.model_validate`
        # callers (tests, tooling) need the same guard. Reject both
        # under-1 and beyond-current values at the field level.
        if v < 1:
            raise ValueError(f"dlm_version must be ≥1, got {v}")
        if v > CURRENT_SCHEMA_VERSION:
            raise ValueError(
                f"dlm_version {v} is newer than this CLI supports "
                f"(CURRENT_SCHEMA_VERSION={CURRENT_SCHEMA_VERSION})."
            )
        return v
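
# Minimal valid frontmatter for reference (illustrative; the ULID is the
# ULID spec's well-known example value, not a real document id):
#
#     ---
#     dlm_id: 01ARZ3NDEKTSV4RRFFQ69G5FAV
#     base_model: qwen2.5-1.5b
#     ---
#
# which this module validates as:
#
#     DlmFrontmatter.model_validate(
#         {"dlm_id": "01ARZ3NDEKTSV4RRFFQ69G5FAV", "base_model": "qwen2.5-1.5b"}
#     )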