| 1 |
"""Subcommand stubs for the v1.0 CLI surface. |
| 2 |
|
| 3 |
Every stub raises `NotImplementedError` with the sprint number that will |
| 4 |
implement it. This makes `dlm --help` self-documenting about project |
| 5 |
progress. Arguments are accepted so `--help` renders the real eventual |
| 6 |
surface; they're unused until each subcommand's owning sprint lands, |
| 7 |
which is why `src/dlm/cli/commands.py` has a ruff per-file-ignore for |
| 8 |
`ARG001` in `pyproject.toml`. |
| 9 |
""" |
| 10 |
|
| 11 |
from __future__ import annotations |
| 12 |
|
| 13 |
import os |
| 14 |
from collections.abc import Sequence |
| 15 |
from pathlib import Path |
| 16 |
from typing import TYPE_CHECKING, Annotated, Any, Literal, cast |
| 17 |
|
| 18 |
import typer |
| 19 |
|
| 20 |
if TYPE_CHECKING: |
| 21 |
from datetime import timedelta |
| 22 |
|
| 23 |
|
| 24 |
def _stub(sprint: str, subject: str) -> None: |
| 25 |
"""Raise a clear unimplemented error pointing to the owning sprint.""" |
| 26 |
raise NotImplementedError( |
| 27 |
f"`{subject}` is not implemented yet (owned by Sprint {sprint}).", |
| 28 |
) |
| 29 |
|
| 30 |
|
| 31 |
def init_cmd(
    path: Annotated[Path, typer.Argument(help="Target .dlm path to create.")],
    base: Annotated[
        str, typer.Option("--base", help="Base model key or hf:org/name.")
    ] = "qwen2.5-1.5b",
    template: Annotated[
        str | None,
        typer.Option(
            "--template",
            help="Start from a named gallery template (see `dlm templates list`).",
        ),
    ] = None,
    i_accept_license: Annotated[
        bool,
        typer.Option("--i-accept-license", help="Accept gated base-model license."),
    ] = False,
    force: Annotated[
        bool,
        typer.Option("--force", help="Overwrite an existing .dlm at path."),
    ] = False,
    skip_export_probes: Annotated[
        bool,
        typer.Option(
            "--skip-export-probes",
            help=(
                "Skip the llama.cpp / GGUF-conversion probes so brand-new "
                "architectures (not yet in our vendored llama.cpp) can still "
                "be used for training + HF inference. Forfeits `dlm export` "
                "to Ollama until the vendored copy catches up."
            ),
        ),
    ] = False,
    multimodal: Annotated[
        bool,
        typer.Option(
            "--multimodal",
            help=(
                "Scaffold a vision-language .dlm with an `::image::` section. "
                "Defaults --base to paligemma-3b-mix-224 and skips GGUF "
                "export probes because current GGUF export does not "
                "support vision-language bases."
            ),
        ),
    ] = False,
    audio: Annotated[
        bool,
        typer.Option(
            "--audio",
            help=(
                "Scaffold an audio-language .dlm with an `::audio::` section. "
                "Defaults --base to qwen2-audio-7b-instruct and skips GGUF "
                "export probes (audio archs are not on llama.cpp's roadmap)."
            ),
        ),
    ] = False,
) -> None:
    """Bootstrap a new .dlm file with sensible defaults.

    Flow, in order: refuse to clobber an existing file (unless --force);
    validate mutually-exclusive flag combinations; resolve the base
    model (prompting for gated-license acceptance when interactive);
    write a scaffold or apply the named template; then create the store
    layout and persist the initial manifest, including any license
    acceptance record.

    Raises:
        typer.Exit: code 1 on IO / template / unknown-base / license
            failures; code 2 on conflicting or wrong-modality flags.
    """
    # Heavy imports are deferred to call time so `dlm --help` stays fast.
    from rich.console import Console

    from dlm.base_models import (
        GatedModelError,
        UnknownBaseModelError,
        is_gated,
        require_acceptance,
    )
    from dlm.base_models import resolve as resolve_base_model
    from dlm.io.ulid import mint_ulid

    # Diagnostics go to stderr so stdout stays clean for tooling.
    console = Console(stderr=True)

    if path.exists() and not force:
        console.print(
            f"[red]init:[/red] {path} already exists. "
            "Re-run with [bold]--force[/bold] to overwrite."
        )
        raise typer.Exit(code=1)

    # --multimodal / --audio are mutually exclusive with each other and
    # with --template (templates pin their own base + body shape; v1
    # doesn't ship media templates yet).
    if multimodal and audio:
        console.print(
            "[red]init:[/red] --multimodal and --audio are mutually exclusive "
            "(each targets a different modality)."
        )
        raise typer.Exit(code=2)
    if multimodal and template is not None:
        console.print(
            "[red]init:[/red] --multimodal and --template are mutually exclusive; "
            "v1 doesn't ship a VL template (see `dlm templates list`)."
        )
        raise typer.Exit(code=2)
    if audio and template is not None:
        console.print(
            "[red]init:[/red] --audio and --template are mutually exclusive; "
            "v1 doesn't ship an audio template (see `dlm templates list`)."
        )
        raise typer.Exit(code=2)

    # --multimodal / --audio override the text-first --base default. A
    # user who wants a different media base passes --base explicitly;
    # we verify the pick is the right modality below.
    if multimodal and base == "qwen2.5-1.5b":
        base = "paligemma-3b-mix-224"
    if audio and base == "qwen2.5-1.5b":
        base = "qwen2-audio-7b-instruct"

    # --template resolves the base from the template's meta.yaml; the
    # --base default is kept for the no-template path only. Users who
    # pass both a template and an explicit --base get a warning but the
    # template still wins (the template body was authored against its
    # recommended base).
    if template is not None:
        from dlm.templates import load_template

        # Peek at the template's recommended base WITHOUT writing
        # anything yet, so we can handle the license prompt against the
        # right base (the template's, not `--base`) before committing.
        try:
            resolved_base = load_template(template).meta.recommended_base
        except Exception as exc:
            console.print(f"[red]init:[/red] {exc}")
            raise typer.Exit(code=1) from exc
        # A --base equal to the default means "not explicitly chosen",
        # so only a non-default, non-matching pick earns the warning.
        if base != "qwen2.5-1.5b" and base != resolved_base:
            console.print(
                f"[yellow]init:[/yellow] --base {base} ignored; template "
                f"{template!r} uses {resolved_base}."
            )
    else:
        resolved_base = base

    # Media bases can't clear the GGUF-conversion probes. Force-skip
    # them so the probe suite doesn't false-fail the init.
    if multimodal or audio:
        skip_export_probes = True

    try:
        spec = resolve_base_model(
            resolved_base,
            accept_license=i_accept_license,
            skip_export_probes=skip_export_probes,
        )
    except UnknownBaseModelError as exc:
        console.print(f"[red]init:[/red] {exc}")
        raise typer.Exit(code=1) from exc
    except GatedModelError as exc:
        # Gated + user didn't pass --i-accept-license up-front. Prompt
        # interactively if we have a TTY; otherwise refuse with the flag
        # pointer (audit F22 non-interactive path).
        if not _prompt_accept_license(console, resolved_base, exc.license_url):
            console.print(
                "[red]license:[/red] refused. Re-run with "
                "[bold]--i-accept-license[/bold] to accept non-interactively."
            )
            raise typer.Exit(code=1) from exc
        # Prompt answered "yes" — retry the resolve with acceptance.
        spec = resolve_base_model(
            resolved_base,
            accept_license=True,
            skip_export_probes=skip_export_probes,
        )

    # NOW apply the template — license has already been accepted
    # (either by --i-accept-license or interactive prompt), so pass
    # the acceptance through. apply_template enforces the license
    # contract at its boundary.
    applied_result = None
    if template is not None:
        from dlm.templates import TemplateError, apply_template

        try:
            applied_result = apply_template(template, path, force=force, accept_license=True)
        except TemplateError as exc:
            console.print(f"[red]init:[/red] {exc}")
            raise typer.Exit(code=1) from exc

    # Record the license acceptance (or None for non-gated specs). We
    # know `resolve_base_model` already validated the flag/prompt chain
    # — `accept_license=True` means either the user passed the flag or
    # answered the interactive prompt. Either path is a real
    # acceptance; persist the record now so subsequent `dlm train` /
    # `dlm export` don't re-prompt.
    acceptance_via: Literal["cli_flag", "interactive"] = (
        "cli_flag" if i_accept_license else "interactive"
    )
    acceptance = (
        require_acceptance(spec, accept_license=True, via=acceptance_via)
        if is_gated(spec)
        else None
    )

    # Media flags require a matching-modality base. Check after resolve
    # so users pointing at an unknown or wrong-modality hf:org/name get
    # a clear explanation rather than a schema error deep in parse time.
    if multimodal and spec.modality != "vision-language":
        console.print(
            f"[red]init:[/red] --multimodal requires a vision-language base; "
            f"{spec.key!r} is modality='{spec.modality}'. "
            "Pick --base paligemma-3b-mix-224 or drop --multimodal."
        )
        raise typer.Exit(code=2)
    if audio and spec.modality != "audio-language":
        console.print(
            f"[red]init:[/red] --audio requires an audio-language base; "
            f"{spec.key!r} is modality='{spec.modality}'. "
            "Pick --base qwen2-audio-7b-instruct or drop --audio."
        )
        raise typer.Exit(code=2)

    # Template path already wrote the file and minted its own id; the
    # scaffold path mints a fresh ULID and writes the modality-matched
    # scaffold now that all checks have passed.
    if applied_result is not None:
        dlm_id = applied_result.dlm_id
    else:
        dlm_id = mint_ulid()
        if multimodal:
            _write_init_scaffold_multimodal(path, spec.key, dlm_id)
        elif audio:
            _write_init_scaffold_audio(path, spec.key, dlm_id)
        else:
            _write_init_scaffold(path, spec.key, dlm_id)

    # Create the store + write the initial manifest so `dlm show` sees
    # the license record and `dlm train` has a prior manifest to diff
    # against.
    from dlm.store.manifest import Manifest, save_manifest
    from dlm.store.paths import for_dlm

    store = for_dlm(dlm_id)
    store.ensure_layout()
    save_manifest(
        store.manifest,
        Manifest(
            dlm_id=dlm_id,
            base_model=spec.key,
            base_model_revision=spec.revision,
            source_path=path.resolve(),
            license_acceptance=acceptance,
        ),
    )
    if applied_result is not None:
        meta = applied_result.template.meta
        console.print(
            f"[green]init:[/green] wrote {path} from template "
            f"[bold]{meta.name}[/bold] ({meta.title}) — base {spec.key}."
        )
    else:
        console.print(f"[green]init:[/green] wrote {path}")
| 277 |
|
| 278 |
|
| 279 |
def _previously_accepted(store_manifest_path: Path) -> bool: |
| 280 |
"""Return True iff the store manifest already holds a LicenseAcceptance. |
| 281 |
|
| 282 |
`dlm prompt` and `dlm export` operate on an already-trained |
| 283 |
adapter; the gated-base license was accepted during training and |
| 284 |
persisted into `manifest.license_acceptance`. Replaying that |
| 285 |
acceptance here is correct; silently hardcoding |
| 286 |
`accept_license=True` is not — it would let a never-accepted |
| 287 |
gated base slip through. |
| 288 |
""" |
| 289 |
if not store_manifest_path.exists(): |
| 290 |
return False |
| 291 |
from dlm.store.errors import ManifestCorruptError |
| 292 |
from dlm.store.manifest import load_manifest |
| 293 |
|
| 294 |
try: |
| 295 |
manifest = load_manifest(store_manifest_path) |
| 296 |
except (ManifestCorruptError, OSError): |
| 297 |
# Narrow from bare `Exception` so programmer bugs propagate |
| 298 |
# instead of being silently treated as "no acceptance." |
| 299 |
return False |
| 300 |
return manifest.license_acceptance is not None |
| 301 |
|
| 302 |
|
| 303 |
def _prompt_accept_license(console: object, base: str, license_url: str | None) -> bool: |
| 304 |
"""Interactive y/N prompt for gated base-model license acceptance. |
| 305 |
|
| 306 |
Non-interactive runs (no TTY) return False; the caller surfaces the |
| 307 |
`--i-accept-license` flag pointer in that case. |
| 308 |
""" |
| 309 |
import sys |
| 310 |
|
| 311 |
from rich.console import Console |
| 312 |
|
| 313 |
assert isinstance(console, Console) |
| 314 |
|
| 315 |
if not sys.stdin.isatty(): |
| 316 |
return False |
| 317 |
|
| 318 |
console.print( |
| 319 |
f"[yellow]This base model ({base}) requires accepting the upstream license.[/yellow]" |
| 320 |
) |
| 321 |
if license_url: |
| 322 |
console.print(f" Review the license at: {license_url}") |
| 323 |
console.print("Accept and continue? [y/N]: ", end="") |
| 324 |
try: |
| 325 |
answer = input().strip().lower() |
| 326 |
except EOFError: |
| 327 |
return False |
| 328 |
return answer in ("y", "yes") |
| 329 |
|
| 330 |
|
| 331 |
def _write_init_scaffold(path: Path, base_model_key: str, dlm_id: str) -> None: |
| 332 |
"""Write a minimal-but-valid .dlm file at `path`. |
| 333 |
|
| 334 |
Body has one PROSE paragraph + a commented instruction section so |
| 335 |
users see both section shapes on first open. |
| 336 |
""" |
| 337 |
scaffold = f"""--- |
| 338 |
dlm_id: {dlm_id} |
| 339 |
dlm_version: 1 |
| 340 |
base_model: {base_model_key} |
| 341 |
--- |
| 342 |
|
| 343 |
# Your document title |
| 344 |
|
| 345 |
Write prose here. It will train via continued pretraining (CPT) loss. |
| 346 |
|
| 347 |
::instruction:: |
| 348 |
|
| 349 |
### Q |
| 350 |
Your example question. |
| 351 |
|
| 352 |
### A |
| 353 |
Your example answer. |
| 354 |
""" |
| 355 |
path.write_text(scaffold, encoding="utf-8") |
| 356 |
|
| 357 |
|
| 358 |
def _write_init_scaffold_multimodal(path: Path, base_model_key: str, dlm_id: str) -> None:
    """Write a VL-shaped .dlm file at `path`.

    Body shows the `::image::` attribute fence + a caption so users
    see the v10 grammar on first open. The placeholder path
    `figures/your-image.png` is deliberately non-existent — first
    `dlm train` will refuse with a clear file-missing error, prompting
    the user to drop a real image in. This is friendlier than
    committing an inert sample that users might not notice isn't theirs.

    `dlm_version: 10` because IMAGE sections require schema v10.

    Args:
        path: Destination for the new .dlm file (overwritten if present).
        base_model_key: Base-model key written into the frontmatter.
        dlm_id: ULID written into the frontmatter.
    """
    # NOTE: the `\\n` below is intentional — the written file should
    # contain a literal backslash-n inside the caption guidance text.
    scaffold = f"""---
dlm_id: {dlm_id}
dlm_version: 10
base_model: {base_model_key}
---

# Your document title

Write prose here. It will train via continued pretraining (CPT) loss.

::image path="figures/your-image.png" alt="short description"::
Caption text describing the image. Training rows bundle the image
with this caption as `<image>\\n<caption>`.

::instruction::

### Q
What is in this image?

### A
Describe what the image shows.
"""
    path.write_text(scaffold, encoding="utf-8")
| 393 |
|
| 394 |
|
| 395 |
def _write_init_scaffold_audio(path: Path, base_model_key: str, dlm_id: str) -> None:
    """Write an audio-shaped .dlm file at `path`.

    Body shows the `::audio::` attribute fence with the sibling-
    transcript-friendly `transcript="..."` form so users see the v11
    grammar on first open. The placeholder path `clips/your-clip.wav`
    is deliberately non-existent — first `dlm train` refuses with a
    clear file-missing error rather than silently training on an inert
    sample.

    `dlm_version: 11` because AUDIO sections require schema v11.

    Args:
        path: Destination for the new .dlm file (overwritten if present).
        base_model_key: Base-model key written into the frontmatter.
        dlm_id: ULID written into the frontmatter.
    """
    scaffold = f"""---
dlm_id: {dlm_id}
dlm_version: 11
base_model: {base_model_key}
---

# Your document title

Write prose here. It will train via continued pretraining (CPT) loss.

::audio path="clips/your-clip.wav" transcript="Transcript of the audio clip."::

::instruction::

### Q
What was said in this recording?

### A
Describe what you hear in the audio.
"""
    path.write_text(scaffold, encoding="utf-8")
| 428 |
|
| 429 |
|
| 430 |
def train_cmd( |
| 431 |
path: Annotated[ |
| 432 |
Path, |
| 433 |
typer.Argument( |
| 434 |
help=( |
| 435 |
".dlm file to train. Or a directory — when passed a directory, " |
| 436 |
"`dlm train` auto-scaffolds `<dir>/.dlm/corpus.dlm` on first run " |
| 437 |
"(with --base) and reuses it on subsequent runs." |
| 438 |
), |
| 439 |
), |
| 440 |
], |
| 441 |
resume: Annotated[bool, typer.Option("--resume", help="Resume from last checkpoint.")] = False, |
| 442 |
fresh: Annotated[bool, typer.Option("--fresh", help="Discard prior adapter state.")] = False, |
| 443 |
seed: Annotated[int | None, typer.Option("--seed", help="Override training seed.")] = None, |
| 444 |
max_steps: Annotated[int | None, typer.Option("--max-steps", help="Cap step count.")] = None, |
| 445 |
phase: Annotated[ |
| 446 |
str, |
| 447 |
typer.Option( |
| 448 |
"--phase", |
| 449 |
help=( |
| 450 |
"Which training phases to run: 'sft' (supervised only), " |
| 451 |
"'preference' (DPO/ORPO only — requires a prior SFT " |
| 452 |
"adapter), or 'all' (SFT then preference when enabled). " |
| 453 |
"The preference method (dpo / orpo) comes from " |
| 454 |
"training.preference.method in the frontmatter." |
| 455 |
), |
| 456 |
), |
| 457 |
] = "all", |
| 458 |
i_accept_license: Annotated[ |
| 459 |
bool, |
| 460 |
typer.Option( |
| 461 |
"--i-accept-license", |
| 462 |
help="Accept the base model's license (required for gated bases like llama-3.2).", |
| 463 |
), |
| 464 |
] = False, |
| 465 |
strict_lock: Annotated[ |
| 466 |
bool, |
| 467 |
typer.Option( |
| 468 |
"--strict-lock", |
| 469 |
help="Fail on any dlm.lock drift, including version warns.", |
| 470 |
), |
| 471 |
] = False, |
| 472 |
update_lock: Annotated[ |
| 473 |
bool, |
| 474 |
typer.Option( |
| 475 |
"--update-lock", |
| 476 |
help="Overwrite dlm.lock without validating prior entries.", |
| 477 |
), |
| 478 |
] = False, |
| 479 |
ignore_lock: Annotated[ |
| 480 |
bool, |
| 481 |
typer.Option( |
| 482 |
"--ignore-lock", |
| 483 |
help="Skip dlm.lock validation and don't write a new lock.", |
| 484 |
), |
| 485 |
] = False, |
| 486 |
strict_metrics: Annotated[ |
| 487 |
bool, |
| 488 |
typer.Option( |
| 489 |
"--strict-metrics", |
| 490 |
help="Promote metrics SQLite write failures to hard errors.", |
| 491 |
), |
| 492 |
] = False, |
| 493 |
no_mined: Annotated[ |
| 494 |
bool, |
| 495 |
typer.Option( |
| 496 |
"--no-mined", |
| 497 |
help=( |
| 498 |
"Exclude auto-mined preference sections from the preference " |
| 499 |
"phase, including replay-sampled mined pairs. Hand-authored " |
| 500 |
"`::preference::` sections still train normally." |
| 501 |
), |
| 502 |
), |
| 503 |
] = False, |
| 504 |
gpus: Annotated[ |
| 505 |
str | None, |
| 506 |
typer.Option( |
| 507 |
"--gpus", |
| 508 |
help=( |
| 509 |
"Multi-GPU training. `all` uses every visible CUDA device; " |
| 510 |
"`N` uses the first N; `0,1` selects exact device ids. " |
| 511 |
"Dispatches to `accelerate launch` when >1 device is " |
| 512 |
"selected. Omit for single-process training." |
| 513 |
), |
| 514 |
), |
| 515 |
] = None, |
| 516 |
watch: Annotated[ |
| 517 |
bool, |
| 518 |
typer.Option( |
| 519 |
"--watch", |
| 520 |
help=( |
| 521 |
"Save-to-train mode. After an initial train, block on " |
| 522 |
"filesystem events and run incremental retrains " |
| 523 |
"(mode=resume, step-capped) on each settled save. Ctrl-C " |
| 524 |
"exits cleanly between cycles." |
| 525 |
), |
| 526 |
), |
| 527 |
] = False, |
| 528 |
watch_max_steps: Annotated[ |
| 529 |
int, |
| 530 |
typer.Option( |
| 531 |
"--watch-max-steps", |
| 532 |
help="Per-cycle step cap for --watch. Default 100 keeps cycles responsive.", |
| 533 |
), |
| 534 |
] = 100, |
| 535 |
watch_debounce_ms: Annotated[ |
| 536 |
int, |
| 537 |
typer.Option( |
| 538 |
"--watch-debounce-ms", |
| 539 |
help="Quiet interval (ms) before a burst of saves triggers a retrain.", |
| 540 |
), |
| 541 |
] = 400, |
| 542 |
watch_repl: Annotated[ |
| 543 |
bool, |
| 544 |
typer.Option( |
| 545 |
"--repl", |
| 546 |
help=( |
| 547 |
"With --watch: also open the REPL so prompts reflect the " |
| 548 |
"latest adapter. **Scaffolded** — threading integration " |
| 549 |
"is untestable without a two-process harness; emit a " |
| 550 |
"not-yet-implemented refusal and exit 2." |
| 551 |
), |
| 552 |
), |
| 553 |
] = False, |
| 554 |
base: Annotated[ |
| 555 |
str | None, |
| 556 |
typer.Option( |
| 557 |
"--base", |
| 558 |
help=( |
| 559 |
"Base model key for auto-scaffold. Required on first run when " |
| 560 |
"`path` is a directory without an existing .dlm/ config. " |
| 561 |
"Accepts registry keys (smollm2-135m, qwen2.5-coder-1.5b, ...) " |
| 562 |
"or `hf:<org>/<name>` for off-registry models." |
| 563 |
), |
| 564 |
), |
| 565 |
] = None, |
| 566 |
include: Annotated[ |
| 567 |
list[str] | None, |
| 568 |
typer.Option( |
| 569 |
"--include", |
| 570 |
help=( |
| 571 |
"Glob pattern for files to train on (auto-scaffold only). " |
| 572 |
"Repeatable. Default: '**/*' with --recursive, '*' without. " |
| 573 |
"Examples: '**/*.py', '**/*.f90', '**/*.{md,rst}'." |
| 574 |
), |
| 575 |
), |
| 576 |
] = None, |
| 577 |
exclude: Annotated[ |
| 578 |
list[str] | None, |
| 579 |
typer.Option( |
| 580 |
"--exclude", |
| 581 |
help=( |
| 582 |
"Glob pattern for files to skip (auto-scaffold only). " |
| 583 |
"Repeatable. Defaults (secrets, VCS, lockfiles, binaries) " |
| 584 |
"apply on top via the descent protocol." |
| 585 |
), |
| 586 |
), |
| 587 |
] = None, |
| 588 |
recursive: Annotated[ |
| 589 |
bool, |
| 590 |
typer.Option( |
| 591 |
"--recursive/--no-recursive", |
| 592 |
"-r/-R", |
| 593 |
help=( |
| 594 |
"Auto-scaffold include patterns descend into subdirectories. " |
| 595 |
"Default True. --no-recursive limits the default include to " |
| 596 |
"top-level files only." |
| 597 |
), |
| 598 |
), |
| 599 |
] = True, |
| 600 |
name: Annotated[ |
| 601 |
str, |
| 602 |
typer.Option( |
| 603 |
"--name", |
| 604 |
help=( |
| 605 |
"Adapter name for auto-scaffold → `<dir>/.dlm/<name>.dlm`. " |
| 606 |
"Default 'corpus'. Lets a single tree host multiple adapters." |
| 607 |
), |
| 608 |
), |
| 609 |
] = "corpus", |
| 610 |
policy: Annotated[ |
| 611 |
str, |
| 612 |
typer.Option( |
| 613 |
"--policy", |
| 614 |
help=( |
| 615 |
"Auto-scaffold sources_policy: 'strict' (default; confines " |
| 616 |
"training to the target directory) or 'permissive' (allows " |
| 617 |
"absolute paths anywhere)." |
| 618 |
), |
| 619 |
), |
| 620 |
] = "strict", |
| 621 |
rescaffold: Annotated[ |
| 622 |
bool, |
| 623 |
typer.Option( |
| 624 |
"--rescaffold", |
| 625 |
help=( |
| 626 |
"Rewrite an existing scaffolded .dlm in place with the new " |
| 627 |
"--base/--include/--exclude/--policy flags. Keeps the same " |
| 628 |
"dlm_id (store stays intact). Without it, re-running with " |
| 629 |
"frontmatter-editing flags refuses to shadow-edit." |
| 630 |
), |
| 631 |
), |
| 632 |
] = False, |
| 633 |
listen_rpc: Annotated[ |
| 634 |
str | None, |
| 635 |
typer.Option( |
| 636 |
"--listen-rpc", |
| 637 |
help=( |
| 638 |
"Open a JSON-RPC endpoint at <host:port> (e.g. `127.0.0.1:7429`) " |
| 639 |
"that accepts `inject_probe` pushes from sway-style eval " |
| 640 |
"harnesses. Probes enter the queue and drain at the next " |
| 641 |
"training-cycle boundary. Requires --watch or --max-cycles. " |
| 642 |
"Bearer token from DLM_PROBE_TOKEN." |
| 643 |
), |
| 644 |
), |
| 645 |
] = None, |
| 646 |
max_cycles: Annotated[ |
| 647 |
int, |
| 648 |
typer.Option( |
| 649 |
"--max-cycles", |
| 650 |
help=( |
| 651 |
"Convergence stop for --listen-rpc without --watch: cap the " |
| 652 |
"probe-driven retrain loop at N cycles. Ignored without " |
| 653 |
"--listen-rpc." |
| 654 |
), |
| 655 |
), |
| 656 |
] = 0, |
| 657 |
no_cache: Annotated[ |
| 658 |
bool, |
| 659 |
typer.Option( |
| 660 |
"--no-cache", |
| 661 |
help=( |
| 662 |
"Opt out of the tokenized-section cache for this run. By " |
| 663 |
"default, `dlm train` pre-tokenizes directive-sourced rows " |
| 664 |
"via ~/.dlm/store/<id>/tokenized-cache/ so subsequent runs " |
| 665 |
"on the same corpus skip re-tokenization. Use this to " |
| 666 |
"bypass the cache for debugging or to compare cached vs " |
| 667 |
"uncached training determinism." |
| 668 |
), |
| 669 |
), |
| 670 |
] = False, |
| 671 |
skip_export_probes: Annotated[ |
| 672 |
bool, |
| 673 |
typer.Option( |
| 674 |
"--skip-export-probes", |
| 675 |
help=( |
| 676 |
"Skip the llama.cpp / GGUF-conversion probes so brand-new " |
| 677 |
"architectures (not yet in our vendored llama.cpp) can still " |
| 678 |
"be used for training + HF inference. Forfeits `dlm export` " |
| 679 |
"to Ollama until the vendored copy catches up. Mirrors the " |
| 680 |
"flag of the same name on `dlm init`." |
| 681 |
), |
| 682 |
), |
| 683 |
] = False, |
| 684 |
) -> None: |
| 685 |
"""Train / retrain a .dlm against its base model.""" |
| 686 |
import sqlite3 |
| 687 |
import sys |
| 688 |
|
| 689 |
from rich.console import Console |
| 690 |
|
| 691 |
from dlm.base_models import GatedModelError |
| 692 |
from dlm.base_models import resolve as resolve_base_model |
| 693 |
from dlm.doc.errors import DlmParseError |
| 694 |
from dlm.doc.parser import parse_file |
| 695 |
from dlm.hardware import doctor |
| 696 |
from dlm.lock import LockMode, LockValidationError |
| 697 |
from dlm.store.paths import for_dlm |
| 698 |
from dlm.train import ( |
| 699 |
DiskSpaceError, |
| 700 |
OOMError, |
| 701 |
ResumeIntegrityError, |
| 702 |
TrainingError, |
| 703 |
) |
| 704 |
from dlm.train.preference import ( |
| 705 |
DpoPhaseError, |
| 706 |
NoPreferenceContentError, |
| 707 |
PriorAdapterRequiredError, |
| 708 |
) |
| 709 |
from dlm.train.preference.phase_orchestrator import Phase, run_phases |
| 710 |
|
| 711 |
console = Console(stderr=True) |
| 712 |
|
| 713 |
if phase not in ("sft", "preference", "all"): |
| 714 |
console.print(f"[red]error:[/red] --phase must be one of sft|preference|all, got {phase!r}") |
| 715 |
raise typer.Exit(code=2) |
| 716 |
phase_literal: Phase = phase # type: ignore[assignment] |
| 717 |
|
| 718 |
if resume and fresh: |
| 719 |
console.print("[red]error:[/red] --resume and --fresh are mutually exclusive") |
| 720 |
raise typer.Exit(code=2) |
| 721 |
mode: Literal["fresh", "resume"] = "resume" if resume else "fresh" |
| 722 |
|
| 723 |
# --gpus dispatches to accelerate launch when >1 device is |
| 724 |
# selected. The single-GPU path falls through to the existing |
| 725 |
# in-process trainer; a bare `--gpus 1` is a no-op (users can use |
| 726 |
# it to lock the visible device set via CUDA_VISIBLE_DEVICES |
| 727 |
# without spawning a subprocess). |
| 728 |
if gpus is not None: |
| 729 |
# Resolve mixed_precision from capabilities so bf16-incapable |
| 730 |
# CUDA GPUs (SM<8.0) don't trip the `accelerate launch` |
| 731 |
# default. `probe()` is cheap and runs in the launcher-side |
| 732 |
# process only; each rank re-probes via `doctor()` later. |
| 733 |
from dlm.hardware.capabilities import probe as _probe_caps |
| 734 |
|
| 735 |
_caps = _probe_caps() |
| 736 |
_mp = "bf16" if _caps.supports_bf16 else "fp16" |
| 737 |
exit_code = _maybe_dispatch_multi_gpu(gpus, sys.argv, console, mixed_precision=_mp) |
| 738 |
if exit_code is not None: |
| 739 |
raise typer.Exit(code=exit_code) |
| 740 |
|
| 741 |
# Mutual-exclusion gate for the three lock flags. Exactly one (or |
| 742 |
# zero) may be set — silently ignoring a conflicting pair would |
| 743 |
# mask operator intent. |
| 744 |
lock_flag_count = sum((strict_lock, update_lock, ignore_lock)) |
| 745 |
if lock_flag_count > 1: |
| 746 |
console.print( |
| 747 |
"[red]error:[/red] --strict-lock / --update-lock / --ignore-lock " |
| 748 |
"are mutually exclusive", |
| 749 |
) |
| 750 |
raise typer.Exit(code=2) |
| 751 |
lock_mode: LockMode = "default" |
| 752 |
if strict_lock: |
| 753 |
lock_mode = "strict" |
| 754 |
elif update_lock: |
| 755 |
lock_mode = "update" |
| 756 |
elif ignore_lock: |
| 757 |
lock_mode = "ignore" |
| 758 |
|
| 759 |
# `--no-cache` bypasses the tokenized-section cache for this run. |
| 760 |
# Plumbed as an env var because the trainer's pre-tokenize helper |
| 761 |
# already reads one — the CLI flag is a discoverable surface over |
| 762 |
# the same switch. Rolling the flag into `TrainingPlan` is a |
| 763 |
# deferred refactor; the env var is sufficient for the user-facing |
| 764 |
# contract and survives `accelerate launch` re-invocations. |
| 765 |
if no_cache: |
| 766 |
from dlm.train.cache import set_disable_flag |
| 767 |
|
| 768 |
set_disable_flag("--no-cache") |
| 769 |
|
| 770 |
if policy not in ("permissive", "strict"): |
| 771 |
console.print( |
| 772 |
f"[red]error:[/red] --policy must be 'permissive' or 'strict', got {policy!r}" |
| 773 |
) |
| 774 |
raise typer.Exit(code=2) |
| 775 |
policy_literal: Literal["permissive", "strict"] = policy # type: ignore[assignment] |
| 776 |
|
| 777 |
# --listen-rpc requires a loop to drain the queue — either --watch |
| 778 |
# (file-change cycles) or --max-cycles N (bounded retrain loop). |
| 779 |
# Without one, the server would accept probes that never train. We |
| 780 |
# also need the bearer token up front so the user sees the refusal |
| 781 |
# before we spend time downloading weights. |
| 782 |
rpc_config: tuple[str, int, str] | None = None |
| 783 |
if listen_rpc is not None: |
| 784 |
if not watch and max_cycles <= 0: |
| 785 |
console.print( |
| 786 |
"[red]error:[/red] --listen-rpc requires --watch or --max-cycles N " |
| 787 |
"(the probe queue needs a drain cadence)" |
| 788 |
) |
| 789 |
raise typer.Exit(code=2) |
| 790 |
token = os.environ.get("DLM_PROBE_TOKEN", "").strip() |
| 791 |
if not token: |
| 792 |
console.print( |
| 793 |
"[red]error:[/red] --listen-rpc needs a bearer token; " |
| 794 |
"export DLM_PROBE_TOKEN=<secret>" |
| 795 |
) |
| 796 |
raise typer.Exit(code=2) |
| 797 |
host, _, port_s = listen_rpc.rpartition(":") |
| 798 |
if not host or not port_s: |
| 799 |
console.print(f"[red]error:[/red] --listen-rpc expects host:port, got {listen_rpc!r}") |
| 800 |
raise typer.Exit(code=2) |
| 801 |
try: |
| 802 |
port = int(port_s) |
| 803 |
except ValueError: |
| 804 |
console.print(f"[red]error:[/red] --listen-rpc port must be an integer, got {port_s!r}") |
| 805 |
raise typer.Exit(code=2) from None |
| 806 |
rpc_config = (host, port, token) |
| 807 |
|
| 808 |
# Directory targets auto-scaffold `<dir>/.dlm/corpus.dlm` (or |
| 809 |
# reuse an existing one). After this block, `path` always points |
| 810 |
# at an actual `.dlm` file that the rest of the flow can parse. |
| 811 |
if path.is_dir(): |
| 812 |
from dlm.cli.scaffold import ScaffoldError, scaffold_train_target |
| 813 |
|
| 814 |
try: |
| 815 |
scaffold_result = scaffold_train_target( |
| 816 |
path, |
| 817 |
base=base, |
| 818 |
include=tuple(include or ()), |
| 819 |
exclude=tuple(exclude or ()), |
| 820 |
recursive=recursive, |
| 821 |
name=name, |
| 822 |
policy=policy_literal, |
| 823 |
rescaffold=rescaffold, |
| 824 |
) |
| 825 |
except ScaffoldError as exc: |
| 826 |
console.print(f"[red]scaffold:[/red] {exc.message}") |
| 827 |
raise typer.Exit(code=1) from exc |
| 828 |
|
| 829 |
if scaffold_result.scaffolded: |
| 830 |
console.print( |
| 831 |
f"[cyan]scaffolded:[/cyan] {scaffold_result.dlm_path} " |
| 832 |
f"(dlm_id={scaffold_result.dlm_id})" |
| 833 |
) |
| 834 |
path = scaffold_result.dlm_path |
| 835 |
|
| 836 |
try: |
| 837 |
parsed = parse_file(path) |
| 838 |
except (DlmParseError, OSError) as exc: |
| 839 |
console.print(f"[red]error:[/red] {exc}") |
| 840 |
raise typer.Exit(code=1) from exc |
| 841 |
try: |
| 842 |
spec = resolve_base_model( |
| 843 |
parsed.frontmatter.base_model, |
| 844 |
accept_license=i_accept_license, |
| 845 |
skip_export_probes=skip_export_probes, |
| 846 |
) |
| 847 |
except GatedModelError as exc: |
| 848 |
console.print(f"[red]license:[/red] base model {parsed.frontmatter.base_model!r} is gated.") |
| 849 |
if exc.license_url: |
| 850 |
console.print(f" review the license at: {exc.license_url}") |
| 851 |
console.print( |
| 852 |
" re-run with [bold]--i-accept-license[/bold] once you have accepted. " |
| 853 |
"Acceptance will be persisted in the store manifest." |
| 854 |
) |
| 855 |
raise typer.Exit(code=1) from exc |
| 856 |
# Detect the DDP world_size set by `accelerate launch` |
| 857 |
# (WORLD_SIZE env var) and thread it into the doctor so the plan's |
| 858 |
# effective_batch_size reflects the rank count. Single-process |
| 859 |
# runs read 1 and the plan math is unchanged. |
| 860 |
from dlm.train.distributed import detect_world_size |
| 861 |
|
| 862 |
ws = detect_world_size() |
| 863 |
doctor_result = doctor( |
| 864 |
training_config=parsed.frontmatter.training, |
| 865 |
base_params=spec.params, |
| 866 |
seq_len=min(parsed.frontmatter.training.sequence_len, spec.effective_context_length), |
| 867 |
world_size=ws, |
| 868 |
) |
| 869 |
plan = doctor_result.plan |
| 870 |
if plan is None: |
| 871 |
console.print( |
| 872 |
"[red]doctor:[/red] no viable training plan for this host. " |
| 873 |
"Run `dlm doctor` for details." |
| 874 |
) |
| 875 |
raise typer.Exit(code=1) |
| 876 |
|
| 877 |
store = for_dlm(parsed.frontmatter.dlm_id) |
| 878 |
store.ensure_layout() |
| 879 |
|
| 880 |
# `dlm init` writes a manifest as part of store provisioning. Mirror |
| 881 |
# that manifest write here when the store layout exists but has no |
| 882 |
# manifest yet — covers two flows: |
| 883 |
# - auto-scaffold via `dlm train <dir>` on a fresh directory |
| 884 |
# - hand-authored .dlm with a fresh ULID that never went through |
| 885 |
# `dlm init` (e.g. authored via the LSP / VSCode extension) |
| 886 |
# License acceptance has already been validated upstream by this |
| 887 |
# point, so we just record it. |
| 888 |
if not store.manifest.exists(): |
| 889 |
from dlm.base_models import is_gated |
| 890 |
from dlm.base_models.license import require_acceptance |
| 891 |
from dlm.store.manifest import Manifest, save_manifest |
| 892 |
|
| 893 |
acceptance = ( |
| 894 |
require_acceptance(spec, accept_license=True, via="cli_flag") |
| 895 |
if is_gated(spec) |
| 896 |
else None |
| 897 |
) |
| 898 |
save_manifest( |
| 899 |
store.manifest, |
| 900 |
Manifest( |
| 901 |
dlm_id=parsed.frontmatter.dlm_id, |
| 902 |
base_model=spec.key, |
| 903 |
base_model_revision=spec.revision, |
| 904 |
source_path=path.resolve(), |
| 905 |
license_acceptance=acceptance, |
| 906 |
), |
| 907 |
) |
| 908 |
|
| 909 |
from dlm.modality import ModalityError |
| 910 |
|
| 911 |
try: |
| 912 |
phase_results = run_phases( |
| 913 |
store, |
| 914 |
parsed, |
| 915 |
spec, |
| 916 |
plan, |
| 917 |
phase=phase_literal, |
| 918 |
mode=mode, |
| 919 |
seed=seed, |
| 920 |
max_steps=max_steps, |
| 921 |
lock_mode=lock_mode, |
| 922 |
capabilities=doctor_result.capabilities, |
| 923 |
world_size=ws, |
| 924 |
strict_metrics=strict_metrics, |
| 925 |
include_auto_mined=not no_mined, |
| 926 |
) |
| 927 |
except sqlite3.Error as exc: |
| 928 |
console.print(f"[red]metrics:[/red] {exc}") |
| 929 |
raise typer.Exit(code=1) from exc |
| 930 |
except LockValidationError as exc: |
| 931 |
console.print(f"[red]lock:[/red] {exc}") |
| 932 |
console.print( |
| 933 |
" Re-run with [bold]--update-lock[/bold] to accept the drift or " |
| 934 |
"[bold]--ignore-lock[/bold] to continue without persisting a new lock." |
| 935 |
) |
| 936 |
raise typer.Exit(code=1) from exc |
| 937 |
except DiskSpaceError as exc: |
| 938 |
console.print(f"[red]disk:[/red] {exc}") |
| 939 |
raise typer.Exit(code=1) from exc |
| 940 |
except OOMError as exc: |
| 941 |
from dlm.train import format_oom_message |
| 942 |
|
| 943 |
console.print( |
| 944 |
format_oom_message( |
| 945 |
step=exc.step, |
| 946 |
peak_bytes=exc.peak_bytes, |
| 947 |
free_at_start_bytes=exc.free_at_start_bytes, |
| 948 |
current_grad_accum=exc.current_grad_accum, |
| 949 |
recommended_grad_accum=exc.recommended_grad_accum, |
| 950 |
) |
| 951 |
) |
| 952 |
raise typer.Exit(code=1) from exc |
| 953 |
except ResumeIntegrityError as exc: |
| 954 |
console.print(f"[red]resume:[/red] {exc}") |
| 955 |
raise typer.Exit(code=1) from exc |
| 956 |
except (NoPreferenceContentError, PriorAdapterRequiredError) as exc: |
| 957 |
console.print(f"[red]dpo:[/red] {exc}") |
| 958 |
raise typer.Exit(code=1) from exc |
| 959 |
except DpoPhaseError as exc: |
| 960 |
console.print(f"[red]dpo:[/red] {exc}") |
| 961 |
raise typer.Exit(code=1) from exc |
| 962 |
except TrainingError as exc: |
| 963 |
console.print(f"[red]training:[/red] {exc}") |
| 964 |
raise typer.Exit(code=1) from exc |
| 965 |
except ModalityError as exc: |
| 966 |
console.print(f"[red]training:[/red] {exc}") |
| 967 |
raise typer.Exit(code=1) from exc |
| 968 |
|
| 969 |
if not phase_results: |
| 970 |
console.print( |
| 971 |
"[yellow]no-op:[/yellow] nothing to train for the requested phase. " |
| 972 |
"Check that the document has the section types the phase consumes " |
| 973 |
"(prose/instruction for SFT, preference for DPO)." |
| 974 |
) |
| 975 |
raise typer.Exit(code=0) |
| 976 |
|
| 977 |
for pr in phase_results: |
| 978 |
result = pr.result |
| 979 |
console.print( |
| 980 |
f"[green]{pr.phase}:[/green] v{result.adapter_version:04d} " |
| 981 |
f"({result.steps} steps, seed={result.seed}, " |
| 982 |
f"determinism={result.determinism.class_})" |
| 983 |
) |
| 984 |
console.print(f"adapter: {result.adapter_path}") |
| 985 |
console.print(f"log: {result.log_path}") |
| 986 |
# Final-train-loss stdout line mirrors the last phase so existing |
| 987 |
# downstream scripts keep working. |
| 988 |
result = phase_results[-1].result |
| 989 |
if result.final_train_loss is not None: |
| 990 |
sys.stdout.write(f"{result.final_train_loss}\n") |
| 991 |
|
| 992 |
# --watch keeps the training context alive and re-runs incremental |
| 993 |
# cycles on file change. Entered AFTER the initial train so the |
| 994 |
# loop resumes from a real committed adapter. |
| 995 |
if watch: |
| 996 |
if watch_repl: |
| 997 |
console.print( |
| 998 |
"[red]train:[/red] --watch --repl is scaffolded but not yet " |
| 999 |
"implemented. The threaded REPL bridge needs a test " |
| 1000 |
"harness we don't have in CI today." |
| 1001 |
) |
| 1002 |
raise typer.Exit(code=2) |
| 1003 |
|
| 1004 |
from dlm.watch.loop import run_watch |
| 1005 |
from dlm.watch.status import WatchStatus, render_status |
| 1006 |
|
| 1007 |
status = WatchStatus(doc_path=str(path), sections=len(parsed.sections)) |
| 1008 |
|
| 1009 |
# Start the probe-RPC server if --listen-rpc was requested. The |
| 1010 |
# queue is exposed; end-to-end flow into `build_dataset` at the |
| 1011 |
# next cycle boundary is the follow-up consumer task — for now |
| 1012 |
# the server accepts and buffers probes so sway sinks can be |
| 1013 |
# wired + tested against a live endpoint. |
| 1014 |
rpc_server = None |
| 1015 |
probe_queue = None |
| 1016 |
if rpc_config is not None: |
| 1017 |
from dlm.train.inject import InjectedProbeQueue |
| 1018 |
from dlm.train.rpc import ProbeRpcServer |
| 1019 |
|
| 1020 |
rpc_host, rpc_port, rpc_token = rpc_config |
| 1021 |
probe_queue = InjectedProbeQueue() |
| 1022 |
rpc_server = ProbeRpcServer( |
| 1023 |
host=rpc_host, port=rpc_port, token=rpc_token, queue=probe_queue |
| 1024 |
) |
| 1025 |
rpc_server.start() |
| 1026 |
bound_host, bound_port = rpc_server.address |
| 1027 |
console.print( |
| 1028 |
f"[dim]rpc:[/dim] listening on {bound_host}:{bound_port} " |
| 1029 |
f"(queue capacity {probe_queue.capacity})" |
| 1030 |
) |
| 1031 |
|
| 1032 |
console.print( |
| 1033 |
f"[dim]watch:[/dim] {render_status(status)}; " |
| 1034 |
f"max_steps={watch_max_steps}, debounce_ms={watch_debounce_ms}" |
| 1035 |
) |
| 1036 |
|
| 1037 |
def _log_cycle(result_: object) -> None: |
| 1038 |
from dlm.watch.loop import CycleResult |
| 1039 |
|
| 1040 |
assert isinstance(result_, CycleResult) |
| 1041 |
if result_.ran and result_.run_result is not None: |
| 1042 |
status.mark_cycle_done( |
| 1043 |
train_loss=result_.run_result.final_train_loss, |
| 1044 |
val_loss=result_.run_result.final_val_loss, |
| 1045 |
steps=result_.run_result.steps, |
| 1046 |
coalesced=1, |
| 1047 |
) |
| 1048 |
console.print(f"[dim]watch:[/dim] {render_status(status)}") |
| 1049 |
else: |
| 1050 |
console.print("[dim]watch:[/dim] no new content, skipping retrain") |
| 1051 |
|
| 1052 |
try: |
| 1053 |
exit_code = run_watch( |
| 1054 |
doc_path=path, |
| 1055 |
store=store, |
| 1056 |
spec=spec, |
| 1057 |
plan=plan, |
| 1058 |
max_steps=watch_max_steps, |
| 1059 |
debounce_ms=watch_debounce_ms, |
| 1060 |
on_cycle=_log_cycle, |
| 1061 |
drain_probes=probe_queue.drain if probe_queue is not None else None, |
| 1062 |
) |
| 1063 |
except KeyboardInterrupt: |
| 1064 |
if rpc_server is not None: |
| 1065 |
rpc_server.stop() |
| 1066 |
console.print("[dim]watch:[/dim] Ctrl-C received, exiting") |
| 1067 |
raise typer.Exit(code=0) # noqa: B904 |
| 1068 |
finally: |
| 1069 |
if rpc_server is not None: |
| 1070 |
rpc_server.stop() |
| 1071 |
raise typer.Exit(code=exit_code) |
| 1072 |
|
| 1073 |
# --max-cycles without --watch: the bounded-loop cycle driver is |
| 1074 |
# the next consumer-side integration step. Accept the flags, refuse |
| 1075 |
# execution until the loop lands. |
| 1076 |
if rpc_config is not None and not watch: |
| 1077 |
console.print( |
| 1078 |
"[red]train:[/red] --listen-rpc --max-cycles (without --watch) is " |
| 1079 |
"scaffolded; the bounded cycle loop is the follow-up. Use " |
| 1080 |
"--watch for now." |
| 1081 |
) |
| 1082 |
raise typer.Exit(code=2) |
| 1083 |
|
| 1084 |
|
| 1085 |
def _maybe_dispatch_multi_gpu(
    gpus_flag: str,
    argv: list[str],
    console: object,
    *,
    mixed_precision: str = "bf16",
) -> int | None:
    """Resolve `--gpus`; if multi-GPU, spawn accelerate launch and return its exit code.

    Returns None when the resolved world_size is 1 — caller falls
    through to the in-process trainer. Returns an int exit code when
    the launcher ran, so the caller can `raise typer.Exit(code=...)`.
    """
    from rich.console import Console

    from dlm.train.distributed import UnsupportedGpuSpecError, launch_multi_gpu, parse_gpus

    assert isinstance(console, Console)

    # Parse the flag first; a malformed spec is a usage error (exit 2).
    try:
        gpu_spec = parse_gpus(gpus_flag)
    except UnsupportedGpuSpecError as exc:
        console.print(f"[red]train:[/red] {exc}")
        return 2

    # Probe CUDA for the visible device count. Any torch failure
    # (missing install, broken driver, ...) degrades to "no GPUs".
    try:
        import torch

        available = int(torch.cuda.device_count())
    except Exception:  # pragma: no cover - torch probing has many failure modes
        available = 0

    # Resolving against the actual device count can also fail (e.g.
    # requesting more GPUs than exist) — same usage-error exit.
    try:
        device_ids = gpu_spec.resolve(available)
    except UnsupportedGpuSpecError as exc:
        console.print(f"[red]train:[/red] {exc}")
        return 2

    # Single-GPU (or --gpus 1) — no subprocess needed. Caller
    # continues with the in-process path.
    if len(device_ids) < 2:
        return None

    # Forward the original argv minus `--gpus` / `--gpus=...`; the
    # worker entry strips it defensively too, but we drop it here so
    # the launched accelerate cmd carries exactly the intended args.
    forwarded = _strip_gpus_from_argv(argv)
    console.print(
        f"[dim]train:[/dim] dispatching to accelerate launch on devices {list(device_ids)} "
        f"(mixed_precision={mixed_precision})"
    )
    return launch_multi_gpu(device_ids, forwarded, mixed_precision=mixed_precision)
| 1137 |
|
| 1138 |
|
| 1139 |
def _strip_gpus_from_argv(argv: list[str]) -> list[str]:
    """Drop `--gpus <v>` / `--gpus=<v>` from raw sys.argv (launcher side).

    Skips argv[0] (script path) — `accelerate launch -m <entry>`
    provides the rank entrypoint separately, so the launcher forwards
    argv[1:] minus the multi-GPU flag. Delegates to the shared
    `strip_gpus_flag` helper.
    """
    # Local import keeps `dlm.train.distributed` off the CLI import path
    # until a multi-GPU dispatch actually happens.
    from dlm.train.distributed.gpus import strip_gpus_flag

    return strip_gpus_flag(argv, skip_argv0=True)
| 1150 |
|
| 1151 |
|
| 1152 |
def prompt_cmd(
    ctx: typer.Context,
    path: Annotated[Path, typer.Argument(help=".dlm file to query.")],
    query: Annotated[str | None, typer.Argument(help="One-shot prompt (omit for stdin).")] = None,
    max_tokens: Annotated[
        int,
        typer.Option("--max-tokens", help="Max new tokens to generate."),
    ] = 256,
    temp: Annotated[
        float,
        typer.Option("--temp", help="Sampling temperature. `0.0` = greedy decoding."),
    ] = 0.7,
    top_p: Annotated[
        float | None,
        typer.Option(
            "--top-p",
            help="Top-p sampling cutoff. Omit to disable nucleus sampling.",
        ),
    ] = None,
    verbose: Annotated[bool, typer.Option("--verbose", help="Log resolved InferencePlan.")] = False,
    adapter: Annotated[
        str | None,
        typer.Option(
            "--adapter",
            help=(
                "Named adapter to prompt against. Required on multi-adapter "
                "documents; rejected on single-adapter documents."
            ),
        ),
    ] = None,
    gate: Annotated[
        str,
        typer.Option(
            "--gate",
            help=(
                "Learned adapter gate. `auto` (default) uses the "
                "gate when one exists in the store; `off` forces uniform "
                "weights across declared adapters. Ignored when --adapter "
                "explicitly pins a single adapter."
            ),
        ),
    ] = "auto",
    backend: Annotated[
        str,
        typer.Option(
            "--backend",
            help=(
                "Inference backend: `auto` (default) picks MLX on Apple "
                "Silicon, else PyTorch. Force with `pytorch` or `mlx`. "
                "MLX requires `uv sync --extra mlx` on darwin-arm64."
            ),
        ),
    ] = "auto",
    image: Annotated[
        list[Path] | None,
        typer.Option(
            "--image",
            help=(
                "Attach an image file to the prompt. Repeat for multiple "
                "images; each expands to the base's image-token placeholder. "
                "Requires a vision-language base."
            ),
        ),
    ] = None,
    audio: Annotated[
        list[Path] | None,
        typer.Option(
            "--audio",
            help=(
                "Attach an audio file (.wav/.flac/.ogg) to the prompt. "
                "Repeat for multiple clips; each expands to the base's "
                "audio-token placeholder. Requires an audio-language base "
                "(for example Qwen2-Audio-7B-Instruct)."
            ),
        ),
    ] = None,
) -> None:
    """Run inference against the trained adapter.

    Flag validation happens before any model loading so usage errors
    (exit 2) are cheap. Dispatch order: VL branch, audio branch, then
    the text backends (pytorch/mlx). The generated text goes to stdout;
    all diagnostics go to stderr via rich.
    """
    import sys

    from rich.console import Console

    from dlm.base_models import resolve as resolve_base_model
    # NOTE(review): DlmParseError assumed to live alongside parse_file —
    # the train flow catches it by this name; confirm the export.
    from dlm.doc.parser import DlmParseError, parse_file
    from dlm.hardware import doctor
    from dlm.inference import AdapterNotFoundError
    from dlm.inference.backends import (
        UnsupportedBackendError,
        build_backend,
        select_backend,
    )
    from dlm.store.paths import for_dlm

    # Diagnostics on stderr so stdout stays clean for the generation.
    console = Console(stderr=True)

    if backend not in ("auto", "pytorch", "mlx"):
        console.print(
            f"[red]prompt:[/red] --backend must be `auto`, `pytorch`, or `mlx` (got {backend!r})."
        )
        raise typer.Exit(code=2)

    # Typer passes None when the option was never given; normalize early so
    # downstream branching can just check truthiness + len().
    image_paths: list[Path] = list(image or [])
    audio_paths: list[Path] = list(audio or [])
    if image_paths and audio_paths:
        console.print(
            "[red]prompt:[/red] --image and --audio cannot be combined "
            "(each targets a different modality)."
        )
        raise typer.Exit(code=2)

    from dlm.base_models import GatedModelError

    # Mirror the train flow's parse handling: a malformed or unreadable
    # .dlm is reported cleanly (exit 1) instead of dumping a traceback.
    try:
        parsed = parse_file(path)
    except (DlmParseError, OSError) as exc:
        console.print(f"[red]prompt:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    # --adapter is only meaningful (and must name a declared entry) on
    # multi-adapter documents.
    adapters_declared = parsed.frontmatter.training.adapters
    if adapter is not None:
        if adapters_declared is None:
            console.print(
                "[red]prompt:[/red] --adapter is only valid on multi-adapter "
                "documents (this doc does not declare `training.adapters`)."
            )
            raise typer.Exit(code=2)
        if adapter not in adapters_declared:
            declared = sorted(adapters_declared)
            console.print(
                f"[red]prompt:[/red] --adapter {adapter!r} is not declared (declared: {declared})."
            )
            raise typer.Exit(code=2)

    if gate not in ("auto", "off"):
        console.print(f"[red]prompt:[/red] --gate must be `auto` or `off`, got {gate!r}.")
        raise typer.Exit(code=2)
    # --adapter explicitly pins a single adapter — gate routing is moot.
    # We silently ignore --gate in that case (the flag has a non-default
    # value only when the user cares, and pairing it with --adapter is
    # not an error, just a no-op).

    store = for_dlm(parsed.frontmatter.dlm_id)
    # A prior `--i-accept-license` acceptance recorded in the store
    # manifest unlocks gated bases without re-prompting.
    already_accepted = _previously_accepted(store.manifest)
    try:
        spec = resolve_base_model(parsed.frontmatter.base_model, accept_license=already_accepted)
    except GatedModelError as exc:
        console.print(
            f"[red]license:[/red] base {parsed.frontmatter.base_model!r} is gated and has "
            "no recorded acceptance in this store; run `dlm train --i-accept-license` first."
        )
        raise typer.Exit(code=1) from exc
    caps = doctor().capabilities

    # --- VL path -------------------------------------------------------
    # The VL branch has its own model / processor / adapter loader and
    # its own generate function. `--image` and vision-language bases
    # must appear together; each alone is a usage error.
    from dlm.modality import modality_for

    dispatch = modality_for(spec)
    from click.core import ParameterSource

    # Only override --temp when the user left it at the default, so an
    # explicit 0.7 still wins over the spec's suggestion.
    if ctx.get_parameter_source("temp") == ParameterSource.DEFAULT:
        temp = spec.suggested_prompt_temperature
    if image_paths and not dispatch.accepts_images:
        console.print(
            f"[red]prompt:[/red] --image is only valid with vision-language bases; "
            f"base {spec.key!r} is modality='{spec.modality}'."
        )
        raise typer.Exit(code=2)
    if dispatch.accepts_images and not image_paths:
        console.print(
            f"[red]prompt:[/red] base {spec.key!r} is vision-language; "
            "pass at least one --image PATH to prompt it."
        )
        raise typer.Exit(code=2)
    if dispatch.accepts_images:
        _dispatch_vl_prompt(
            console=console,
            spec=spec,
            store=store,
            caps=caps,
            adapter_name=adapter,
            image_paths=image_paths,
            query=query,
            max_tokens=max_tokens,
            temp=temp,
            top_p=top_p,
            verbose=verbose,
        )
        return

    # --- Audio path ----------------------------------------------------
    if audio_paths and not dispatch.accepts_audio:
        console.print(
            f"[red]prompt:[/red] --audio is only valid with audio-language bases; "
            f"base {spec.key!r} is modality='{spec.modality}'."
        )
        raise typer.Exit(code=2)
    if dispatch.accepts_audio and not audio_paths:
        console.print(
            f"[red]prompt:[/red] base {spec.key!r} is audio-language; "
            "pass at least one --audio PATH to prompt it."
        )
        raise typer.Exit(code=2)
    if dispatch.accepts_audio:
        _dispatch_audio_prompt(
            console=console,
            spec=spec,
            store=store,
            caps=caps,
            adapter_name=adapter,
            audio_paths=audio_paths,
            query=query,
            max_tokens=max_tokens,
            temp=temp,
            top_p=top_p,
            verbose=verbose,
            auto_resample=parsed.frontmatter.training.audio.auto_resample,
        )
        return

    # --- Text path ------------------------------------------------------
    try:
        backend_name = select_backend(backend, caps)  # type: ignore[arg-type]
    except UnsupportedBackendError as exc:
        console.print(f"[red]prompt:[/red] {exc}")
        raise typer.Exit(code=2) from exc
    backend_obj = build_backend(backend_name, caps)

    if verbose:
        console.print(f"[dim]backend:[/dim] {backend_name}")

    try:
        backend_obj.load(spec, store, adapter_name=adapter)
    except AdapterNotFoundError as exc:
        console.print(f"[red]prompt:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    # No positional query → read the prompt from stdin (pipe mode).
    if query is None:
        query = sys.stdin.read().strip()
    if not query:
        console.print("[red]prompt:[/red] empty query (pass a string or pipe on stdin)")
        raise typer.Exit(code=2)

    response = backend_obj.generate(
        query,
        max_new_tokens=max_tokens,
        temperature=temp,
        top_p=top_p,
    )
    sys.stdout.write(response + "\n")
| 1400 |
|
| 1401 |
|
| 1402 |
def _dispatch_vl_prompt(  # pragma: no cover
    *,
    console: Any,
    spec: Any,
    store: Any,
    caps: Any,
    adapter_name: str | None,
    image_paths: list[Path],
    query: str | None,
    max_tokens: int,
    temp: float,
    top_p: float | None,
    verbose: bool,
) -> None:
    """Run the VL generate path. Keeps `prompt_cmd` readable.

    Pragma'd from unit coverage because it calls the VL HF stack.
    Covered by the slow-marked vision-language integration test (T12).
    """
    import sys

    import typer

    from dlm.inference import (
        AdapterNotFoundError,
        generate_vl,
        load_for_vl_inference,
        load_images,
    )
    from dlm.modality import ProcessorContractError

    if verbose:
        console.print("[dim]vl-backend:[/dim] pytorch (AutoModelForImageTextToText)")

    # Both loader failures are user-actionable and terminal — report and
    # exit 1 uniformly.
    try:
        vl_loaded = load_for_vl_inference(store, spec, caps, adapter_name=adapter_name)
    except (AdapterNotFoundError, ProcessorContractError) as exc:
        console.print(f"[red]prompt:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    # A missing image file is a usage error (exit 2).
    try:
        pil_images = load_images(image_paths)
    except FileNotFoundError as exc:
        console.print(f"[red]prompt:[/red] {exc}")
        raise typer.Exit(code=2) from exc

    # No positional query → read the prompt from stdin (pipe mode).
    if query is None:
        query = sys.stdin.read().strip()
    if not query:
        console.print("[red]prompt:[/red] empty query (pass a string or pipe on stdin)")
        raise typer.Exit(code=2)

    # Every VL spec in the registry must declare a preprocessor plan
    # (schema validator); the fallback is defensive for the hf: escape
    # hatch, which could in principle skip one.
    plan = spec.vl_preprocessor_plan
    image_token = "<image>" if plan is None else plan.image_token

    response = generate_vl(
        vl_loaded.model,
        vl_loaded.processor,
        query,
        pil_images,
        image_token=image_token,
        max_new_tokens=max_tokens,
        temperature=temp,
        top_p=top_p,
    )
    sys.stdout.write(response + "\n")
| 1475 |
|
| 1476 |
|
| 1477 |
def _dispatch_audio_prompt(  # pragma: no cover
    *,
    console: Any,
    spec: Any,
    store: Any,
    caps: Any,
    adapter_name: str | None,
    audio_paths: list[Path],
    query: str | None,
    max_tokens: int,
    temp: float,
    top_p: float | None,
    verbose: bool,
    auto_resample: bool = False,
) -> None:
    """Run the audio-LM generate path. Keeps `prompt_cmd` readable.

    Pragma'd from unit coverage because it calls the audio HF stack.
    Covered by the slow-marked audio integration test (T12).
    """
    import sys

    import typer

    from dlm.inference import (
        AdapterNotFoundError,
        generate_audio,
        load_audios,
        load_for_audio_inference,
    )

    if verbose:
        console.print(f"[dim]audio-backend:[/dim] pytorch ({spec.architecture})")

    try:
        audio_loaded = load_for_audio_inference(store, spec, caps, adapter_name=adapter_name)
    except AdapterNotFoundError as exc:
        console.print(f"[red]prompt:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    plan = spec.audio_preprocessor_plan
    if plan is None:
        # Defensive — every registry audio spec carries the plan, but
        # the hf: escape hatch could skip it.
        console.print(
            f"[red]prompt:[/red] base {spec.key!r} is audio-language "
            "but has no audio_preprocessor_plan; cannot resolve sample rate."
        )
        raise typer.Exit(code=2)

    target_sr = plan.sample_rate
    # FileNotFoundError: missing clip on disk. ValueError: sample-rate
    # mismatch (its message carries the actionable ffmpeg hint). Both
    # are usage errors, hence the shared exit code 2.
    try:
        clips = load_audios(
            audio_paths,
            target_sample_rate=target_sr,
            auto_resample=auto_resample,
        )
    except (FileNotFoundError, ValueError) as exc:
        console.print(f"[red]prompt:[/red] {exc}")
        raise typer.Exit(code=2) from exc

    # No positional query → read the prompt from stdin (pipe mode).
    if query is None:
        query = sys.stdin.read().strip()
    if not query:
        console.print("[red]prompt:[/red] empty query (pass a string or pipe on stdin)")
        raise typer.Exit(code=2)

    response = generate_audio(
        audio_loaded.model,
        audio_loaded.processor,
        query,
        clips,
        audio_token=plan.audio_token,
        sample_rate=target_sr,
        max_new_tokens=max_tokens,
        temperature=temp,
        top_p=top_p,
    )
    sys.stdout.write(response + "\n")
| 1561 |
|
| 1562 |
|
| 1563 |
def export_cmd( |
| 1564 |
path: Annotated[Path, typer.Argument(help=".dlm file to export.")], |
| 1565 |
target: Annotated[ |
| 1566 |
str, |
| 1567 |
typer.Option( |
| 1568 |
"--target", |
| 1569 |
help="Export destination. Currently supported: ollama, llama-server, vllm, mlx-serve.", |
| 1570 |
), |
| 1571 |
] = "ollama", |
| 1572 |
quant: Annotated[ |
| 1573 |
str | None, |
| 1574 |
typer.Option("--quant", help="GGUF quant level (defaults to frontmatter)."), |
| 1575 |
] = None, |
| 1576 |
merged: Annotated[ |
| 1577 |
bool, |
| 1578 |
typer.Option("--merged", help="Merge the adapter into the base before export."), |
| 1579 |
] = False, |
| 1580 |
dequantize: Annotated[ |
| 1581 |
bool, |
| 1582 |
typer.Option( |
| 1583 |
"--dequantize", |
| 1584 |
help="Dequantize a QLoRA base to fp16 before merging.", |
| 1585 |
), |
| 1586 |
] = False, |
| 1587 |
name: Annotated[str | None, typer.Option("--name", help="Ollama model name.")] = None, |
| 1588 |
no_template: Annotated[ |
| 1589 |
bool, |
| 1590 |
typer.Option("--no-template", help="Skip writing TEMPLATE into the Modelfile."), |
| 1591 |
] = False, |
| 1592 |
no_smoke: Annotated[ |
| 1593 |
bool, |
| 1594 |
typer.Option("--no-smoke", help="Register the export but skip the smoke prompt."), |
| 1595 |
] = False, |
| 1596 |
no_imatrix: Annotated[ |
| 1597 |
bool, |
| 1598 |
typer.Option( |
| 1599 |
"--no-imatrix", |
| 1600 |
help=( |
| 1601 |
"Skip importance-matrix calibration. Default uses the " |
| 1602 |
"replay corpus to calibrate k-quant quantization." |
| 1603 |
), |
| 1604 |
), |
| 1605 |
] = False, |
| 1606 |
draft: Annotated[ |
| 1607 |
str | None, |
| 1608 |
typer.Option( |
| 1609 |
"--draft", |
| 1610 |
help=( |
| 1611 |
"Speculative-decoding draft model Ollama tag " |
| 1612 |
"(e.g. qwen2.5:0.5b). Default uses the registered pair " |
| 1613 |
"for this base; override here to pick a custom draft." |
| 1614 |
), |
| 1615 |
), |
| 1616 |
] = None, |
| 1617 |
no_draft: Annotated[ |
| 1618 |
bool, |
| 1619 |
typer.Option( |
| 1620 |
"--no-draft", |
| 1621 |
help="Suppress PARAMETER draft_model emission even when a pair is registered.", |
| 1622 |
), |
| 1623 |
] = False, |
| 1624 |
skip_ollama: Annotated[ |
| 1625 |
bool, |
| 1626 |
typer.Option( |
| 1627 |
"--skip-ollama", |
| 1628 |
help="Emit GGUFs + manifest only; do not touch the Ollama binary.", |
| 1629 |
), |
| 1630 |
] = False, |
| 1631 |
adapter: Annotated[ |
| 1632 |
str | None, |
| 1633 |
typer.Option( |
| 1634 |
"--adapter", |
| 1635 |
help=( |
| 1636 |
"Named adapter to export. Required on multi-adapter " |
| 1637 |
"documents; rejected on single-adapter documents." |
| 1638 |
), |
| 1639 |
), |
| 1640 |
] = None, |
| 1641 |
adapter_mix: Annotated[ |
| 1642 |
str | None, |
| 1643 |
typer.Option( |
| 1644 |
"--adapter-mix", |
| 1645 |
help=( |
| 1646 |
"Weighted composition of named adapters, e.g. " |
| 1647 |
"`knowledge:1.0,tone:0.5`. Mutually exclusive with --adapter. " |
| 1648 |
"Multi-adapter docs only. LoRA-only; QLoRA requires " |
| 1649 |
"--dequantize." |
| 1650 |
), |
| 1651 |
), |
| 1652 |
] = None, |
| 1653 |
adapter_mix_method: Annotated[ |
| 1654 |
str, |
| 1655 |
typer.Option( |
| 1656 |
"--adapter-mix-method", |
| 1657 |
help=( |
| 1658 |
"PEFT combination strategy for --adapter-mix. `linear` " |
| 1659 |
"(default) sums LoRA deltas; `svd` recomposes via SVD " |
| 1660 |
"(higher fidelity, heavier compute). Only meaningful " |
| 1661 |
"with --adapter-mix." |
| 1662 |
), |
| 1663 |
), |
| 1664 |
] = "linear", |
| 1665 |
verbose: Annotated[ |
| 1666 |
bool, |
| 1667 |
typer.Option("--verbose", help="Log each subprocess command as it launches."), |
| 1668 |
] = False, |
| 1669 |
emit_sway_json: Annotated[ |
| 1670 |
bool, |
| 1671 |
typer.Option( |
| 1672 |
"--emit-sway-json", |
| 1673 |
help=( |
| 1674 |
"After the export, also write a ready-to-run sway.yaml " |
| 1675 |
"(via dlm-sway autogen) into the export dir. Requires the " |
| 1676 |
"[sway] extra: pip install 'dlm[sway]'." |
| 1677 |
), |
| 1678 |
), |
| 1679 |
] = False, |
| 1680 |
) -> None: |
| 1681 |
"""Export the adapter to a runtime target.""" |
| 1682 |
|
| 1683 |
from rich.console import Console |
| 1684 |
|
| 1685 |
from dlm.base_models import GatedModelError, download_spec |
| 1686 |
from dlm.base_models import resolve as resolve_base_model |
| 1687 |
from dlm.doc.parser import parse_file |
| 1688 |
from dlm.export import ( |
| 1689 |
ExportError, |
| 1690 |
PreflightError, |
| 1691 |
SubprocessError, |
| 1692 |
UnknownExportTargetError, |
| 1693 |
UnsafeMergeError, |
| 1694 |
VendoringError, |
| 1695 |
resolve_export_plan, |
| 1696 |
run_export, |
| 1697 |
) |
| 1698 |
from dlm.export.ollama import ( |
| 1699 |
OllamaBinaryNotFoundError, |
| 1700 |
OllamaCreateError, |
| 1701 |
OllamaError, |
| 1702 |
OllamaSmokeError, |
| 1703 |
OllamaVersionError, |
| 1704 |
) |
| 1705 |
from dlm.export.quantize import run_checked |
| 1706 |
from dlm.export.targets import ( |
| 1707 |
finalize_mlx_serve_export, |
| 1708 |
finalize_vllm_export, |
| 1709 |
prepare_llama_server_export, |
| 1710 |
prepare_mlx_serve_export, |
| 1711 |
prepare_vllm_export, |
| 1712 |
resolve_target, |
| 1713 |
) |
| 1714 |
from dlm.store.paths import for_dlm |
| 1715 |
|
| 1716 |
console = Console(stderr=True) |
| 1717 |
|
| 1718 |
if draft is not None and no_draft: |
| 1719 |
console.print("[red]error:[/red] --draft and --no-draft are mutually exclusive; pick one.") |
| 1720 |
raise typer.Exit(code=2) |
| 1721 |
if adapter is not None and adapter_mix is not None: |
| 1722 |
console.print( |
| 1723 |
"[red]export:[/red] --adapter and --adapter-mix are mutually exclusive; pick one." |
| 1724 |
) |
| 1725 |
raise typer.Exit(code=2) |
| 1726 |
try: |
| 1727 |
resolved_target = resolve_target(target) |
| 1728 |
except UnknownExportTargetError as exc: |
| 1729 |
console.print(f"[red]export:[/red] {exc}") |
| 1730 |
raise typer.Exit(code=2) from exc |
| 1731 |
parsed = parse_file(path) |
| 1732 |
adapters_declared = parsed.frontmatter.training.adapters |
| 1733 |
if adapter is not None: |
| 1734 |
if adapters_declared is None: |
| 1735 |
console.print( |
| 1736 |
"[red]export:[/red] --adapter is only valid on multi-adapter " |
| 1737 |
"documents (this doc does not declare `training.adapters`)." |
| 1738 |
) |
| 1739 |
raise typer.Exit(code=2) |
| 1740 |
if adapter not in adapters_declared: |
| 1741 |
declared = sorted(adapters_declared) |
| 1742 |
console.print( |
| 1743 |
f"[red]export:[/red] --adapter {adapter!r} is not declared (declared: {declared})." |
| 1744 |
) |
| 1745 |
raise typer.Exit(code=2) |
| 1746 |
|
| 1747 |
mix_entries: list[tuple[str, float]] | None = None |
| 1748 |
if adapter_mix is not None: |
| 1749 |
from dlm.export.weighted_merge import ( |
| 1750 |
InvalidMixSpecError, |
| 1751 |
parse_mix_spec, |
| 1752 |
validate_mix_against_declared, |
| 1753 |
) |
| 1754 |
|
| 1755 |
if adapters_declared is None: |
| 1756 |
console.print( |
| 1757 |
"[red]export:[/red] --adapter-mix is only valid on multi-adapter " |
| 1758 |
"documents (this doc does not declare `training.adapters`)." |
| 1759 |
) |
| 1760 |
raise typer.Exit(code=2) |
| 1761 |
if adapter_mix_method not in ("linear", "svd"): |
| 1762 |
console.print( |
| 1763 |
f"[red]export:[/red] --adapter-mix-method must be " |
| 1764 |
f"`linear` or `svd`, got {adapter_mix_method!r}." |
| 1765 |
) |
| 1766 |
raise typer.Exit(code=2) |
| 1767 |
try: |
| 1768 |
entries = parse_mix_spec(adapter_mix) |
| 1769 |
validate_mix_against_declared(entries, set(adapters_declared)) |
| 1770 |
except InvalidMixSpecError as exc: |
| 1771 |
console.print(f"[red]export:[/red] {exc}") |
| 1772 |
raise typer.Exit(code=2) from exc |
| 1773 |
mix_entries = [(e.name, e.weight) for e in entries] |
| 1774 |
|
| 1775 |
store = for_dlm(parsed.frontmatter.dlm_id) |
| 1776 |
|
| 1777 |
# Gate-driven static mix: when the doc has an enabled gate and the |
| 1778 |
# user didn't pass --adapter-mix / --adapter, freeze the learned |
| 1779 |
# gate to per-adapter weights for the GGUF export path. Dynamic |
| 1780 |
# routing only lives in the `dlm prompt` flow; the runtime can't |
| 1781 |
# evaluate the torch gate, so we substitute the prior here. A CLI |
| 1782 |
# --adapter-mix wins — users who know what they want get full |
| 1783 |
# control. |
| 1784 |
if mix_entries is None and adapter is None: |
| 1785 |
from dlm.export.gate_fallback import resolve_and_announce |
| 1786 |
|
| 1787 |
resolution = resolve_and_announce(store, parsed) |
| 1788 |
if resolution.entries is not None: |
| 1789 |
mix_entries = resolution.entries |
| 1790 |
for line in resolution.banner_lines: |
| 1791 |
console.print(line) |
| 1792 |
|
| 1793 |
already_accepted = _previously_accepted(store.manifest) |
| 1794 |
try: |
| 1795 |
spec = resolve_base_model(parsed.frontmatter.base_model, accept_license=already_accepted) |
| 1796 |
except GatedModelError as exc: |
| 1797 |
console.print(f"[red]license:[/red] base model {parsed.frontmatter.base_model!r} is gated.") |
| 1798 |
if exc.license_url: |
| 1799 |
console.print(f" review the license at: {exc.license_url}") |
| 1800 |
console.print(" accept via `dlm train --i-accept-license` before exporting.") |
| 1801 |
raise typer.Exit(code=1) from exc |
| 1802 |
|
| 1803 |
# Audio bases take HF-snapshot unconditionally — llama.cpp has no |
| 1804 |
# audio-arch roadmap at our pinned tag — so branch early without |
| 1805 |
# resolving a GGUF plan. |
| 1806 |
from dlm.modality import modality_for |
| 1807 |
|
| 1808 |
export_dispatch = modality_for(spec) |
| 1809 |
if resolved_target.name == "vllm" and export_dispatch.accepts_audio: |
| 1810 |
console.print( |
| 1811 |
"[red]export:[/red] --target vllm is not wired for audio-language " |
| 1812 |
"documents yet; the current vllm export path only supports text bases." |
| 1813 |
) |
| 1814 |
raise typer.Exit(code=2) |
| 1815 |
if resolved_target.name == "mlx-serve" and export_dispatch.accepts_audio: |
| 1816 |
console.print( |
| 1817 |
"[red]export:[/red] --target mlx-serve is not wired for audio-language " |
| 1818 |
"documents yet; the current mlx-serve export path only supports text bases." |
| 1819 |
) |
| 1820 |
raise typer.Exit(code=2) |
| 1821 |
if export_dispatch.accepts_audio: |
| 1822 |
try: |
| 1823 |
dispatch_result = export_dispatch.dispatch_export( |
| 1824 |
store=store, |
| 1825 |
spec=spec, |
| 1826 |
adapter_name=adapter, |
| 1827 |
quant=quant, |
| 1828 |
merged=merged, |
| 1829 |
adapter_mix_raw=adapter_mix, |
| 1830 |
) |
| 1831 |
except ExportError as exc: |
| 1832 |
console.print(f"[red]export:[/red] {exc}") |
| 1833 |
raise typer.Exit(code=1) from exc |
| 1834 |
assert dispatch_result is not None # audio modality always returns a result |
| 1835 |
for line in dispatch_result.banner_lines: |
| 1836 |
console.print(line) |
| 1837 |
return |
| 1838 |
|
| 1839 |
try: |
| 1840 |
plan = resolve_export_plan( |
| 1841 |
cli_quant=quant, |
| 1842 |
cli_merged=merged, |
| 1843 |
cli_dequantize=dequantize, |
| 1844 |
cli_no_template=no_template, |
| 1845 |
cli_ollama_name=name, |
| 1846 |
cli_no_imatrix=no_imatrix, |
| 1847 |
frontmatter_default_quant=parsed.frontmatter.export.default_quant, |
| 1848 |
) |
| 1849 |
except ValueError as exc: |
| 1850 |
console.print(f"[red]export:[/red] {exc}") |
| 1851 |
raise typer.Exit(code=2) from exc |
| 1852 |
|
| 1853 |
store.ensure_layout() |
| 1854 |
|
| 1855 |
# VL bases: arch-probe + try single-file GGUF on SUPPORTED (with |
| 1856 |
# fallback to HF-snapshot on refusal or subprocess failure). A |
| 1857 |
# missing local base snapshot should not hard-fail the whole |
| 1858 |
# export — the dispatcher can still emit the HF-snapshot path |
| 1859 |
# without GGUF context. |
| 1860 |
if resolved_target.name == "vllm" and export_dispatch.accepts_images: |
| 1861 |
console.print( |
| 1862 |
"[red]export:[/red] --target vllm is not wired for vision-language " |
| 1863 |
"documents yet; the current vllm export path only supports text bases." |
| 1864 |
) |
| 1865 |
raise typer.Exit(code=2) |
| 1866 |
if resolved_target.name == "mlx-serve" and export_dispatch.accepts_images: |
| 1867 |
console.print( |
| 1868 |
"[red]export:[/red] --target mlx-serve is not wired for vision-language " |
| 1869 |
"documents yet; the current mlx-serve export path only supports text bases." |
| 1870 |
) |
| 1871 |
raise typer.Exit(code=2) |
| 1872 |
if export_dispatch.accepts_images: |
| 1873 |
gguf_emission_context = None |
| 1874 |
try: |
| 1875 |
cached_vl = download_spec(spec, local_files_only=True) |
| 1876 |
except RuntimeError as exc: |
| 1877 |
_ = exc |
| 1878 |
else: |
| 1879 |
gguf_emission_context = { |
| 1880 |
"plan": plan, |
| 1881 |
"cached_base_dir": cached_vl.path, |
| 1882 |
"source_dlm_path": path.resolve(), |
| 1883 |
"training_sequence_len": parsed.frontmatter.training.sequence_len, |
| 1884 |
"dlm_version": f"v{parsed.frontmatter.dlm_version}", |
| 1885 |
} |
| 1886 |
try: |
| 1887 |
dispatch_result = export_dispatch.dispatch_export( |
| 1888 |
store=store, |
| 1889 |
spec=spec, |
| 1890 |
adapter_name=adapter, |
| 1891 |
quant=quant, |
| 1892 |
merged=merged, |
| 1893 |
adapter_mix_raw=adapter_mix, |
| 1894 |
gguf_emission_context=gguf_emission_context, |
| 1895 |
) |
| 1896 |
except ExportError as exc: |
| 1897 |
console.print(f"[red]export:[/red] {exc}") |
| 1898 |
raise typer.Exit(code=1) from exc |
| 1899 |
assert dispatch_result is not None # VL modality always returns a result |
| 1900 |
for line in dispatch_result.banner_lines: |
| 1901 |
console.print(line) |
| 1902 |
return |
| 1903 |
|
| 1904 |
try: |
| 1905 |
cached = download_spec(spec, local_files_only=True) |
| 1906 |
except RuntimeError as exc: |
| 1907 |
console.print( |
| 1908 |
f"[red]export:[/red] base model not in local cache — run `dlm train` first.\n {exc}" |
| 1909 |
) |
| 1910 |
raise typer.Exit(code=1) from exc |
| 1911 |
|
| 1912 |
def _verbose_runner(cmd: Sequence[str]) -> object: |
| 1913 |
console.print(f"[dim]$ {' '.join(cmd)}[/dim]") |
| 1914 |
return run_checked(cmd) |
| 1915 |
|
| 1916 |
adapter_path_override = None |
| 1917 |
if mix_entries is not None: # pragma: no cover - heavy path |
| 1918 |
# Build the weighted-merged adapter into an ephemeral dir, |
| 1919 |
# then feed the path to run_export as an override. The tmp |
| 1920 |
# dir lives under the store's cache/ so it cleans up with |
| 1921 |
# the rest of the store on `dlm pack`. |
| 1922 |
from dlm.export.weighted_merge import MixEntry, build_and_stage |
| 1923 |
|
| 1924 |
entries_typed = [MixEntry(name=n, weight=w) for (n, w) in mix_entries] |
| 1925 |
adapter_path_override = build_and_stage( |
| 1926 |
store=store, |
| 1927 |
spec=spec, |
| 1928 |
cached_base_dir=cached.path, |
| 1929 |
entries=entries_typed, |
| 1930 |
combination_type=adapter_mix_method, # type: ignore[arg-type] |
| 1931 |
) |
| 1932 |
|
| 1933 |
if resolved_target.name == "vllm": |
| 1934 |
ignored_flags: list[str] = [] |
| 1935 |
if quant is not None: |
| 1936 |
ignored_flags.append("--quant") |
| 1937 |
if merged: |
| 1938 |
ignored_flags.append("--merged") |
| 1939 |
if dequantize: |
| 1940 |
ignored_flags.append("--dequantize") |
| 1941 |
if no_template: |
| 1942 |
ignored_flags.append("--no-template") |
| 1943 |
if skip_ollama: |
| 1944 |
ignored_flags.append("--skip-ollama") |
| 1945 |
if no_imatrix: |
| 1946 |
ignored_flags.append("--no-imatrix") |
| 1947 |
if draft is not None: |
| 1948 |
ignored_flags.append("--draft") |
| 1949 |
if no_draft: |
| 1950 |
ignored_flags.append("--no-draft") |
| 1951 |
if ignored_flags: |
| 1952 |
console.print( |
| 1953 |
"[yellow]export:[/yellow] ignoring flags not applicable to " |
| 1954 |
f"`--target vllm`: {', '.join(ignored_flags)}" |
| 1955 |
) |
| 1956 |
|
| 1957 |
declared_adapter_names = tuple(adapters_declared.keys()) if adapters_declared else None |
| 1958 |
try: |
| 1959 |
vllm_result = prepare_vllm_export( |
| 1960 |
store=store, |
| 1961 |
spec=spec, |
| 1962 |
served_model_name=name or f"dlm-{parsed.frontmatter.dlm_id.lower()}", |
| 1963 |
training_sequence_len=parsed.frontmatter.training.sequence_len, |
| 1964 |
adapter_name=adapter, |
| 1965 |
adapter_path_override=adapter_path_override, |
| 1966 |
declared_adapter_names=declared_adapter_names, |
| 1967 |
) |
| 1968 |
except ExportError as exc: |
| 1969 |
console.print(f"[red]export:[/red] {exc}") |
| 1970 |
raise typer.Exit(code=1) from exc |
| 1971 |
|
| 1972 |
vllm_smoke = None if no_smoke else resolved_target.smoke_test(vllm_result) |
| 1973 |
if vllm_smoke is not None and not vllm_smoke.ok: |
| 1974 |
console.print( |
| 1975 |
f"[red]smoke:[/red] {vllm_smoke.detail}\n" |
| 1976 |
" re-run with `--no-smoke` to skip the smoke test." |
| 1977 |
) |
| 1978 |
raise typer.Exit(code=1) |
| 1979 |
|
| 1980 |
manifest_path = finalize_vllm_export( |
| 1981 |
store=store, |
| 1982 |
spec=spec, |
| 1983 |
prepared=vllm_result, |
| 1984 |
smoke_output_first_line=None if vllm_smoke is None else vllm_smoke.detail, |
| 1985 |
adapter_name=adapter, |
| 1986 |
adapter_mix=mix_entries, |
| 1987 |
) |
| 1988 |
console.print(f"[green]exported:[/green] {vllm_result.export_dir}") |
| 1989 |
console.print("target: vllm") |
| 1990 |
assert vllm_result.launch_script_path is not None |
| 1991 |
assert vllm_result.config_path is not None |
| 1992 |
console.print(f"launch: {vllm_result.launch_script_path.name}") |
| 1993 |
console.print(f"config: {vllm_result.config_path.name}") |
| 1994 |
console.print(f"manifest: {manifest_path.name}") |
| 1995 |
if vllm_smoke is not None and vllm_smoke.detail: |
| 1996 |
console.print(f"smoke: {vllm_smoke.detail}") |
| 1997 |
return |
| 1998 |
|
| 1999 |
if resolved_target.name == "mlx-serve": |
| 2000 |
mlx_ignored_flags: list[str] = [] |
| 2001 |
if quant is not None: |
| 2002 |
mlx_ignored_flags.append("--quant") |
| 2003 |
if merged: |
| 2004 |
mlx_ignored_flags.append("--merged") |
| 2005 |
if dequantize: |
| 2006 |
mlx_ignored_flags.append("--dequantize") |
| 2007 |
if name is not None: |
| 2008 |
mlx_ignored_flags.append("--name") |
| 2009 |
if no_template: |
| 2010 |
mlx_ignored_flags.append("--no-template") |
| 2011 |
if skip_ollama: |
| 2012 |
mlx_ignored_flags.append("--skip-ollama") |
| 2013 |
if no_imatrix: |
| 2014 |
mlx_ignored_flags.append("--no-imatrix") |
| 2015 |
if draft is not None: |
| 2016 |
mlx_ignored_flags.append("--draft") |
| 2017 |
if no_draft: |
| 2018 |
mlx_ignored_flags.append("--no-draft") |
| 2019 |
if mlx_ignored_flags: |
| 2020 |
console.print( |
| 2021 |
"[yellow]export:[/yellow] ignoring flags not applicable to " |
| 2022 |
f"`--target mlx-serve`: {', '.join(mlx_ignored_flags)}" |
| 2023 |
) |
| 2024 |
|
| 2025 |
declared_adapter_names = tuple(adapters_declared.keys()) if adapters_declared else None |
| 2026 |
try: |
| 2027 |
mlx_serve_result = prepare_mlx_serve_export( |
| 2028 |
store=store, |
| 2029 |
spec=spec, |
| 2030 |
adapter_name=adapter, |
| 2031 |
adapter_path_override=adapter_path_override, |
| 2032 |
declared_adapter_names=declared_adapter_names, |
| 2033 |
) |
| 2034 |
except ExportError as exc: |
| 2035 |
console.print(f"[red]export:[/red] {exc}") |
| 2036 |
raise typer.Exit(code=1) from exc |
| 2037 |
|
| 2038 |
mlx_serve_smoke = None if no_smoke else resolved_target.smoke_test(mlx_serve_result) |
| 2039 |
if mlx_serve_smoke is not None and not mlx_serve_smoke.ok: |
| 2040 |
console.print( |
| 2041 |
f"[red]smoke:[/red] {mlx_serve_smoke.detail}\n" |
| 2042 |
" re-run with `--no-smoke` to skip the smoke test." |
| 2043 |
) |
| 2044 |
raise typer.Exit(code=1) |
| 2045 |
|
| 2046 |
manifest_path = finalize_mlx_serve_export( |
| 2047 |
store=store, |
| 2048 |
spec=spec, |
| 2049 |
prepared=mlx_serve_result, |
| 2050 |
smoke_output_first_line=None if mlx_serve_smoke is None else mlx_serve_smoke.detail, |
| 2051 |
adapter_name=adapter, |
| 2052 |
adapter_mix=mix_entries, |
| 2053 |
) |
| 2054 |
console.print(f"[green]exported:[/green] {mlx_serve_result.export_dir}") |
| 2055 |
console.print("target: mlx-serve") |
| 2056 |
assert mlx_serve_result.launch_script_path is not None |
| 2057 |
console.print(f"launch: {mlx_serve_result.launch_script_path.name}") |
| 2058 |
console.print(f"manifest: {manifest_path.name}") |
| 2059 |
if mlx_serve_smoke is not None and mlx_serve_smoke.detail: |
| 2060 |
console.print(f"smoke: {mlx_serve_smoke.detail}") |
| 2061 |
return |
| 2062 |
|
| 2063 |
try: |
| 2064 |
result = run_export( |
| 2065 |
store, |
| 2066 |
spec, |
| 2067 |
plan, |
| 2068 |
target=resolved_target.name, |
| 2069 |
cached_base_dir=cached.path, |
| 2070 |
subprocess_runner=_verbose_runner if verbose else None, |
| 2071 |
skip_ollama=skip_ollama or resolved_target.name != "ollama", |
| 2072 |
skip_smoke=no_smoke, |
| 2073 |
source_dlm_path=path.resolve(), |
| 2074 |
training_sequence_len=parsed.frontmatter.training.sequence_len, |
| 2075 |
override_temperature=parsed.frontmatter.export.default_temperature, |
| 2076 |
override_top_p=parsed.frontmatter.export.default_top_p, |
| 2077 |
draft_override=draft, |
| 2078 |
draft_disabled=no_draft, |
| 2079 |
adapter_name=adapter, |
| 2080 |
adapter_path_override=adapter_path_override, |
| 2081 |
adapter_mix=mix_entries, |
| 2082 |
) |
| 2083 |
except UnsafeMergeError as exc: |
| 2084 |
console.print(f"[red]merge:[/red] {exc}") |
| 2085 |
raise typer.Exit(code=1) from exc |
| 2086 |
except VendoringError as exc: |
| 2087 |
console.print( |
| 2088 |
f"[red]vendor:[/red] {exc}\n" |
| 2089 |
" run `scripts/bump-llama-cpp.sh build` or " |
| 2090 |
"`git submodule update --init --recursive`." |
| 2091 |
) |
| 2092 |
raise typer.Exit(code=1) from exc |
| 2093 |
except PreflightError as exc: |
| 2094 |
console.print(f"[red]preflight[{exc.probe}]:[/red] {exc.detail}") |
| 2095 |
raise typer.Exit(code=1) from exc |
| 2096 |
except SubprocessError as exc: |
| 2097 |
console.print(f"[red]subprocess:[/red] {exc}") |
| 2098 |
raise typer.Exit(code=1) from exc |
| 2099 |
except OllamaBinaryNotFoundError as exc: |
| 2100 |
console.print( |
| 2101 |
f"[red]ollama:[/red] {exc}\n" |
| 2102 |
" install from https://ollama.com/download " |
| 2103 |
"or re-run with `--skip-ollama`." |
| 2104 |
) |
| 2105 |
raise typer.Exit(code=1) from exc |
| 2106 |
except OllamaVersionError as exc: |
| 2107 |
console.print(f"[red]ollama:[/red] {exc}") |
| 2108 |
raise typer.Exit(code=1) from exc |
| 2109 |
except OllamaCreateError as exc: |
| 2110 |
console.print(f"[red]ollama create:[/red] {exc}") |
| 2111 |
raise typer.Exit(code=1) from exc |
| 2112 |
except OllamaSmokeError as exc: |
| 2113 |
console.print( |
| 2114 |
f"[red]smoke:[/red] {exc}\n re-run with `--no-smoke` to skip the smoke test." |
| 2115 |
) |
| 2116 |
raise typer.Exit(code=1) from exc |
| 2117 |
except OllamaError as exc: |
| 2118 |
console.print(f"[red]ollama:[/red] {exc}") |
| 2119 |
raise typer.Exit(code=1) from exc |
| 2120 |
except ExportError as exc: |
| 2121 |
console.print(f"[red]export:[/red] {exc}") |
| 2122 |
raise typer.Exit(code=1) from exc |
| 2123 |
|
| 2124 |
if resolved_target.name == "llama-server": |
| 2125 |
adapter_dir = adapter_path_override |
| 2126 |
if adapter_dir is None: |
| 2127 |
if adapter is None: |
| 2128 |
adapter_dir = store.resolve_current_adapter() |
| 2129 |
else: |
| 2130 |
adapter_dir = store.resolve_current_adapter_for(adapter) |
| 2131 |
assert adapter_dir is not None |
| 2132 |
try: |
| 2133 |
llama_server_result = prepare_llama_server_export( |
| 2134 |
export_dir=result.export_dir, |
| 2135 |
manifest_path=result.manifest_path, |
| 2136 |
artifacts=result.artifacts, |
| 2137 |
adapter_dir=adapter_dir, |
| 2138 |
spec=spec, |
| 2139 |
training_sequence_len=parsed.frontmatter.training.sequence_len, |
| 2140 |
) |
| 2141 |
except VendoringError as exc: |
| 2142 |
console.print( |
| 2143 |
f"[red]vendor:[/red] {exc}\n" |
| 2144 |
" run `scripts/bump-llama-cpp.sh build --with-server` or " |
| 2145 |
"`git submodule update --init --recursive`." |
| 2146 |
) |
| 2147 |
raise typer.Exit(code=1) from exc |
| 2148 |
except ExportError as exc: |
| 2149 |
console.print(f"[red]export:[/red] {exc}") |
| 2150 |
raise typer.Exit(code=1) from exc |
| 2151 |
llama_server_smoke = None if no_smoke else resolved_target.smoke_test(llama_server_result) |
| 2152 |
if llama_server_smoke is not None and not llama_server_smoke.ok: |
| 2153 |
console.print( |
| 2154 |
f"[red]smoke:[/red] {llama_server_smoke.detail}\n" |
| 2155 |
" re-run with `--no-smoke` to skip the smoke test." |
| 2156 |
) |
| 2157 |
raise typer.Exit(code=1) |
| 2158 |
|
| 2159 |
cached_tag = " [dim](cached base)[/dim]" if result.cached else "" |
| 2160 |
console.print(f"[green]exported:[/green] {result.export_dir}{cached_tag}") |
| 2161 |
for artifact in result.artifacts: |
| 2162 |
console.print(f" {artifact.name}") |
| 2163 |
|
| 2164 |
# S26 X1 — also emit a sway.yaml next to the GGUF when the user |
| 2165 |
# asks for it. Done AFTER the regular export so a sway-side |
| 2166 |
# failure can never roll back a working GGUF deployment. |
| 2167 |
if emit_sway_json: |
| 2168 |
from dlm.export.sway_json import SwayJsonExportError, write_sway_json |
| 2169 |
|
| 2170 |
try: |
| 2171 |
sway_yaml_path = write_sway_json(path, result.export_dir) |
| 2172 |
except SwayJsonExportError as exc: |
| 2173 |
console.print(f"[red]sway-json:[/red] {exc}") |
| 2174 |
raise typer.Exit(code=1) from exc |
| 2175 |
console.print(f"[green]sway.yaml:[/green] {sway_yaml_path}") |
| 2176 |
console.print(" next: sway run " + str(sway_yaml_path)) |
| 2177 |
if resolved_target.name == "llama-server": |
| 2178 |
assert llama_server_result.launch_script_path is not None |
| 2179 |
assert llama_server_result.config_path is not None |
| 2180 |
console.print(f"target: {result.target}") |
| 2181 |
console.print(f"launch: {llama_server_result.launch_script_path.name}") |
| 2182 |
console.print(f"template: {llama_server_result.config_path.name}") |
| 2183 |
if llama_server_smoke is not None and llama_server_smoke.detail: |
| 2184 |
console.print(f"smoke: {llama_server_smoke.detail}") |
| 2185 |
return |
| 2186 |
if result.ollama_name: |
| 2187 |
console.print(f"ollama: {result.ollama_name} (v{result.ollama_version})") |
| 2188 |
if result.smoke_output_first_line: |
| 2189 |
console.print(f"smoke: {result.smoke_output_first_line}") |
| 2190 |
|
| 2191 |
|
| 2192 |
def pack_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file to pack.")],
    out: Annotated[
        Path | None,
        typer.Option("--out", help="Output .dlm.pack path."),
    ] = None,
    include_exports: Annotated[
        bool,
        typer.Option("--include-exports", help="Bundle all GGUF exports into the pack."),
    ] = False,
    include_base: Annotated[
        bool,
        typer.Option(
            "--include-base",
            help="Bundle the base model snapshot (license rules still apply).",
        ),
    ] = False,
    include_logs: Annotated[
        bool,
        typer.Option("--include-logs", help="Bundle per-run JSONL logs."),
    ] = False,
    licensee: Annotated[
        str | None,
        typer.Option(
            "--i-am-the-licensee",
            help="URL acknowledging separate acceptance of a non-redistributable base (required for --include-base on gated models).",
        ),
    ] = None,
) -> None:
    """Produce a portable .dlm.pack bundle.

    Delegates to :func:`dlm.pack.packer.pack` and reports the written
    archive's path, size, and content type on success. Packing failures
    and parse failures both exit with code 1, each with its own message
    prefix.
    """
    from rich.console import Console

    from dlm.doc.errors import DlmParseError
    from dlm.pack.errors import BaseLicenseRefusedError, PackError
    from dlm.pack.packer import pack

    console = Console(stderr=True)

    try:
        packed = pack(
            path,
            out=out,
            include_exports=include_exports,
            include_base=include_base,
            include_logs=include_logs,
            licensee_acceptance_url=licensee,
        )
    # Both pack-side failures share the same user-facing prefix and exit
    # code, so a single handler covers them.
    except (BaseLicenseRefusedError, PackError) as exc:
        console.print(f"[red]pack:[/red] {exc}")
        raise typer.Exit(code=1) from exc
    except DlmParseError as exc:
        console.print(f"[red]parse:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    # 1 << 20 == 1024 * 1024: report the archive size in mebibytes.
    console.print(
        f"[green]packed:[/green] {packed.path} "
        f"({packed.bytes_written / (1 << 20):.2f} MB, content_type={packed.content_type})"
    )
| 2254 |
|
| 2255 |
|
| 2256 |
def unpack_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm.pack to install.")],
    force: Annotated[
        bool,
        typer.Option("--force", help="Overwrite an existing store with the same dlm_id."),
    ] = False,
    out: Annotated[
        Path | None,
        typer.Option(
            "--out", help="Directory to place the restored .dlm (default: alongside the pack)."
        ),
    ] = None,
) -> None:
    """Install a .dlm.pack into the local store.

    Delegates to :func:`dlm.pack.unpacker.unpack`, then reports the
    restored .dlm path, the store location, and the dlm_id. When the
    pack was written with an older pack-format version, the applied
    migration chain is printed as well. Any unpack failure exits 1.
    """
    from rich.console import Console

    from dlm.pack.errors import (
        PackFormatVersionError,
        PackIntegrityError,
        PackLayoutError,
    )
    from dlm.pack.unpacker import unpack

    console = Console(stderr=True)

    try:
        result = unpack(path, force=force, out_dir=out)
    # All three failure modes print the same prefix and exit with the
    # same code; one consolidated handler replaces three byte-identical
    # except bodies.
    except (PackFormatVersionError, PackIntegrityError, PackLayoutError) as exc:
        console.print(f"[red]unpack:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    console.print(f"[green]unpacked:[/green] {result.dlm_path}")
    console.print(f"  store: {result.store_path}")
    console.print(f"  dlm_id: {result.dlm_id}")
    if result.applied_migrations:
        # NOTE(review): assumes `header.pack_format_version` records the
        # pre-migration version, so the chain's last hop is that version
        # plus one — confirm against the unpacker's migration contract.
        steps = " → ".join(
            f"v{v}" for v in (*result.applied_migrations, result.header.pack_format_version + 1)
        )
        console.print(f"  migrated: {steps}")
| 2301 |
|
| 2302 |
|
| 2303 |
def verify_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm.pack to verify.")],
    trust_on_first_use: Annotated[
        bool,
        typer.Option(
            "--trust-on-first-use",
            help=(
                "Record the signer's public key under ~/.dlm/trusted-keys/ "
                "on first verify. Without this flag an unknown signer is "
                "rejected with exit code 2."
            ),
        ),
    ] = False,
    trusted_keys_dir: Annotated[
        Path | None,
        typer.Option(
            "--trusted-keys-dir",
            help="Override ~/.dlm/trusted-keys/ (useful for scripted verify).",
            hidden=True,
        ),
    ] = None,
) -> None:
    """Verify the provenance chain carried inside a .dlm.pack.

    Reads the embedded provenance JSON out of the archive, parses it,
    and checks the signature against the trusted-keys directory.

    Exit codes: 0 verified, 1 broken chain (or missing provenance),
    2 untrusted signer, 3 signature rejected.
    """
    import tempfile

    from rich.console import Console

    from dlm.pack.errors import PackLayoutError
    from dlm.pack.layout import PROVENANCE_FILENAME
    from dlm.pack.unpacker import read_pack_member_bytes
    from dlm.share.errors import ShareError
    from dlm.share.provenance import (
        ProvenanceChainBroken,
        ProvenanceSchemaError,
        UnknownSignerError,
        load_provenance_json,
        verify_provenance,
    )

    console = Console(stderr=True)
    trust_dir = trusted_keys_dir or (Path.home() / ".dlm" / "trusted-keys")

    try:
        raw_provenance = read_pack_member_bytes(path, PROVENANCE_FILENAME)
    except PackLayoutError as exc:
        console.print(f"[red]verify:[/red] {exc}")
        raise typer.Exit(code=1) from exc
    except OSError as exc:
        console.print(f"[red]verify:[/red] cannot read {path}: {exc}")
        raise typer.Exit(code=1) from exc

    if raw_provenance is None:
        console.print(f"[red]verify:[/red] {path} is unsigned — no {PROVENANCE_FILENAME} inside.")
        raise typer.Exit(code=1)

    # `load_provenance_json` only reads from the filesystem, so stage the
    # in-pack bytes to a scratch file rather than duplicating the parser.
    # This keeps parsing single-sourced and its error text identical to
    # the filesystem call-site.
    tmp = tempfile.NamedTemporaryFile("wb", suffix=".json", delete=False)
    with tmp:
        tmp.write(raw_provenance)
    scratch = Path(tmp.name)
    try:
        provenance = load_provenance_json(scratch)
    except ProvenanceSchemaError as exc:
        console.print(f"[red]verify:[/red] malformed provenance.json: {exc}")
        raise typer.Exit(code=1) from exc
    finally:
        scratch.unlink(missing_ok=True)

    try:
        result = verify_provenance(
            provenance,
            trusted_keys_dir=trust_dir,
            tofu=trust_on_first_use,
        )
    except UnknownSignerError as exc:
        console.print(f"[red]verify:[/red] {exc}")
        raise typer.Exit(code=2) from exc
    except ProvenanceChainBroken as exc:
        console.print(f"[red]verify:[/red] chain broken: {exc}")
        raise typer.Exit(code=1) from exc
    except ShareError as exc:
        console.print(f"[red]verify:[/red] signature rejected: {exc}")
        raise typer.Exit(code=3) from exc

    # Success report goes to stdout (errors above went to stderr).
    report = Console()
    report.print(f"[green]verified:[/green] {path.name}")
    report.print(f"  signer: {result.signer_fingerprint}")
    report.print(f"  trusted-key: {result.trusted_key_path}")
    report.print(f"  adapter_sha256: {provenance.adapter_sha256[:12]}...")
    report.print(f"  base_revision: {provenance.base_revision}")
    report.print(f"  corpus_root: {provenance.corpus_root_sha256[:12]}...")
    report.print(f"  signed_at: {provenance.signed_at}")
    if result.tofu_recorded:
        report.print(
            f"[yellow]note:[/yellow] recorded new trust entry "
            f"at {result.trusted_key_path}; subsequent verifies use strict mode."
        )
| 2406 |
|
| 2407 |
|
| 2408 |
def repl_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file to start a REPL against.")],
    adapter: Annotated[
        str | None,
        typer.Option(
            "--adapter",
            help=(
                "Named adapter to load. Required on multi-adapter "
                "documents; rejected on single-adapter documents."
            ),
        ),
    ] = None,
    backend: Annotated[
        str,
        typer.Option(
            "--backend",
            help="Inference backend: `auto`, `pytorch`, or `mlx`.",
        ),
    ] = "auto",
) -> None:
    """Interactive REPL against the trained adapter.

    Exit codes: 2 for usage errors (bad `--backend`/`--adapter`), 1 for
    runtime failures (gated license without recorded acceptance, adapter
    missing from the store), otherwise whatever `run_repl` returns.
    """
    # Project imports are deferred into the body so `dlm --help` stays
    # fast (same pattern as the other commands in this module).
    from rich.console import Console

    from dlm.base_models import GatedModelError
    from dlm.base_models import resolve as resolve_base_model
    from dlm.doc.parser import parse_file
    from dlm.hardware import doctor
    from dlm.inference import AdapterNotFoundError
    from dlm.inference.backends import (
        UnsupportedBackendError,
        build_backend,
        select_backend,
    )
    from dlm.repl.session import ReplSession
    from dlm.store.paths import for_dlm

    # Diagnostics go to stderr so stdout stays free for the REPL itself.
    console = Console(stderr=True)

    if backend not in ("auto", "pytorch", "mlx"):
        console.print(
            f"[red]repl:[/red] --backend must be `auto`, `pytorch`, or `mlx` (got {backend!r})."
        )
        raise typer.Exit(code=2)

    parsed = parse_file(path)
    # Validate `--adapter` against the frontmatter *before* any model
    # loading: it's only legal on docs declaring `training.adapters`,
    # and must name one of the declared adapters.
    declared = parsed.frontmatter.training.adapters
    if adapter is not None:
        if declared is None:
            console.print(
                "[red]repl:[/red] --adapter is only valid on multi-adapter "
                "documents (this doc does not declare `training.adapters`)."
            )
            raise typer.Exit(code=2)
        if adapter not in declared:
            console.print(
                f"[red]repl:[/red] --adapter {adapter!r} is not declared "
                f"(declared: {sorted(declared)!r})."
            )
            raise typer.Exit(code=2)

    store = for_dlm(parsed.frontmatter.dlm_id)
    # Reuse a license acceptance previously recorded in this store by
    # `dlm train`; the REPL never prompts for acceptance itself.
    already_accepted = _previously_accepted(store.manifest)
    try:
        spec = resolve_base_model(parsed.frontmatter.base_model, accept_license=already_accepted)
    except GatedModelError as exc:
        console.print(
            f"[red]license:[/red] base {parsed.frontmatter.base_model!r} is gated and has "
            "no recorded acceptance in this store; run `dlm train --i-accept-license` first."
        )
        raise typer.Exit(code=1) from exc
    caps = doctor().capabilities

    try:
        backend_name = select_backend(backend, caps)  # type: ignore[arg-type]
    except UnsupportedBackendError as exc:
        console.print(f"[red]repl:[/red] {exc}")
        raise typer.Exit(code=2) from exc
    backend_obj = build_backend(backend_name, caps)

    try:
        backend_obj.load(spec, store, adapter_name=adapter)
    except AdapterNotFoundError as exc:
        console.print(f"[red]repl:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    # NOTE(review): reaches into the backend's private `_loaded` state to
    # recover the tokenizer — presumably there's no public accessor yet;
    # consider promoting this to backend API. Falls back to None when the
    # attribute is absent.
    tokenizer = getattr(backend_obj, "_loaded", None)
    tokenizer = tokenizer.tokenizer if tokenizer is not None else None

    session = ReplSession(
        backend=backend_obj,
        tokenizer=tokenizer,
        active_adapter=adapter,
        declared_adapters=tuple(sorted(declared)) if declared else (),
    )

    # Imported late, after all the error paths above have had a chance
    # to bail out cheaply.
    from dlm.repl.app import run_repl

    # The REPL's return value becomes the process exit code.
    raise typer.Exit(code=run_repl(session, console=console))
| 2506 |
|
| 2507 |
|
| 2508 |
def metrics_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file whose store we query.")],
    json_out: Annotated[bool, typer.Option("--json", help="Emit JSON.")] = False,
    csv_out: Annotated[bool, typer.Option("--csv", help="Emit CSV.")] = False,
    run_id: Annotated[
        int | None,
        typer.Option("--run-id", help="Only show this run (drill-down)."),
    ] = None,
    phase: Annotated[
        str | None,
        typer.Option("--phase", help="Filter by phase: sft|dpo|orpo|cpt."),
    ] = None,
    since: Annotated[
        str | None,
        typer.Option(
            "--since",
            help="Time window (e.g. `24h`, `7d`, `30m`). Filters `started_at`.",
        ),
    ] = None,
    limit: Annotated[int, typer.Option("--limit")] = 20,
) -> None:
    """Query the per-store metrics database.

    Two output shapes: a run listing (default) or a single-run
    drill-down (steps, evals, preference mining) when `--run-id` is
    given. Each shape supports text, `--json`, and `--csv` rendering;
    structured output is written to raw stdout, human-readable text via
    a Rich console.
    """
    import csv
    import json
    import sys

    from rich.console import Console

    from dlm.doc.parser import parse_file
    from dlm.metrics.queries import (
        evals_for_run,
        evals_to_dict,
        preference_mining_for_run,
        preference_mining_to_dict,
        recent_runs,
        runs_to_dict,
        steps_for_run,
        steps_to_dict,
    )
    from dlm.store.paths import for_dlm

    console = Console(stderr=True)

    if json_out and csv_out:
        console.print("[red]metrics:[/red] --json and --csv are mutually exclusive")
        raise typer.Exit(code=2)

    # `_parse_since_arg` exits the process (code 2) on a malformed window.
    since_delta = _parse_since_arg(since, console) if since else None

    parsed = parse_file(path)
    store = for_dlm(parsed.frontmatter.dlm_id)

    runs = recent_runs(store.root, limit=limit, phase=phase, since=since_delta, run_id=run_id)

    if run_id is not None:
        # Drill-down: show this run's steps + evals.
        if not runs:
            console.print(f"[red]metrics:[/red] no run with run_id={run_id}")
            raise typer.Exit(code=1)
        run = runs[0]
        steps = steps_for_run(store.root, run_id)
        evals = evals_for_run(store.root, run_id)
        preference_rows = preference_mining_for_run(store.root, run_id)

        if json_out:
            payload = {
                "run": runs_to_dict([run])[0],
                "steps": steps_to_dict(steps),
                "evals": evals_to_dict(evals),
                "preference_mining": preference_mining_to_dict(preference_rows),
            }
            # Raw stdout (not Rich) so terminal-width wrapping can't
            # corrupt the JSON.
            sys.stdout.write(json.dumps(payload, indent=2) + "\n")
            return
        if csv_out:
            writer = csv.writer(sys.stdout)
            writer.writerow(["step", "loss", "lr", "grad_norm", "val_loss"])
            # Join evals onto steps by step number; steps without an
            # eval get an empty val_loss cell.
            eval_by_step = {e.step: e.val_loss for e in evals}
            for s in steps:
                writer.writerow([s.step, s.loss, s.lr, s.grad_norm, eval_by_step.get(s.step)])
            return
        console.print(
            f"[green]run_id={run.run_id}[/green] phase={run.phase} "
            f"seed={run.seed} status={run.status} steps={len(steps)} "
            f"evals={len(evals)}"
        )
        if evals:
            last = evals[-1]
            console.print(
                f" last eval: step={last.step} val_loss={last.val_loss} "
                f"perplexity={last.perplexity}"
            )
        if preference_rows:
            last_pref = preference_rows[-1]
            console.print(
                " preference mining: "
                f"events={len(preference_rows)} "
                f"mined_pairs={sum(row.mined_pairs for row in preference_rows)} "
                f"skipped_prompts={sum(row.skipped_prompts for row in preference_rows)} "
                f"last_mode={last_pref.write_mode} "
                f"judge={last_pref.judge_name}"
            )
        return

    # Top-level: list runs.
    if json_out:
        sys.stdout.write(json.dumps({"runs": runs_to_dict(runs)}, indent=2) + "\n")
        return
    if csv_out:
        writer = csv.writer(sys.stdout)
        writer.writerow(["run_id", "phase", "seed", "status", "started_at", "ended_at"])
        for r in runs:
            writer.writerow([r.run_id, r.phase, r.seed, r.status, r.started_at, r.ended_at])
        return

    # Empty text listing is informational, not an error.
    if not runs:
        console.print("[dim]metrics:[/dim] no runs found (hint: train first, or adjust filters)")
        return
    console.print(f"[bold]Runs: {len(runs)}[/bold]")
    for r in runs:
        console.print(
            f" run_id={r.run_id} phase={r.phase} seed={r.seed} "
            f"status={r.status} started={r.started_at}"
        )
| 2631 |
|
| 2632 |
|
| 2633 |
def metrics_watch_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file whose store we tail.")],
    poll_seconds: Annotated[
        float,
        typer.Option("--poll-seconds", help="How often to re-read the metrics DB."),
    ] = 1.0,
) -> None:
    """Tail the metrics DB: print new steps/evals as they land.

    Polls forever until Ctrl-C, always following the *latest* run: when
    a newer run appears, the high-water marks reset and output switches
    to that run.
    """
    import time

    from rich.console import Console

    from dlm.doc.parser import parse_file
    from dlm.metrics.queries import evals_for_run, latest_run_id, steps_for_run
    from dlm.store.paths import for_dlm

    console = Console()

    parsed = parse_file(path)
    store = for_dlm(parsed.frontmatter.dlm_id)

    console.print(
        f"[dim]metrics watch:[/dim] polling {store.root} every {poll_seconds}s (Ctrl-C to exit)"
    )

    # High-water marks: each poll only prints rows past the last step we
    # reported, keeping the output append-only.
    current_run: int | None = None
    last_step_seen = 0
    last_eval_step_seen = 0
    try:
        while True:
            run_id = latest_run_id(store.root)
            if run_id is None:
                # Store has no runs yet; keep waiting.
                time.sleep(poll_seconds)
                continue
            if run_id != current_run:
                # A newer run started: reset marks and re-announce.
                current_run = run_id
                last_step_seen = 0
                last_eval_step_seen = 0
                console.print(f"[green]→ following run_id={run_id}[/green]")

            new_steps = steps_for_run(store.root, run_id, since_step=last_step_seen)
            for s in new_steps:
                console.print(
                    f" step {s.step:>5} loss={s.loss} lr={s.lr} grad_norm={s.grad_norm}"
                )
                last_step_seen = s.step

            new_evals = evals_for_run(store.root, run_id, since_step=last_eval_step_seen)
            for e in new_evals:
                console.print(
                    f" [yellow]eval @ step {e.step}[/yellow] "
                    f"val_loss={e.val_loss} perplexity={e.perplexity}"
                )
                last_eval_step_seen = e.step

            time.sleep(poll_seconds)
    except KeyboardInterrupt:
        # Ctrl-C is the expected exit path; leave quietly.
        console.print("[dim]metrics watch:[/dim] bye")
| 2691 |
|
| 2692 |
|
| 2693 |
def _parse_since_arg(since: str, console: object) -> timedelta:
    """Parse `24h` / `7d` / `30m` / `10s` into a timedelta.

    Prints a usage error and exits with code 2 when the value is empty,
    non-numeric, or carries an unknown unit suffix.
    """
    from datetime import timedelta

    from rich.console import Console

    assert isinstance(console, Console)

    if not since:
        raise typer.Exit(code=2)
    suffix = since[-1].lower()
    try:
        magnitude = int(since[:-1])
    except ValueError:
        console.print(f"[red]metrics:[/red] --since {since!r} not an integer+unit")
        raise typer.Exit(code=2) from None
    # Map the one-letter suffix onto the matching timedelta keyword.
    keyword = {"s": "seconds", "m": "minutes", "h": "hours", "d": "days"}.get(suffix)
    if keyword is None:
        console.print(f"[red]metrics:[/red] --since {since!r} unit must be s/m/h/d")
        raise typer.Exit(code=2)
    return timedelta(**{keyword: magnitude})
| 2719 |
|
| 2720 |
|
| 2721 |
def doctor_cmd(
    json_out: Annotated[bool, typer.Option("--json", help="Emit machine-readable output.")] = False,
) -> None:
    """Inspect hardware and print the resolved training plan.

    With `--json` the report dict is dumped (non-JSON values via
    `default=str`); otherwise the text renderer is used.
    """
    import json

    from dlm.hardware import doctor, render_text

    report = doctor()
    rendered = (
        json.dumps(report.to_dict(), indent=2, default=str)
        if json_out
        else render_text(report)
    )
    typer.echo(rendered)
| 2734 |
|
| 2735 |
|
| 2736 |
def show_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file to inspect.")],
    json_out: Annotated[bool, typer.Option("--json", help="Emit machine-readable JSON.")] = False,
) -> None:
    """Show training history, exports, and adapter state.

    Handles three states: parse failure (exit 1), parsed-but-untrained
    (store missing — informational output), and a full store inspection
    with optional cache / gate / preference-mining / security sections.
    """
    import json as _json
    import sys

    from rich.console import Console

    from dlm.doc.errors import DlmParseError
    from dlm.doc.parser import parse_file
    from dlm.store.errors import ManifestCorruptError
    from dlm.store.inspect import inspect_store
    from dlm.store.paths import for_dlm

    # Errors go to stderr; the report itself goes to stdout.
    console = Console(stderr=True)
    out_console = Console()

    try:
        parsed = parse_file(path)
    except (DlmParseError, OSError) as exc:
        console.print(f"[red]show:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    store = for_dlm(parsed.frontmatter.dlm_id)
    training_sources, discovered_configs = _summarize_training_sources_and_discovered(
        parsed, path.resolve().parent
    )
    # The per-document cache config comes from frontmatter, not on-disk
    # state — report it on both the pre-train and initialized-store paths
    # so authors can sanity-check the knobs before `dlm train` runs.
    cache_cfg = parsed.frontmatter.training.cache
    training_cache_config: dict[str, object] = {
        "enabled": cache_cfg.enabled,
        "max_bytes": cache_cfg.max_bytes,
        "prune_older_than_days": cache_cfg.prune_older_than_days,
    }

    # Store may not exist yet (no `dlm train` run). Treat that as an
    # informational state rather than an error — useful after `dlm init`.
    if not store.manifest.exists():
        if json_out:
            payload: dict[str, object] = {
                "dlm_id": parsed.frontmatter.dlm_id,
                "base_model": parsed.frontmatter.base_model,
                "store_initialized": False,
                "source_path": str(path.resolve()),
                "training_cache_config": training_cache_config,
            }
            if training_sources is not None:
                payload["training_sources"] = training_sources
            if discovered_configs:
                payload["discovered_training_configs"] = discovered_configs
            sys.stdout.write(_json.dumps(payload, indent=2) + "\n")
        else:
            out_console.print(f"[bold]{path}[/bold]")
            out_console.print(f" dlm_id: {parsed.frontmatter.dlm_id}")
            out_console.print(f" base_model: {parsed.frontmatter.base_model}")
            out_console.print(" store: [dim]not yet initialized (run `dlm train`)[/dim]")
            if training_sources:
                _render_training_sources_text(out_console, training_sources)
        return

    try:
        inspection = inspect_store(store, source_path=path.resolve())
    except ManifestCorruptError as exc:
        console.print(f"[red]show:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    # Optional add-on sections; each helper returns None when it has
    # nothing to report for this store.
    training_cache = _summarize_training_cache(store.tokenized_cache_dir, store.root)
    gate = _summarize_gate(store)
    preference_mining = _summarize_preference_mining(store.root)
    base_security = _summarize_base_security(parsed.frontmatter.base_model)

    if json_out:
        payload_full = _inspection_to_dict(inspection)
        if training_sources is not None:
            payload_full["training_sources"] = training_sources
        if discovered_configs:
            payload_full["discovered_training_configs"] = discovered_configs
        if training_cache is not None:
            payload_full["training_cache"] = training_cache
        payload_full["training_cache_config"] = training_cache_config
        if gate is not None:
            payload_full["gate"] = gate
        if preference_mining is not None:
            payload_full["preference_mining"] = preference_mining
            # Top-level duplicates of the nested counters.
            payload_full["preference_mining_runs"] = preference_mining["run_count"]
            payload_full["total_auto_mined_pairs"] = preference_mining["total_mined_pairs"]
        if base_security is not None:
            payload_full["base_security"] = base_security
        # Write JSON to raw stdout — Rich's Console wraps lines at the
        # terminal width and would corrupt the JSON.
        sys.stdout.write(_json.dumps(payload_full, indent=2, default=str) + "\n")
        return

    # Text rendering: base inspection first, then each optional section
    # only when it carries something worth showing.
    _render_inspection_text(out_console, path, inspection)
    if training_sources:
        _render_training_sources_text(out_console, training_sources)
    if training_cache is not None and training_cache.get("entry_count", 0):
        _render_training_cache_text(out_console, training_cache)
    if gate is not None:
        _render_gate_text(out_console, gate)
    if base_security is not None and base_security.get("trust_remote_code"):
        _render_base_security_text(out_console, base_security)
| 2842 |
|
| 2843 |
|
| 2844 |
def _inspection_to_dict(inspection: object) -> dict[str, object]:
    """Flatten a StoreInspection into a JSON-safe dict.

    Schema is the v1 contract for `dlm show --json`; any reshape is a
    version bump (recorded in tests/golden/cli-json/).

    Typed as `object` (with a runtime narrow below) so the dlm import
    can stay local to the function.
    """
    from dlm.store.inspect import StoreInspection

    # Narrow the deliberately-loose `object` annotation.
    assert isinstance(inspection, StoreInspection)
    return {
        "dlm_id": inspection.dlm_id,
        "path": str(inspection.path),
        "base_model": inspection.base_model,
        "base_model_revision": inspection.base_model_revision,
        "adapter_version": inspection.adapter_version,
        "training_runs": inspection.training_runs,
        # Non-JSON values (e.g. timestamps) are handled by the caller's
        # `json.dumps(..., default=str)`.
        "last_trained_at": inspection.last_trained_at,
        "has_adapter_current": inspection.has_adapter_current,
        "replay_size_bytes": inspection.replay_size_bytes,
        "total_size_bytes": inspection.total_size_bytes,
        "source_path": str(inspection.source_path) if inspection.source_path else None,
        "orphaned": inspection.orphaned,
        "exports": [e.model_dump(mode="json") for e in inspection.exports],
        # Copy the mappings so callers can't mutate the inspection's state.
        "content_hashes": dict(inspection.content_hashes),
        "pinned_versions": dict(inspection.pinned_versions),
        "named_adapters": [
            {
                "name": a.name,
                "has_current": a.has_current,
                "latest_version": a.latest_version,
            }
            for a in inspection.named_adapters
        ],
    }
| 2878 |
|
| 2879 |
|
| 2880 |
def _render_inspection_text(console: object, path: Path, inspection: object) -> None:
    """Human-readable `dlm show` output.

    Renders the base inspection: identity, store size, adapter state
    (per-adapter on multi-adapter stores), run history, exports, and an
    orphaned-store warning.
    """
    from rich.console import Console

    from dlm.store.inspect import StoreInspection

    # Narrow the deliberately-loose `object` annotations.
    assert isinstance(console, Console)
    assert isinstance(inspection, StoreInspection)

    console.print(f"[bold]{path}[/bold]")
    console.print(f" dlm_id: {inspection.dlm_id}")
    # Abbreviate the revision hash to 7 chars, git-style.
    rev = inspection.base_model_revision
    rev_str = f" (revision {rev[:7]})" if rev else ""
    console.print(f" base_model: {inspection.base_model}{rev_str}")
    console.print(
        f" store: {inspection.path} ({_human_size(inspection.total_size_bytes)})"
    )
    if inspection.named_adapters:
        # Multi-adapter store: render the per-adapter pointers rather
        # than the flat field (which stays 0 on multi-adapter docs).
        console.print(" adapters:")
        for adapter in inspection.named_adapters:
            if adapter.has_current:
                console.print(f" {adapter.name:16}v{adapter.latest_version:04d}")
            else:
                console.print(f" {adapter.name:16}[dim]no current pointer[/dim]")
    elif inspection.has_adapter_current:
        console.print(f" adapter: v{inspection.adapter_version:04d}")
    else:
        console.print(" adapter: [dim]none (no `dlm train` yet)[/dim]")
    last = inspection.last_trained_at
    last_str = f" — last {last.isoformat(timespec='seconds')}" if last else ""
    console.print(f" training runs: {inspection.training_runs}{last_str}")
    console.print(f" exports: {len(inspection.exports)}")
    for exp in inspection.exports:
        tag = f" — {exp.ollama_name}" if exp.ollama_name else ""
        console.print(f" {exp.quant}{tag}")
    if inspection.orphaned:
        console.print(" [yellow]orphaned:[/yellow] source .dlm is missing or mismatched")
| 2919 |
|
| 2920 |
|
| 2921 |
def _human_size(n: int) -> str: |
| 2922 |
for unit in ("B", "KB", "MB", "GB", "TB"): |
| 2923 |
if n < 1024: |
| 2924 |
return f"{n:.1f} {unit}" if unit != "B" else f"{n} B" |
| 2925 |
n //= 1024 |
| 2926 |
return f"{n} PB" |
| 2927 |
|
| 2928 |
|
| 2929 |
def _summarize_training_sources(parsed: object, base_path: Path) -> list[dict[str, object]] | None:
    """Best-effort resolution of `training.sources` for `dlm show`.

    Thin wrapper over `_summarize_training_sources_and_discovered` that
    discards the discovered-config records. Returns None when the
    frontmatter declares no directives; otherwise a list of per-source
    dicts. Expansion failures (missing paths, policy escapes) degrade to
    declared-only records so the output stays useful for debugging a
    misconfigured directive.
    """
    sources_and_discovered = _summarize_training_sources_and_discovered(parsed, base_path)
    return sources_and_discovered[0]
| 2939 |
|
| 2940 |
|
| 2941 |
def _summarize_training_sources_and_discovered(
    parsed: object, base_path: Path
) -> tuple[list[dict[str, object]] | None, list[dict[str, object]]]:
    """Like `_summarize_training_sources` but also returns the per-anchor
    `.dlm/training.yaml` + `.dlm/ignore` discovery records.

    Returns `(training_sources, discovered_configs)`. `discovered_configs`
    is always a list (empty when nothing was found or the expansion
    failed); `training_sources` matches the single-value helper's
    contract.
    """
    from dlm.directives import DirectiveError, expand_sources
    from dlm.doc.parser import ParsedDlm

    # Narrow the deliberately-loose `object` annotation.
    assert isinstance(parsed, ParsedDlm)
    directives = parsed.frontmatter.training.sources
    if not directives:
        return None, []

    # Declared-only view of each directive; used as the fallback when
    # expansion fails, and as the base that provenance is merged onto.
    declared: list[dict[str, object]] = [
        {
            "path": d.path,
            "include": list(d.include),
            "exclude": list(d.exclude),
            "max_files": d.max_files,
            "max_bytes_per_file": d.max_bytes_per_file,
        }
        for d in directives
    ]

    try:
        result = expand_sources(parsed, base_path=base_path)
    except (DirectiveError, OSError):
        # Best-effort: a broken directive still shows up as declared.
        return declared, []

    # Merge per-directive provenance onto the declared records.
    # `strict=False`: provenance is trusted to line up positionally, and
    # a short provenance list is padded below instead of raising.
    records: list[dict[str, object]] = []
    for decl, prov in zip(declared, result.provenance, strict=False):
        records.append(
            {
                **decl,
                "file_count": prov.file_count,
                "total_bytes": prov.total_bytes,
                "skipped_binary": prov.skipped_binary,
                "skipped_encoding": prov.skipped_encoding,
                "skipped_over_size": prov.skipped_over_size,
            }
        )
    # If the expander returned fewer entries than declared (shouldn't
    # happen on success but defensive), pad with declared-only.
    if len(records) < len(declared):
        records.extend(declared[len(records) :])

    # Flatten each discovered per-anchor config into a JSON-safe record;
    # `dc.config` is None when the anchor only had an ignore file.
    discovered_records: list[dict[str, object]] = []
    for dc in result.discovered:
        discovered_records.append(
            {
                "anchor": str(dc.anchor),
                "has_training_yaml": dc.config is not None,
                "has_ignore": bool(dc.ignore_rules),
                "include": list(dc.config.include) if dc.config else [],
                "exclude": list(dc.config.exclude) if dc.config else [],
                "exclude_defaults": (dc.config.exclude_defaults if dc.config else True),
                "metadata": dict(dc.config.metadata) if dc.config else {},
                "ignore_rules": len(dc.ignore_rules),
            }
        )
    return records, discovered_records
| 3008 |
|
| 3009 |
|
| 3010 |
def _summarize_training_cache(cache_dir: Path, store_root: Path) -> dict[str, object] | None:
    """Return a JSON-friendly snapshot of the tokenized-section cache.

    Returns None when the cache dir doesn't exist (store never trained
    with the cache, or pre-Sprint-31 layout). Cheap — reads the cache
    manifest only, never the entry files.
    """
    if not cache_dir.is_dir():
        return None
    from dlm.directives.cache import TokenizedCache
    from dlm.metrics import queries as _queries

    snapshot = TokenizedCache.open(cache_dir)
    last_run = _queries.latest_tokenization(store_root)
    summary: dict[str, object] = {
        "path": str(cache_dir),
        "entry_count": snapshot.entry_count,
        "bytes": snapshot.total_bytes,
    }
    if last_run is None:
        # No tokenization recorded yet: hit-rate fields stay null.
        summary["last_run_hit_rate"] = None
        summary["last_run_id"] = None
    else:
        summary["last_run_hit_rate"] = last_run.hit_rate
        summary["last_run_id"] = last_run.run_id
    return summary
| 3031 |
|
| 3032 |
|
| 3033 |
def _summarize_gate(store: object) -> dict[str, object] | None:
    """Return a JSON-friendly snapshot of the learned adapter gate.

    None when the store has no gate config (pre-Sprint-34 runs, or
    `training.gate.enabled` was false). Reads two sources: the
    on-disk `gate_config.json` for mode + adapter order, and the
    metrics `gate_events` table for per-adapter mean weight from the
    most recent run that recorded a gate.
    """
    import json as _json

    from dlm.store.paths import StorePath
    from dlm.train.gate.paths import gate_config_path

    # Narrow the deliberately-loose `object` annotation.
    assert isinstance(store, StorePath)
    cfg_path = gate_config_path(store)

    from dlm.metrics import queries as _queries
    from dlm.train.gate.module import GateMetadata

    events = _queries.latest_gate_events(store.root)
    # Divergence path: training raised before writing a config, but we
    # still emit one GateEvent per adapter with mode="diverged" so
    # operators can see the failure. Surface it even when the config
    # file is absent.
    if not cfg_path.exists():
        if events and events[0].mode == "diverged":
            # Config-derived fields are unknown on this path, hence the
            # explicit Nones for input_dim / hidden_proj_dim.
            return {
                "mode": "diverged",
                "adapter_names": [e.adapter_name for e in events],
                "input_dim": None,
                "hidden_proj_dim": None,
                "last_run_id": events[0].run_id,
                "per_adapter": [
                    {
                        "adapter_name": e.adapter_name,
                        "mean_weight": e.mean_weight,
                        "sample_count": e.sample_count,
                        "mode": e.mode,
                    }
                    for e in events
                ],
            }
        return None

    # Config exists: parse it into typed metadata.
    raw = _json.loads(cfg_path.read_text(encoding="utf-8"))
    meta = GateMetadata.from_json(raw)
    per_adapter: list[dict[str, object]] = []
    run_id: int | None = None
    if events:
        run_id = events[0].run_id
        per_adapter = [
            {
                "adapter_name": e.adapter_name,
                "mean_weight": e.mean_weight,
                "sample_count": e.sample_count,
                "mode": e.mode,
            }
            for e in events
        ]
    else:
        # No recorded events yet; fall back to the config so `dlm show`
        # still reports that a gate exists and in which mode.
        per_adapter = [{"adapter_name": name} for name in meta.adapter_names]
    return {
        "mode": meta.mode,
        "adapter_names": list(meta.adapter_names),
        "input_dim": meta.input_dim,
        "hidden_proj_dim": meta.hidden_proj_dim,
        "last_run_id": run_id,
        "per_adapter": per_adapter,
    }
| 3105 |
|
| 3106 |
|
| 3107 |
def _summarize_preference_mining(store_root: Path) -> dict[str, object] | None:
    """Return the latest preference-mine summary for `dlm show --json`.

    None when the store has never recorded a preference-mining event;
    otherwise store-wide totals plus details of the most recent event.
    """
    from dlm.metrics import queries as _queries

    overall = _queries.preference_mining_totals(store_root)
    if overall is None:
        # No preference-mining activity recorded at all.
        return None
    newest = _queries.latest_preference_mining(store_root)
    # Totals exist, so at least one event must too.
    assert newest is not None
    newest_run_rows = _queries.preference_mining_for_run(store_root, newest.run_id)
    return {
        "run_count": overall.run_count,
        "event_count": overall.event_count,
        "total_mined_pairs": overall.total_mined_pairs,
        "total_skipped_prompts": overall.total_skipped_prompts,
        "last_run_id": newest.run_id,
        "last_run_event_count": len(newest_run_rows),
        "last_event": _queries.preference_mining_to_dict([newest])[0],
    }
| 3126 |
|
| 3127 |
|
| 3128 |
def _summarize_base_security(base_model_key: str) -> dict[str, object] | None:
    """Surface security-sensitive base-model flags for `dlm show`.

    Currently just `trust_remote_code` — the flag that lets the HF
    loader execute Python shipped inside the model repo. Resolution goes
    through the in-process registry (a frozen Python dict, no network),
    so users can see which bases opt in without grepping source.
    Returns None when the key doesn't resolve (an `hf:...` escape hatch
    absent from the registry); the caller silently skips the section in
    that case.
    """
    from dlm.base_models import resolve as resolve_base_model
    from dlm.base_models.errors import BaseModelError

    try:
        resolved = resolve_base_model(base_model_key, accept_license=True)
    except BaseModelError:
        return None
    return {
        "base_model": resolved.key,
        "architecture": resolved.architecture,
        "trust_remote_code": bool(resolved.trust_remote_code),
    }
| 3151 |
|
| 3152 |
|
| 3153 |
def _render_base_security_text(console: object, snap: dict[str, object]) -> None:
    """Human-readable `dlm show` warning for trust_remote_code bases."""
    from rich.console import Console

    assert isinstance(console, Console)
    architecture = snap.get("architecture", "?")
    warning = (
        f" [yellow]security:[/yellow] base uses [red]trust_remote_code=True[/red] "
        f"(arch={architecture}) — HF loader will execute Python from the model repo"
    )
    console.print(warning)
| 3162 |
|
| 3163 |
|
| 3164 |
def _render_gate_text(console: object, snap: dict[str, object]) -> None:
    """Human-readable `dlm show` section for the learned adapter gate."""
    from rich.console import Console

    assert isinstance(console, Console)
    mode = snap.get("mode", "?")
    if mode == "diverged":
        console.print(" adapter gate ([red]diverged[/red]):")
        console.print(
            " [yellow]gate training produced a non-finite loss; "
            "store fell back to gate-less routing[/yellow]"
        )
    else:
        console.print(f" adapter gate ({mode}):")
    entries = snap.get("per_adapter", [])
    if not isinstance(entries, list):
        # Defensive: snapshot values come from a JSON-ish dict.
        return
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        name = entry.get("adapter_name", "?")
        weight = entry.get("mean_weight")
        if weight is None:
            console.print(f" {name} [dim](no recorded events)[/dim]")
            continue
        samples = entry.get("sample_count")
        w = float(weight) if isinstance(weight, (int, float)) else 0.0
        c = samples if isinstance(samples, int) else 0
        console.print(f" {name:<16} weight={w:.3f} samples={c}")
| 3191 |
|
| 3192 |
|
| 3193 |
def _render_training_cache_text(console: object, snap: dict[str, object]) -> None:
    """Human-readable `dlm show` section for the tokenized-section cache."""
    from rich.console import Console

    assert isinstance(console, Console)

    def _as_int(value: object) -> int:
        # Defensive coercion: snapshot values come from a JSON-ish dict.
        return value if isinstance(value, int) else 0

    console.print(" tokenized cache:")
    console.print(f" entries: {_as_int(snap.get('entry_count', 0))}")
    console.print(f" size: {_human_size(_as_int(snap.get('bytes', 0)))}")
    hit_rate = snap.get("last_run_hit_rate")
    if isinstance(hit_rate, (int, float)):
        console.print(f" last hit rate: {float(hit_rate):.1%}")
| 3207 |
|
| 3208 |
|
| 3209 |
def _render_training_sources_text(console: object, records: list[dict[str, object]]) -> None:
    """Render one line per training-source record (path plus expansion stats)."""
    from rich.console import Console

    assert isinstance(console, Console)
    console.print(" training sources:")
    for record in records:
        source_path = record["path"]
        file_count = record.get("file_count")
        total_bytes = record.get("total_bytes")
        if file_count is None:
            # Source was never expanded, so there are no file/byte stats.
            console.print(f" {source_path} [dim](not expanded)[/dim]")
            continue
        byte_count = int(total_bytes) if isinstance(total_bytes, int) else 0
        console.print(f" {source_path} {file_count} file(s), {_human_size(byte_count)}")
| 3223 |
|
| 3224 |
|
| 3225 |
def migrate_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file to migrate.")],
    dry_run: Annotated[bool, typer.Option("--dry-run")] = False,
    no_backup: Annotated[bool, typer.Option("--no-backup")] = False,
) -> None:
    """Migrate a .dlm frontmatter to the current schema version."""
    from rich.console import Console

    from dlm.doc.errors import DlmParseError
    from dlm.doc.migrate import migrate_file

    console = Console(stderr=True)

    try:
        outcome = migrate_file(path, dry_run=dry_run, no_backup=no_backup)
    except DlmParseError as exc:
        console.print(f"[red]migrate:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    # Nothing to do: the document already sits at the target schema version.
    if not outcome.applied:
        console.print(
            f"[green]migrate:[/green] {path} already at v{outcome.target_version} "
            "(no migrations needed)."
        )
        return

    versions = [*outcome.applied, outcome.target_version]
    chain = " → ".join(f"v{v}" for v in versions)

    if dry_run:
        console.print(
            f"[yellow]dry-run:[/yellow] {path} would migrate {chain} "
            "(re-run without --dry-run to apply)."
        )
        return

    if outcome.backup_path is not None:
        console.print(f"[dim]backup:[/dim] {outcome.backup_path}")
    console.print(f"[green]migrated:[/green] {path} {chain}")
| 3262 |
|
| 3263 |
|
| 3264 |
def templates_list_cmd(
    json_out: Annotated[
        bool,
        typer.Option("--json", help="Emit a JSON array of template metadata."),
    ] = False,
    refresh: Annotated[
        bool,
        typer.Option(
            "--refresh",
            help=(
                "Refresh from the upstream template gallery. Currently a no-op — "
                "upstream repo + signing key are deferred."
            ),
        ),
    ] = False,
    accept_unsigned: Annotated[
        bool,
        typer.Option(
            "--accept-unsigned",
            help=(
                "Bypass signed-tag verification on --refresh. Reserved; takes effect "
                "once the upstream gallery signs its releases."
            ),
        ),
    ] = False,
) -> None:
    """List the bundled (and, one day, remote) template gallery."""

    import json as _json

    from rich.console import Console

    from dlm.templates import list_bundled

    console_out = Console()
    console_err = Console(stderr=True)

    if refresh:
        from dlm.templates.fetcher import RemoteFetchUnavailable, cache_dir, fetch_all

        try:
            fetch_all(cache_dir(), remote="")
        except RemoteFetchUnavailable as exc:
            console_err.print(
                f"[yellow]templates:[/yellow] {exc} Falling back to the bundled gallery."
            )
        # --accept-unsigned is reserved for when the live fetcher lands;
        # touching it here silences ARG001 without ceremony.
        _ = accept_unsigned

    gallery = list_bundled()

    if json_out:
        def _as_record(tpl: object) -> dict[str, object]:
            # Flatten one template's metadata into plain JSON-safe types.
            meta = tpl.meta
            return {
                "name": tpl.name,
                "title": meta.title,
                "domain_tags": list(meta.domain_tags),
                "recommended_base": meta.recommended_base,
                "expected_steps": meta.expected_steps,
                "expected_duration": dict(meta.expected_duration),
                "summary": meta.summary,
                "sample_prompts": list(meta.sample_prompts),
            }

        console_out.print_json(_json.dumps([_as_record(tpl) for tpl in gallery]))
        return

    if not gallery:
        console_err.print("[yellow]templates:[/yellow] no bundled templates found.")
        raise typer.Exit(code=1)

    # Pad names so the title column lines up.
    name_width = max(len(tpl.name) for tpl in gallery)
    for tpl in gallery:
        console_out.print(
            f"[bold]{tpl.name:<{name_width}}[/bold] {tpl.meta.title} "
            f"[dim]({tpl.meta.recommended_base})[/dim]"
        )
| 3343 |
|
| 3344 |
|
| 3345 |
def push_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm or .dlm.pack to push.")],
    to: Annotated[
        str,
        typer.Option(
            "--to",
            help=(
                "Destination. `hf:<org>/<repo>` for HuggingFace Hub, "
                "`https://...` for a generic HTTPS endpoint, or a local path."
            ),
        ),
    ],
    sign: Annotated[
        bool,
        typer.Option("--sign", help="Sign the pack with minisign before upload."),
    ] = False,
    include_exports: Annotated[bool, typer.Option("--include-exports")] = False,
    include_base: Annotated[bool, typer.Option("--include-base")] = False,
    include_logs: Annotated[bool, typer.Option("--include-logs")] = False,
    licensee: Annotated[
        str | None,
        typer.Option(
            "--i-am-the-licensee",
            help="URL ack for --include-base on non-redistributable bases.",
        ),
    ] = None,
) -> None:
    """Upload a .dlm or .dlm.pack to an HF repo, URL endpoint, or local path."""
    from rich.console import Console

    from dlm.share import ShareError, push
    from dlm.share.signing import MinisignNotAvailableError

    console = Console(stderr=True)

    try:
        outcome = push(
            path,
            to,
            sign=sign,
            include_exports=include_exports,
            include_base=include_base,
            include_logs=include_logs,
            licensee_acceptance_url=licensee,
        )
    except (MinisignNotAvailableError, ShareError) as exc:
        # Both failure modes render identically and exit with the same code.
        console.print(f"[red]push:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    megabytes = outcome.bytes_sent / (1024 * 1024)
    console.print(f"[green]pushed:[/green] {outcome.destination} ({megabytes:.2f} MB)")
    if outcome.sink_kind.value == "hf":
        # Only HF destinations have a one-line install hint.
        console.print(f"[dim]install:[/dim] dlm pull {outcome.destination}")
    if outcome.detail:
        console.print(f"[dim]{outcome.detail}[/dim]")
| 3403 |
|
| 3404 |
|
| 3405 |
def pull_cmd(
    source: Annotated[
        str,
        typer.Argument(
            help=(
                "Source: `hf:<org>/<repo>`, `https://...`, "
                "`peer://host:port/<id>?token=...`, or a local path."
            )
        ),
    ],
    out: Annotated[
        Path | None,
        typer.Option("--out", help="Directory for the restored .dlm (default: CWD)."),
    ] = None,
    force: Annotated[
        bool,
        typer.Option("--force", help="Overwrite an existing store with the same dlm_id."),
    ] = False,
) -> None:
    """Download + verify + unpack a .dlm.pack from a remote source."""
    from rich.console import Console

    from dlm.pack.errors import PackError
    from dlm.share import ShareError, pull
    from dlm.share.signing import VerifyStatus

    console = Console(stderr=True)

    try:
        fetched = pull(source, out_dir=out, force=force)
    except ShareError as exc:
        console.print(f"[red]pull:[/red] {exc}")
        raise typer.Exit(code=1) from exc
    except PackError as exc:
        # Integrity failures get their own prefix so the cause is obvious.
        console.print(f"[red]pull:[/red] pack integrity: {exc}")
        raise typer.Exit(code=1) from exc

    megabytes = fetched.bytes_received / (1024 * 1024)
    console.print(f"[green]pulled:[/green] {fetched.source} → {fetched.dlm_path} ({megabytes:.2f} MB)")

    verification = fetched.verification
    if verification.status == VerifyStatus.VERIFIED:
        console.print(
            f"[green]verified:[/green] signature matches "
            f"[bold]{verification.key_path}[/bold]"
        )
    elif verification.status == VerifyStatus.UNVERIFIED:
        console.print(
            f"[yellow]unverified:[/yellow] signature present but "
            f"not matched ({verification.detail}); sha256 still validated"
        )
    else:
        # No signature at all — hashing already proved byte integrity.
        console.print("[dim]unsigned[/dim] (sha256 integrity still validated)")
| 3458 |
|
| 3459 |
|
| 3460 |
def serve_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file to serve.")],
    port: Annotated[int, typer.Option("--port")] = 7337,
    public: Annotated[
        bool,
        typer.Option(
            "--public",
            help="Bind 0.0.0.0 (requires --i-know-this-is-public); otherwise 127.0.0.1.",
        ),
    ] = False,
    i_know_public: Annotated[
        bool,
        typer.Option(
            "--i-know-this-is-public",
            help="Confirm binding 0.0.0.0 is safe on this network.",
        ),
    ] = False,
    max_concurrency: Annotated[
        int,
        typer.Option("--max-concurrency", help="Max concurrent connections per token."),
    ] = 4,
    rate_limit: Annotated[
        int,
        typer.Option("--rate-limit", help="Max requests per minute per token."),
    ] = 30,
    token_ttl_minutes: Annotated[
        int, typer.Option("--token-ttl-minutes", help="Token lifetime in minutes.")
    ] = 15,
) -> None:
    """Serve a .dlm's pack over LAN for peers to pull.

    Packs the store into a temp directory that lives as long as the server,
    then blocks until shutdown. The temp directory is removed on every exit
    path, including failures during packing or server startup.
    """
    import shutil
    import tempfile

    from rich.console import Console

    from dlm.doc.parser import parse_file
    from dlm.pack.packer import pack as pack_fn
    from dlm.share import ServeOptions, serve
    from dlm.store.paths import for_dlm

    console = Console(stderr=True)

    parsed = parse_file(path)
    dlm_id = parsed.frontmatter.dlm_id

    # pack() calls load_manifest(), which crashes with an unhelpful
    # "store manifest corrupt" error on a .dlm that's never been
    # trained. Surface the true cause instead.
    store = for_dlm(dlm_id)
    if not store.manifest.exists():
        console.print(
            f"[red]serve:[/red] no training state for {dlm_id} — run [bold]dlm train[/bold] first."
        )
        raise typer.Exit(code=1)

    # Pack into a temp file that lives as long as the server does.
    tmp_dir = Path(tempfile.mkdtemp(prefix="dlm-serve-"))
    try:
        tmp_pack = tmp_dir / f"{path.stem}.dlm.pack"
        pack_fn(path, out=tmp_pack)
        console.print(f"[dim]packed:[/dim] {tmp_pack} ({tmp_pack.stat().st_size} bytes)")

        opts = ServeOptions(
            port=port,
            public=public,
            i_know_this_is_public=i_know_public,
            max_concurrency=max_concurrency,
            rate_limit_per_min=rate_limit,
            token_ttl_seconds=token_ttl_minutes * 60,
        )
        handle = serve(dlm_id, tmp_pack, opts)
    except BaseException:
        # Fix: previously the temp dir leaked when pack()/serve() raised —
        # only the post-startup wait path cleaned it up.
        shutil.rmtree(tmp_dir, ignore_errors=True)
        raise

    console.print(
        f"[green]serving:[/green] {path.name} (dlm_id {dlm_id}) on "
        f"[bold]http://{handle.bind_host}:{handle.port}/{dlm_id}[/bold]"
    )
    console.print(f"[bold]peer URL:[/bold] {handle.peer_url}")
    console.print(f"[dim]token valid for {token_ttl_minutes} min. Ctrl-C to stop.[/dim]")

    try:
        handle.wait_shutdown()
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)
        console.print("[dim]stopped.[/dim]")
| 3544 |
|
| 3545 |
|
| 3546 |
# ---- Cache Commands -------------------------------------------------- |
| 3547 |
|
| 3548 |
|
| 3549 |
def cache_show_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file to inspect the cache for.")],
    json_out: Annotated[bool, typer.Option("--json", help="Emit machine-readable JSON.")] = False,
) -> None:
    """Show tokenized-section cache size, entry count, last-run hit rate."""
    import json as _json
    import sys as _sys

    from rich.console import Console

    from dlm.directives.cache import TokenizedCache
    from dlm.doc.errors import DlmParseError
    from dlm.doc.parser import parse_file
    from dlm.metrics import queries as _queries
    from dlm.store.paths import for_dlm

    err_console = Console(stderr=True)
    std_console = Console()

    try:
        parsed = parse_file(path)
    except (DlmParseError, OSError) as exc:
        err_console.print(f"[red]cache:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    doc_id = parsed.frontmatter.dlm_id
    store = for_dlm(doc_id)
    cache = TokenizedCache.open(store.tokenized_cache_dir)
    last_run = _queries.latest_tokenization(store.root)

    payload: dict[str, object] = {
        "dlm_id": doc_id,
        "cache_path": str(store.tokenized_cache_dir),
        "entry_count": cache.entry_count,
        "bytes": cache.total_bytes,
        "last_run_hit_rate": None if last_run is None else last_run.hit_rate,
        "last_run_id": None if last_run is None else last_run.run_id,
    }
    if json_out:
        # JSON goes to stdout directly so it stays pipe-friendly.
        _sys.stdout.write(_json.dumps(payload, indent=2) + "\n")
        return

    std_console.print(f"[bold]Cache for {doc_id}[/bold]")
    std_console.print(f" path: {store.tokenized_cache_dir}")
    std_console.print(f" entries: {cache.entry_count}")
    std_console.print(f" size: {_human_size(cache.total_bytes)}")
    if last_run is None:
        std_console.print(" last-run hit rate: [dim]no tokenization runs yet[/dim]")
    else:
        total_lookups = last_run.cache_hits + last_run.cache_misses
        std_console.print(
            f" last-run hit rate: {last_run.hit_rate:.1%} "
            f"({last_run.cache_hits}/{total_lookups})"
        )
| 3601 |
|
| 3602 |
|
| 3603 |
def cache_prune_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file to prune the cache for.")],
    older_than: Annotated[
        str | None,
        typer.Option(
            "--older-than",
            help=(
                "Drop entries not accessed in this duration. "
                "Format: `30d`, `12h`, `45m`. When omitted, defaults to "
                "the document's `training.cache.prune_older_than_days` "
                "(90d pre-v9 docs inherit)."
            ),
        ),
    ] = None,
) -> None:
    """Remove tokenized-cache entries not accessed within a cutoff."""
    from rich.console import Console

    from dlm.directives.cache import TokenizedCache
    from dlm.doc.errors import DlmParseError
    from dlm.doc.parser import parse_file
    from dlm.store.paths import for_dlm

    console = Console(stderr=True)

    # Parse the doc first — we need it either way (for dlm_id) AND
    # for the frontmatter default when --older-than is absent.
    try:
        parsed = parse_file(path)
    except (DlmParseError, OSError) as exc:
        console.print(f"[red]cache:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    if older_than is None:
        # Fall back to the frontmatter's per-doc default. Pre-v9 docs
        # get the CacheConfig default of 90 days via the Pydantic
        # factory on parse.
        default_days = parsed.frontmatter.training.cache.prune_older_than_days
        cutoff_seconds: float | None = float(default_days) * 86400.0
        label = f"{default_days}d"
    else:
        cutoff_seconds = _parse_duration(older_than)
        if cutoff_seconds is None:
            console.print(
                f"[red]cache:[/red] invalid --older-than {older_than!r} "
                "(expected e.g. 30d, 12h, 45m)"
            )
            raise typer.Exit(code=2)
        label = older_than

    store = for_dlm(parsed.frontmatter.dlm_id)
    cache = TokenizedCache.open(store.tokenized_cache_dir)
    removed = cache.prune(older_than_seconds=cutoff_seconds)
    cache.save_manifest()
    console.print(f"[green]cache:[/green] pruned {removed} entr(y/ies) older than {label}")
| 3658 |
|
| 3659 |
|
| 3660 |
def cache_clear_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file to wipe the cache for.")],
    force: Annotated[
        bool,
        typer.Option("--force", help="Skip the confirmation prompt."),
    ] = False,
) -> None:
    """Wipe every entry in the tokenized-section cache for this store."""
    from rich.console import Console

    from dlm.directives.cache import TokenizedCache
    from dlm.doc.errors import DlmParseError
    from dlm.doc.parser import parse_file
    from dlm.store.paths import for_dlm

    console = Console(stderr=True)

    try:
        parsed = parse_file(path)
    except (DlmParseError, OSError) as exc:
        console.print(f"[red]cache:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    store = for_dlm(parsed.frontmatter.dlm_id)
    token_cache = TokenizedCache.open(store.tokenized_cache_dir)

    # Only prompt when there is something to lose and --force wasn't given.
    needs_prompt = token_cache.entry_count > 0 and not force
    if needs_prompt and not typer.confirm(
        f"wipe {token_cache.entry_count} entries ({_human_size(token_cache.total_bytes)})?"
    ):
        console.print("[yellow]cache:[/yellow] clear cancelled")
        raise typer.Exit(code=0)

    removed = token_cache.clear()
    token_cache.save_manifest()
    console.print(f"[green]cache:[/green] cleared {removed} entr(y/ies)")
| 3697 |
|
| 3698 |
|
| 3699 |
def _parse_duration(spec: str) -> float | None: |
| 3700 |
"""Parse a duration like `30d`, `12h`, `45m` → seconds. None on |
| 3701 |
malformed input.""" |
| 3702 |
if not spec or not spec[:-1].isdigit(): |
| 3703 |
return None |
| 3704 |
n = int(spec[:-1]) |
| 3705 |
unit = spec[-1].lower() |
| 3706 |
if unit == "s": |
| 3707 |
return float(n) |
| 3708 |
if unit == "m": |
| 3709 |
return float(n) * 60 |
| 3710 |
if unit == "h": |
| 3711 |
return float(n) * 3600 |
| 3712 |
if unit == "d": |
| 3713 |
return float(n) * 86400 |
| 3714 |
return None |
| 3715 |
|
| 3716 |
|
| 3717 |
# --- preference ----------------------------------------------------------- |
| 3718 |
|
| 3719 |
|
| 3720 |
def preference_mine_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file to mine preferences from.")],
    samples: Annotated[
        int,
        typer.Option("--samples", help="Candidate responses to sample per prompt.", min=2),
    ] = 4,
    judge: Annotated[
        str,
        typer.Option(
            "--judge",
            help="Judge selector: sway, hf:<model>, or cli:<cmd>.",
        ),
    ] = "sway",
    threshold: Annotated[
        float | None,
        typer.Option(
            "--threshold",
            help="Minimum chosen-vs-rejected score margin. Defaults to the judge's native threshold.",
            min=0.0,
        ),
    ] = None,
    max_pairs: Annotated[
        int | None,
        typer.Option(
            "--max-pairs",
            help="Maximum mined preference pairs to keep from this run.",
            min=1,
        ),
    ] = None,
    temp: Annotated[
        float,
        typer.Option("--temp", help="Sampling temperature for candidate generation.", min=0.0),
    ] = 0.7,
    top_p: Annotated[
        float | None,
        typer.Option(
            "--top-p",
            help="Optional nucleus-sampling cutoff for candidate generation.",
            min=0.0,
            max=1.0,
        ),
    ] = None,
    backend: Annotated[
        str,
        typer.Option(
            "--backend",
            help="Generation backend: auto, pytorch, or mlx.",
        ),
    ] = "auto",
    adapter: Annotated[
        str | None,
        typer.Option(
            "--adapter",
            help=(
                "Named adapter to mine from on multi-adapter documents. "
                "Required there; invalid on single-adapter documents."
            ),
        ),
    ] = None,
    apply: Annotated[
        bool,
        typer.Option(
            "--apply",
            help=(
                "Write mined preference sections directly to the .dlm. "
                "Default stages them for `dlm preference apply`."
            ),
        ),
    ] = False,
) -> None:
    """Sample + stage auto-mined preference sections from the current adapter.

    Pipeline: validate flags → parse the .dlm → validate adapter/judge
    combination → require a prior training run → resolve the (possibly
    gated) base model → reject non-text modalities → select and load a
    generation backend → build the mine plan under the judge → record a
    metrics event → either apply the mined sections to the document
    (--apply) or stage them for `dlm preference apply`.

    Exit codes: 2 for usage/validation errors, 1 for environment/state
    errors (parse failure, no training run, gated license, judge
    unavailable, missing adapter weights).
    """
    from rich.console import Console

    from dlm.base_models import GatedModelError
    from dlm.base_models import resolve as resolve_base_model
    from dlm.doc.errors import DlmParseError
    from dlm.doc.parser import parse_file
    from dlm.hardware import doctor
    from dlm.inference import AdapterNotFoundError
    from dlm.inference.backends import (
        UnsupportedBackendError,
        build_backend,
        select_backend,
    )
    from dlm.metrics import MetricsRecorder, PreferenceMineEvent
    from dlm.metrics.events import PreferenceMineWriteMode
    from dlm.modality import modality_for
    from dlm.preference import (
        InvalidJudgeSpecError,
        JudgeUnavailableError,
        build_apply_plan,
        build_judge,
        build_mine_plan,
        render_apply_plan,
        render_mine_plan,
    )
    from dlm.preference.apply import apply_plan as apply_preference_plan
    from dlm.preference.pending import clear_pending_plan, save_pending_plan
    from dlm.store.paths import for_dlm

    # Diagnostics go to stderr; plan renderings and summaries to stdout.
    console = Console(stderr=True)
    out_console = Console()

    # Fast-fail on an unknown backend before doing any file I/O.
    if backend not in ("auto", "pytorch", "mlx"):
        console.print(
            f"[red]preference:[/red] --backend must be `auto`, `pytorch`, or `mlx` (got {backend!r})."
        )
        raise typer.Exit(code=2)

    try:
        parsed = parse_file(path)
    except (DlmParseError, OSError) as exc:
        console.print(f"[red]preference:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    # --adapter must match the document shape: required for multi-adapter
    # docs, forbidden for single-adapter docs, and must name a declared
    # adapter when given.
    adapters_declared = parsed.frontmatter.training.adapters
    if adapter is not None:
        if adapters_declared is None:
            console.print(
                "[red]preference:[/red] --adapter is only valid on multi-adapter "
                "documents (this doc does not declare `training.adapters`)."
            )
            raise typer.Exit(code=2)
        if adapter not in adapters_declared:
            declared = sorted(adapters_declared)
            console.print(
                f"[red]preference:[/red] --adapter {adapter!r} is not declared "
                f"(declared: {declared})."
            )
            raise typer.Exit(code=2)
    elif adapters_declared is not None:
        console.print(
            "[red]preference:[/red] multi-adapter documents require --adapter "
            "so mining knows which adapter to sample."
        )
        raise typer.Exit(code=2)

    # The `sway` self-judge isn't wired for named adapters yet.
    judge_kind = judge.split(":", 1)[0].strip()
    if adapter is not None and judge_kind == "sway":
        console.print(
            "[red]preference:[/red] --judge sway is not yet wired for named adapters; "
            "use `hf:<model>` or `cli:<cmd>` for multi-adapter mining."
        )
        raise typer.Exit(code=2)

    # Mining samples from trained weights, so a prior run is mandatory.
    store = for_dlm(parsed.frontmatter.dlm_id)
    run_id = _latest_training_run_id(store)
    if run_id is None:
        console.print(
            "[red]preference:[/red] mining requires a prior training run (run `dlm train` first)."
        )
        raise typer.Exit(code=1)

    # Reuse any license acceptance recorded by an earlier `dlm train`;
    # a gated base with no recorded acceptance is a hard stop here.
    already_accepted = _previously_accepted(store.manifest)
    try:
        spec = resolve_base_model(parsed.frontmatter.base_model, accept_license=already_accepted)
    except GatedModelError as exc:
        console.print(
            f"[red]license:[/red] base {parsed.frontmatter.base_model!r} is gated and has "
            "no recorded acceptance in this store; run `dlm train --i-accept-license` first."
        )
        raise typer.Exit(code=1) from exc

    # Text-only for now: image/audio bases are rejected up front.
    dispatch = modality_for(spec)
    if dispatch.accepts_images or dispatch.accepts_audio:
        console.print(
            f"[red]preference:[/red] preference mining currently supports text bases only; "
            f"base {spec.key!r} is modality='{spec.modality}'."
        )
        raise typer.Exit(code=2)

    # Resolve `auto` (or validate an explicit choice) against hardware caps.
    caps = doctor().capabilities
    try:
        backend_name = select_backend(backend, caps)  # type: ignore[arg-type]
    except UnsupportedBackendError as exc:
        console.print(f"[red]preference:[/red] {exc}")
        raise typer.Exit(code=2) from exc
    backend_obj = build_backend(backend_name, caps)

    try:
        backend_obj.load(spec, store, adapter_name=adapter)
    except AdapterNotFoundError as exc:
        console.print(f"[red]preference:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    # Build the judge and the mine plan; the backend is unloaded in
    # `finally` regardless of outcome so model memory is always released.
    try:
        judge_obj = build_judge(judge, dlm_path=path)
        plan = build_mine_plan(
            parsed,
            backend_obj,
            judge_obj,
            mined_run_id=run_id,
            samples=samples,
            max_pairs=max_pairs,
            threshold=threshold,
            temperature=temp,
            top_p=top_p,
        )
    except InvalidJudgeSpecError as exc:
        # Bad --judge syntax → usage error.
        console.print(f"[red]preference:[/red] {exc}")
        raise typer.Exit(code=2) from exc
    except JudgeUnavailableError as exc:
        # Judge exists but can't run here → environment error.
        console.print(f"[red]preference:[/red] {exc}")
        raise typer.Exit(code=1) from exc
    except ValueError as exc:
        console.print(f"[red]preference:[/red] {exc}")
        raise typer.Exit(code=2) from exc
    finally:
        backend_obj.unload()

    recorder = MetricsRecorder(store.root)

    def _record_preference_mine(write_mode: PreferenceMineWriteMode) -> None:
        # One metrics event per invocation, tagged with how (or whether)
        # the mined sections were written: "empty", "applied", or "staged".
        recorder.record_preference_mine(
            PreferenceMineEvent(
                run_id=run_id,
                judge_name=judge_obj.name,
                sample_count=samples,
                mined_pairs=len(plan.additions),
                skipped_prompts=len(plan.skipped),
                write_mode=write_mode,
            )
        )

    out_console.print(render_mine_plan(plan))

    # Nothing mined: drop any stale staged plan and exit non-zero so
    # scripts can distinguish "no pairs" from a successful mine.
    if not plan.additions:
        clear_pending_plan(store)
        _record_preference_mine("empty")
        out_console.print(
            "\n[yellow]no candidates to mine[/yellow] — either instruction prompts "
            "did not yield a confident pair, or the matching preference sections "
            "already exist in the document."
        )
        raise typer.Exit(code=2)

    sections = [addition.section for addition in plan.additions]

    if apply:
        # --apply: write the sections into the .dlm immediately.
        apply_plan = build_apply_plan(parsed, sections)
        out_console.print("")
        out_console.print(render_apply_plan(apply_plan))
        summary = apply_preference_plan(parsed, apply_plan, target=path)
        clear_pending_plan(store)
        _record_preference_mine("applied")
        out_console.print(
            f"\n[green]preference:[/green] wrote {summary.added} section(s) to {path} "
            f"({summary.skipped} skipped)"
        )
        return

    # Default: stage the plan for a later `dlm preference apply`.
    pending = save_pending_plan(store, source_path=path.resolve(), sections=sections)
    _record_preference_mine("staged")
    out_console.print(
        f"\n[green]preference:[/green] staged {len(pending.sections)} mined preference "
        f"section(s). Run [bold]dlm preference apply {path}[/bold] to write them."
    )
| 3977 |
|
| 3978 |
|
| 3979 |
def preference_apply_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file to apply staged preferences into.")],
) -> None:
    """Write the staged preference-mine plan into the `.dlm`."""
    from rich.console import Console

    from dlm.doc.errors import DlmParseError
    from dlm.doc.parser import parse_file
    from dlm.preference import build_apply_plan, render_apply_plan
    from dlm.preference.apply import apply_plan as apply_preference_plan
    from dlm.preference.pending import (
        PendingPreferencePlanError,
        clear_pending_plan,
        load_pending_plan,
    )
    from dlm.store.paths import for_dlm

    err_console = Console(stderr=True)
    std_console = Console()

    try:
        doc = parse_file(path)
    except (DlmParseError, OSError) as exc:
        err_console.print(f"[red]preference:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    store = for_dlm(doc.frontmatter.dlm_id)
    try:
        staged = load_pending_plan(store)
    except PendingPreferencePlanError as exc:
        err_console.print(f"[red]preference:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    if staged is None:
        err_console.print(
            "[red]preference:[/red] no staged mined preferences found; "
            "run `dlm preference mine` first."
        )
        raise typer.Exit(code=1)

    write_plan = build_apply_plan(doc, list(staged.sections))
    std_console.print(render_apply_plan(write_plan))

    # Everything staged was already in the document: discard the stale plan.
    if not write_plan.additions:
        clear_pending_plan(store)
        std_console.print(
            "\n[yellow]no staged preferences to write[/yellow] — the pending plan was "
            "already present in the document."
        )
        raise typer.Exit(code=2)

    summary = apply_preference_plan(doc, write_plan, target=path)
    clear_pending_plan(store)
    std_console.print(
        f"\n[green]preference:[/green] wrote {summary.added} section(s) to {path} "
        f"({summary.skipped} skipped)"
    )
| 4036 |
|
| 4037 |
|
| 4038 |
def preference_revert_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file to strip auto-mined preferences from.")],
) -> None:
    """Remove every `auto_mined: true` preference section from the `.dlm`."""
    from rich.console import Console

    from dlm.doc.errors import DlmParseError
    from dlm.doc.parser import parse_file
    from dlm.preference import revert_all_auto_mined

    err_console = Console(stderr=True)
    stdout_console = Console()

    # Malformed documents and unreadable paths are reported on stderr, exit 1.
    try:
        document = parse_file(path)
    except (DlmParseError, OSError) as error:
        err_console.print(f"[red]preference:[/red] {error}")
        raise typer.Exit(code=1) from error

    result = revert_all_auto_mined(document, target=path)
    stdout_console.print(
        f"[green]preference:[/green] stripped {len(result.added_section_ids)} "
        f"auto-mined preference section(s) from {path}"
    )
| 4062 |
|
| 4063 |
|
| 4064 |
def preference_list_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file whose auto-mined preferences we list.")],
) -> None:
    """List applied + staged auto-mined preference sections.

    Prints a per-document summary (applied vs. staged counts) followed by
    one line per section. Exits 1 when the document cannot be parsed or the
    pending plan is corrupt.
    """
    from rich.console import Console

    from dlm.doc.errors import DlmParseError
    from dlm.doc.parser import parse_file
    from dlm.doc.sections import SectionType
    from dlm.preference.pending import PendingPreferencePlanError, load_pending_plan
    from dlm.store.paths import for_dlm

    console = Console(stderr=True)
    out_console = Console()

    try:
        parsed = parse_file(path)
    except (DlmParseError, OSError) as exc:
        console.print(f"[red]preference:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    store = for_dlm(parsed.frontmatter.dlm_id)
    try:
        pending = load_pending_plan(store)
    except PendingPreferencePlanError as exc:
        console.print(f"[red]preference:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    # Applied sections live in the document itself and carry auto_mined=True.
    applied = [
        section
        for section in parsed.sections
        if section.type is SectionType.PREFERENCE and section.auto_mined
    ]

    out_console.print(f"[bold]{path}[/bold]")
    out_console.print(f" applied auto-mined: {len(applied)}")
    out_console.print(f" staged pending: {len(pending.sections) if pending else 0}")

    if not applied and pending is None:
        out_console.print(" [dim]no auto-mined preference sections yet[/dim]")
        return

    def _print_listing(heading: str, sections: Sequence[Any]) -> None:
        # Shared renderer for the Applied/Pending listings (identical row format),
        # mirroring how `synth list` funnels both through _render_synth_listing.
        out_console.print(f"\n[bold]{heading}[/bold]")
        for section in sections:
            prompt = _preference_prompt_summary(section.content, section_id=section.section_id)
            judge_name = section.judge_name or "unknown"
            run_id = section.mined_run_id if section.mined_run_id is not None else "?"
            out_console.print(
                f" - {section.section_id} judge={judge_name} run={run_id} prompt={prompt}"
            )

    if applied:
        _print_listing("Applied", applied)
    if pending is not None:
        _print_listing("Pending", pending.sections)
| 4125 |
|
| 4126 |
|
| 4127 |
def _latest_training_run_id(store: object) -> int | None:
    """Most recent run id from metrics DB or manifest."""
    from dlm.metrics.queries import latest_run_id
    from dlm.store.errors import ManifestCorruptError
    from dlm.store.manifest import load_manifest
    from dlm.store.paths import StorePath

    assert isinstance(store, StorePath)

    # The metrics database wins whenever it has any run recorded.
    from_metrics = latest_run_id(store.root)
    if from_metrics is not None:
        return from_metrics

    # Otherwise fall back to the manifest; a missing, corrupt, or unreadable
    # manifest simply means "no known run".
    if store.manifest.exists():
        try:
            manifest = load_manifest(store.manifest)
        except (ManifestCorruptError, OSError):
            return None
        if manifest.training_runs:
            return max(run.run_id for run in manifest.training_runs)
    return None
| 4148 |
|
| 4149 |
|
| 4150 |
def _preference_prompt_summary(content: str, *, section_id: str) -> str:
    """Best-effort prompt summary for `preference list`."""
    from dlm.data.errors import PreferenceParseError
    from dlm.data.preference_parser import parse_preference_body

    try:
        parsed_triples = parse_preference_body(content, section_id=section_id)
    except PreferenceParseError:
        return "<unparseable>"

    if not parsed_triples:
        return "<empty>"

    # Summarize by the first non-padded line of the first triple's prompt.
    first_line = parsed_triples[0].prompt.splitlines()[0].strip()
    return first_line if first_line else "<blank>"
| 4163 |
|
| 4164 |
|
| 4165 |
# --- synth ----------------------------------------------------------------- |
| 4166 |
|
| 4167 |
|
| 4168 |
def synth_instructions_cmd(
    path: Annotated[
        Path, typer.Argument(help=".dlm file to synthesize instruction sections from.")
    ],
    teacher: Annotated[
        str,
        typer.Option(
            "--teacher",
            help=(
                "Teacher selector: self, hf:<model>, openai:<model>, "
                "anthropic:<model>, or vllm-server:<url>."
            ),
        ),
    ] = "self",
    per_section: Annotated[
        int,
        typer.Option(
            "--per-section",
            help="Instruction pairs to generate per prose section.",
            min=1,
        ),
    ] = 3,
    strategy: Annotated[
        str,
        typer.Option(
            "--strategy",
            help="Synthesis strategy: extraction, expansion, or both.",
        ),
    ] = "extraction",
    filter_kind: Annotated[
        str,
        typer.Option(
            "--filter",
            help="Filter pipeline: sway, none, or dedup-only.",
        ),
    ] = "sway",
    threshold: Annotated[
        float | None,
        typer.Option(
            "--threshold",
            help="Optional minimum sway-judge margin when --filter=sway.",
            min=0.0,
        ),
    ] = None,
    max_pairs: Annotated[
        int | None,
        typer.Option(
            "--max-pairs",
            help="Maximum accepted synth pairs to keep from this run.",
            min=1,
        ),
    ] = None,
    max_new_tokens: Annotated[
        int,
        typer.Option(
            "--max-new-tokens",
            help="Maximum new tokens the teacher may emit per prompt.",
            min=1,
        ),
    ] = 512,
    temp: Annotated[
        float,
        typer.Option("--temp", help="Teacher sampling temperature.", min=0.0),
    ] = 0.0,
    top_p: Annotated[
        float | None,
        typer.Option(
            "--top-p",
            help="Optional top-p cutoff for teacher sampling.",
            min=0.0,
            max=1.0,
        ),
    ] = None,
    seed: Annotated[
        int | None,
        typer.Option("--seed", help="Optional teacher sampling seed."),
    ] = None,
    apply: Annotated[
        bool,
        typer.Option(
            "--apply",
            help="Write accepted auto-synth sections directly to the .dlm.",
        ),
    ] = False,
    dry_run: Annotated[
        bool,
        typer.Option(
            "--dry-run",
            help="Preview the synth plan without staging or writing anything.",
        ),
    ] = False,
) -> None:
    """Generate, stage, or apply auto-synth instruction sections.

    Flow: build a synth plan with the selected teacher, run the chosen
    filter pipeline over it, then either write accepted sections directly
    (``--apply``), preview only (``--dry-run``), or stage them as a pending
    plan for later ``dlm synth list`` / apply.

    Exit codes as used below: 2 for invalid flag combinations, rejected
    values, or an empty accepted set; 1 for parse, teacher-availability,
    teacher-invocation, or judge-availability failures.
    """
    from rich.console import Console

    from dlm.doc.errors import DlmParseError
    from dlm.doc.parser import parse_file
    from dlm.preference import JudgeUnavailableError, build_judge
    from dlm.store.paths import for_dlm
    from dlm.synth import (
        InvalidTeacherSpecError,
        TeacherInvocationError,
        TeacherUnavailableError,
        build_synth_plan,
        build_teacher,
        clear_pending_plan,
        filter_synth_plan,
        render_filter_report,
        render_synth_plan,
        save_pending_plan,
    )
    # Aliased so `dlm.synth.apply_plan` does not shadow the `apply` flag, and
    # the apply-plan helpers stay distinct from other commands' same names.
    from dlm.synth import (
        apply_plan as apply_synth_plan,
    )
    from dlm.synth import (
        build_apply_plan as build_synth_apply_plan,
    )
    from dlm.synth import (
        render_apply_plan as render_synth_apply_plan,
    )

    console = Console(stderr=True)  # diagnostics -> stderr
    out_console = Console()  # plan/report output -> stdout

    # --- flag validation (all misuse exits with code 2) ---
    if strategy not in ("extraction", "expansion", "both"):
        console.print(
            "[red]synth:[/red] --strategy must be one of extraction|expansion|both "
            f"(got {strategy!r})."
        )
        raise typer.Exit(code=2)
    if filter_kind not in ("sway", "none", "dedup-only"):
        console.print(
            f"[red]synth:[/red] --filter must be one of sway|none|dedup-only (got {filter_kind!r})."
        )
        raise typer.Exit(code=2)
    if apply and dry_run:
        console.print("[red]synth:[/red] --apply and --dry-run are mutually exclusive.")
        raise typer.Exit(code=2)
    if threshold is not None and filter_kind != "sway":
        console.print("[red]synth:[/red] --threshold is only valid when --filter is `sway`.")
        raise typer.Exit(code=2)

    try:
        parsed = parse_file(path)
    except (DlmParseError, OSError) as exc:
        console.print(f"[red]synth:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    # Per-document store: pending plans are keyed on the document's dlm_id.
    store = for_dlm(parsed.frontmatter.dlm_id)

    # Build the teacher and generate the raw plan. The cast only narrows the
    # already-validated string for the type checker.
    try:
        strategy_value = cast(Literal["extraction", "expansion", "both"], strategy)
        teacher_obj = build_teacher(teacher, dlm_path=path)
        plan = build_synth_plan(
            parsed,
            teacher_obj,
            per_section=per_section,
            strategy=strategy_value,
            max_pairs=max_pairs,
            max_new_tokens=max_new_tokens,
            temperature=temp,
            top_p=top_p,
            seed=seed,
        )
    except InvalidTeacherSpecError as exc:
        # Bad --teacher spelling is user error (2); the next two are
        # environment/runtime failures (1).
        console.print(f"[red]synth:[/red] {exc}")
        raise typer.Exit(code=2) from exc
    except TeacherUnavailableError as exc:
        console.print(f"[red]synth:[/red] {exc}")
        raise typer.Exit(code=1) from exc
    except TeacherInvocationError as exc:
        console.print(f"[red]synth:[/red] {exc}")
        raise typer.Exit(code=1) from exc
    except ValueError as exc:
        console.print(f"[red]synth:[/red] {exc}")
        raise typer.Exit(code=2) from exc

    # Only the sway filter needs a judge; other filter kinds run without one.
    judge_obj = None
    if filter_kind == "sway":
        try:
            judge_obj = build_judge("sway", dlm_path=path)
        except JudgeUnavailableError as exc:
            console.print(f"[red]synth:[/red] {exc}")
            raise typer.Exit(code=1) from exc

    try:
        filter_value = cast(Literal["sway", "none", "dedup-only"], filter_kind)
        filtered = filter_synth_plan(
            plan,
            filter_kind=filter_value,
            judge=judge_obj,
            threshold=threshold,
        )
    except ValueError as exc:
        console.print(f"[red]synth:[/red] {exc}")
        raise typer.Exit(code=2) from exc

    # Always show the raw plan and the filter report, regardless of mode.
    out_console.print(render_synth_plan(plan))
    out_console.print("")
    out_console.print(render_filter_report(filtered))

    if not filtered.additions:
        # Nothing survived; drop any previously staged plan (unless this is a
        # read-only dry run) and signal "nothing to do" with exit 2.
        if not dry_run:
            clear_pending_plan(store)
        out_console.print(
            "\n[yellow]no synth additions accepted[/yellow] — either generation "
            "yielded no valid pairs, dedup removed them, or the filter rejected them."
        )
        raise typer.Exit(code=2)

    sections = [addition.addition.section for addition in filtered.additions]

    if apply:
        # Direct-write mode: render the apply plan, write it, and clear the
        # now-superseded pending plan.
        apply_plan = build_synth_apply_plan(parsed, sections)
        out_console.print("")
        out_console.print(render_synth_apply_plan(apply_plan))
        summary = apply_synth_plan(parsed, apply_plan, target=path)
        clear_pending_plan(store)
        out_console.print(
            f"\n[green]synth:[/green] wrote {summary.added} section(s) to {path} "
            f"({summary.skipped} skipped)"
        )
        return

    if dry_run:
        out_console.print("\n[green]synth:[/green] dry-run only — nothing staged.")
        return

    # Default mode: stage the accepted sections as a pending plan.
    pending = save_pending_plan(store, source_path=path.resolve(), sections=sections)
    out_console.print(
        f"\n[green]synth:[/green] staged {len(pending.sections)} auto-synth instruction "
        f"section(s). Run [bold]dlm synth list {path}[/bold] to inspect them."
    )
| 4401 |
|
| 4402 |
|
| 4403 |
def synth_revert_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file to strip auto-synth instructions from.")],
) -> None:
    """Remove every `auto_synth: true` instruction section from the `.dlm`."""
    from rich.console import Console

    from dlm.doc.errors import DlmParseError
    from dlm.doc.parser import parse_file
    from dlm.synth import revert_all_auto_synth

    err_console = Console(stderr=True)
    stdout_console = Console()

    # Surface parse/read failures on stderr and exit 1.
    try:
        document = parse_file(path)
    except (DlmParseError, OSError) as error:
        err_console.print(f"[red]synth:[/red] {error}")
        raise typer.Exit(code=1) from error

    result = revert_all_auto_synth(document, target=path)
    stdout_console.print(
        f"[green]synth:[/green] stripped {len(result.added_section_ids)} "
        f"auto-synth instruction section(s) from {path}"
    )
| 4427 |
|
| 4428 |
|
| 4429 |
def synth_list_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file whose auto-synth instructions we list.")],
) -> None:
    """List applied + staged auto-synth instruction sections."""
    from rich.console import Console

    from dlm.doc.errors import DlmParseError
    from dlm.doc.parser import parse_file
    from dlm.doc.sections import SectionType
    from dlm.store.paths import for_dlm
    from dlm.synth import PendingSynthPlanError, load_pending_plan

    err_console = Console(stderr=True)
    stdout_console = Console()

    try:
        document = parse_file(path)
    except (DlmParseError, OSError) as error:
        err_console.print(f"[red]synth:[/red] {error}")
        raise typer.Exit(code=1) from error

    store = for_dlm(document.frontmatter.dlm_id)
    try:
        staged = load_pending_plan(store)
    except PendingSynthPlanError as error:
        err_console.print(f"[red]synth:[/red] {error}")
        raise typer.Exit(code=1) from error

    # Applied sections live in the document itself and carry auto_synth=True.
    applied = [s for s in document.sections if s.type is SectionType.INSTRUCTION and s.auto_synth]
    staged_count = len(staged.sections) if staged else 0

    stdout_console.print(f"[bold]{path}[/bold]")
    stdout_console.print(f" applied auto-synth: {len(applied)}")
    stdout_console.print(f" staged pending: {staged_count}")

    if not applied and staged is None:
        stdout_console.print(" [dim]no auto-synth instruction sections yet[/dim]")
        return

    if applied:
        _render_synth_listing(stdout_console, "Applied", applied)
    if staged is not None:
        _render_synth_listing(stdout_console, "Pending", staged.sections)
| 4475 |
|
| 4476 |
|
| 4477 |
def _render_synth_listing(
    out_console: object,
    heading: str,
    sections: Sequence[object],
) -> None:
    """Print count breakdowns and per-section rows under `heading`."""
    from collections import Counter

    from rich.console import Console

    from dlm.doc.sections import Section

    assert isinstance(out_console, Console)
    # Only fully-typed Section entries are rendered; anything else is dropped.
    typed = [entry for entry in sections if isinstance(entry, Section)]

    out_console.print(f"\n[bold]{heading}[/bold]")

    # Breakdowns, rendered in a fixed order: teacher, strategy, source section.
    breakdowns = (
        (" by teacher:", Counter(s.synth_teacher or "unknown" for s in typed)),
        (" by strategy:", Counter(s.synth_strategy or "unknown" for s in typed)),
        (" by source section:", Counter(s.source_section_id or "unknown" for s in typed)),
    )
    for label, counts in breakdowns:
        out_console.print(label)
        for key in sorted(counts):
            out_console.print(f" - {key}: {counts[key]}")

    out_console.print(" sections:")
    for entry in typed:
        prompt = _synth_prompt_summary(entry.content, section_id=entry.section_id)
        out_console.print(
            " - "
            f"{entry.section_id} teacher={entry.synth_teacher or 'unknown'} "
            f"strategy={entry.synth_strategy or 'unknown'} "
            f"source={entry.source_section_id or 'unknown'} "
            f"prompt={prompt}"
        )
| 4519 |
|
| 4520 |
|
| 4521 |
def _synth_prompt_summary(content: str, *, section_id: str) -> str:
    """Best-effort prompt summary for `synth list`."""
    from dlm.data.errors import InstructionParseError
    from dlm.data.instruction_parser import parse_instruction_body

    try:
        parsed_pairs = parse_instruction_body(content, section_id=section_id)
    except InstructionParseError:
        return "<unparseable>"

    if not parsed_pairs:
        return "<empty>"

    # Summarize by the first line of the first pair's question.
    first_line = parsed_pairs[0].question.splitlines()[0].strip()
    return first_line if first_line else "<blank>"
| 4533 |
return prompt or "<blank>" |
| 4534 |
|
| 4535 |
|
| 4536 |
# --- harvest -------------------------------------------------------------- |
| 4537 |
|
| 4538 |
|
| 4539 |
def harvest_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file to harvest into.")],
    sway_json: Annotated[
        Path | None,
        typer.Option(
            "--sway-json",
            help="Path to a sway JSON report. Required unless --revert is set.",
        ),
    ] = None,
    apply: Annotated[
        bool,
        typer.Option(
            "--apply",
            help="Write harvested sections to the .dlm. Default is dry-run (review only).",
        ),
    ] = False,
    tag: Annotated[
        str,
        typer.Option(
            "--tag",
            help="Prefix for the synthesized section's harvest_source metadata.",
        ),
    ] = "auto-harvest",
    min_confidence: Annotated[
        float,
        typer.Option(
            "--min-confidence",
            help="Drop candidates whose sway evidence.confidence is below this.",
            min=0.0,
            max=1.0,
        ),
    ] = 0.0,
    strict: Annotated[
        bool,
        typer.Option(
            "--strict/--lax",
            help=(
                "Strict (default): refuse if any failing probe lacks a "
                "reference. Lax: log a warning and skip those probes."
            ),
        ),
    ] = True,
    revert: Annotated[
        bool,
        typer.Option(
            "--revert",
            help=(
                "Strip every auto-harvested section from the document. "
                "Mutually exclusive with --sway-json / --apply."
            ),
        ),
    ] = False,
) -> None:
    """Adversarial replay: harvest failing sway probes back into the .dlm.

    Default mode is `--dry-run`-style preview; pass `--apply` to write.

    Exit codes as used below: 1 for flag misuse, parse failures, and report
    read failures; 2 when the plan ends up with nothing to harvest.
    """
    from rich.console import Console

    from dlm.doc.errors import DlmParseError
    from dlm.doc.parser import parse_file
    from dlm.harvest import (
        HarvestError,
        MalformedSwayReportError,
        NoReferenceError,
        apply_plan,
        build_plan,
        read_sway_report,
        render_plan,
        revert_all_auto_harvests,
    )

    console = Console(stderr=True)  # diagnostics -> stderr
    out_console = Console()  # plan/report output -> stdout

    # --revert conflicts with the harvest inputs; and without --revert a
    # sway report is mandatory.
    if revert and (sway_json is not None or apply):
        console.print(
            "[red]harvest:[/red] --revert is mutually exclusive with --sway-json / --apply"
        )
        raise typer.Exit(code=1)
    if not revert and sway_json is None:
        console.print(
            "[red]harvest:[/red] --sway-json is required (or pass --revert "
            "to strip auto-harvested sections)"
        )
        raise typer.Exit(code=1)

    try:
        parsed = parse_file(path)
    except (DlmParseError, OSError) as exc:
        console.print(f"[red]harvest:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    # Revert mode: strip and return early — no report is read.
    if revert:
        summary = revert_all_auto_harvests(parsed, target=path)
        out_console.print(
            f"[green]harvest:[/green] stripped {len(summary.added_section_ids)} "
            f"auto-harvested section(s) from {path} (all harvest runs, not just last)"
        )
        return

    assert sway_json is not None  # narrowed by the check above
    # Read candidates from the sway report; each failure mode gets its own
    # message (NoReferenceError additionally hints at --lax). Ordering
    # matters: HarvestError last, as the catch-all base of the hierarchy.
    try:
        candidates = read_sway_report(
            sway_json,
            strict=strict,
            min_confidence=min_confidence,
        )
    except MalformedSwayReportError as exc:
        console.print(f"[red]harvest:[/red] {exc}")
        raise typer.Exit(code=1) from exc
    except NoReferenceError as exc:
        console.print(f"[red]harvest:[/red] {exc}")
        console.print(" Pass [bold]--lax[/bold] to skip probes without references instead.")
        raise typer.Exit(code=1) from exc
    except HarvestError as exc:
        console.print(f"[red]harvest:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    plan = build_plan(parsed, candidates, tag=tag)
    out_console.print(render_plan(plan))

    # Empty plan: report why and signal "nothing to do" with exit 2.
    if not plan.additions:
        out_console.print(
            "\n[yellow]no candidates to harvest[/yellow] — either the sway "
            "report had no failing probes with references, or all matched "
            "sections already exist in the document."
        )
        raise typer.Exit(code=2)

    # Without --apply this is a preview only; nothing is written.
    if not apply:
        out_console.print("\n[dim]dry-run — re-run with [bold]--apply[/bold] to write.[/dim]")
        return

    summary = apply_plan(parsed, plan, target=path)
    out_console.print(
        f"\n[green]harvest:[/green] wrote {summary.added} section(s) to {path} "
        f"({summary.skipped} skipped)"
    )