1 """Subcommand stubs for the v1.0 CLI surface.
2
3 Every stub raises `NotImplementedError` with the sprint number that will
4 implement it. This makes `dlm --help` self-documenting about project
5 progress. Arguments are accepted so `--help` renders the real eventual
6 surface; they're unused until each subcommand's owning sprint lands,
7 which is why `src/dlm/cli/commands.py` has a ruff per-file-ignore for
8 `ARG001` in `pyproject.toml`.
9 """
10
11 from __future__ import annotations
12
13 import os
14 from collections.abc import Sequence
15 from pathlib import Path
16 from typing import TYPE_CHECKING, Annotated, Any, Literal, cast
17
18 import typer
19
20 if TYPE_CHECKING:
21 from datetime import timedelta
22
23
24 def _stub(sprint: str, subject: str) -> None:
25 """Raise a clear unimplemented error pointing to the owning sprint."""
26 raise NotImplementedError(
27 f"`{subject}` is not implemented yet (owned by Sprint {sprint}).",
28 )
29
30
def init_cmd(
    path: Annotated[Path, typer.Argument(help="Target .dlm path to create.")],
    base: Annotated[
        str, typer.Option("--base", help="Base model key or hf:org/name.")
    ] = "qwen2.5-1.5b",
    template: Annotated[
        str | None,
        typer.Option(
            "--template",
            help="Start from a named gallery template (see `dlm templates list`).",
        ),
    ] = None,
    i_accept_license: Annotated[
        bool,
        typer.Option("--i-accept-license", help="Accept gated base-model license."),
    ] = False,
    force: Annotated[
        bool,
        typer.Option("--force", help="Overwrite an existing .dlm at path."),
    ] = False,
    skip_export_probes: Annotated[
        bool,
        typer.Option(
            "--skip-export-probes",
            help=(
                "Skip the llama.cpp / GGUF-conversion probes so brand-new "
                "architectures (not yet in our vendored llama.cpp) can still "
                "be used for training + HF inference. Forfeits `dlm export` "
                "to Ollama until the vendored copy catches up."
            ),
        ),
    ] = False,
    multimodal: Annotated[
        bool,
        typer.Option(
            "--multimodal",
            help=(
                "Scaffold a vision-language .dlm with an `::image::` section. "
                "Defaults --base to paligemma-3b-mix-224 and skips GGUF "
                "export probes because current GGUF export does not "
                "support vision-language bases."
            ),
        ),
    ] = False,
    audio: Annotated[
        bool,
        typer.Option(
            "--audio",
            help=(
                "Scaffold an audio-language .dlm with an `::audio::` section. "
                "Defaults --base to qwen2-audio-7b-instruct and skips GGUF "
                "export probes (audio archs are not on llama.cpp's roadmap)."
            ),
        ),
    ] = False,
) -> None:
    """Bootstrap a new .dlm file with sensible defaults.

    Flow (order matters): validate flag combinations → resolve the base
    model, handling gated-license acceptance (interactive prompt when
    stdin is a TTY, otherwise a pointer to --i-accept-license) →
    optionally apply a gallery template → write the scaffold body →
    provision the store and persist the initial manifest, including any
    license acceptance so later `dlm train` / `dlm export` don't
    re-prompt.

    Exit codes: 2 for mutually-exclusive or wrong-modality flag
    combinations; 1 for resolution, template, or filesystem failures.
    """

    from rich.console import Console

    from dlm.base_models import (
        GatedModelError,
        UnknownBaseModelError,
        is_gated,
        require_acceptance,
    )
    from dlm.base_models import resolve as resolve_base_model
    from dlm.io.ulid import mint_ulid

    # Diagnostics go to stderr so stdout stays clean for scripting.
    console = Console(stderr=True)

    if path.exists() and not force:
        console.print(
            f"[red]init:[/red] {path} already exists. "
            "Re-run with [bold]--force[/bold] to overwrite."
        )
        raise typer.Exit(code=1)

    # --multimodal / --audio are mutually exclusive with each other and
    # with --template (templates pin their own base + body shape; v1
    # doesn't ship media templates yet).
    if multimodal and audio:
        console.print(
            "[red]init:[/red] --multimodal and --audio are mutually exclusive "
            "(each targets a different modality)."
        )
        raise typer.Exit(code=2)
    if multimodal and template is not None:
        console.print(
            "[red]init:[/red] --multimodal and --template are mutually exclusive; "
            "v1 doesn't ship a VL template (see `dlm templates list`)."
        )
        raise typer.Exit(code=2)
    if audio and template is not None:
        console.print(
            "[red]init:[/red] --audio and --template are mutually exclusive; "
            "v1 doesn't ship an audio template (see `dlm templates list`)."
        )
        raise typer.Exit(code=2)

    # --multimodal / --audio override the text-first --base default. A
    # user who wants a different media base passes --base explicitly;
    # we verify the pick is the right modality below.
    if multimodal and base == "qwen2.5-1.5b":
        base = "paligemma-3b-mix-224"
    if audio and base == "qwen2.5-1.5b":
        base = "qwen2-audio-7b-instruct"

    # --template resolves the base from the template's meta.yaml; the
    # --base default is kept for the no-template path only. Users who
    # pass both a template and an explicit --base get a warning but the
    # template still wins (the template body was authored against its
    # recommended base).
    if template is not None:
        from dlm.templates import load_template

        # Peek at the template's recommended base WITHOUT writing
        # anything yet, so we can handle the license prompt against the
        # right base (the template's, not `--base`) before committing.
        try:
            resolved_base = load_template(template).meta.recommended_base
        except Exception as exc:
            console.print(f"[red]init:[/red] {exc}")
            raise typer.Exit(code=1) from exc
        if base != "qwen2.5-1.5b" and base != resolved_base:
            console.print(
                f"[yellow]init:[/yellow] --base {base} ignored; template "
                f"{template!r} uses {resolved_base}."
            )
    else:
        resolved_base = base

    # Media bases can't clear the GGUF-conversion probes. Force-skip
    # them so the probe suite doesn't false-fail the init.
    if multimodal or audio:
        skip_export_probes = True

    try:
        spec = resolve_base_model(
            resolved_base,
            accept_license=i_accept_license,
            skip_export_probes=skip_export_probes,
        )
    except UnknownBaseModelError as exc:
        console.print(f"[red]init:[/red] {exc}")
        raise typer.Exit(code=1) from exc
    except GatedModelError as exc:
        # Gated + user didn't pass --i-accept-license up-front. Prompt
        # interactively if we have a TTY; otherwise refuse with the flag
        # pointer (audit F22 non-interactive path).
        if not _prompt_accept_license(console, resolved_base, exc.license_url):
            console.print(
                "[red]license:[/red] refused. Re-run with "
                "[bold]--i-accept-license[/bold] to accept non-interactively."
            )
            raise typer.Exit(code=1) from exc
        spec = resolve_base_model(
            resolved_base,
            accept_license=True,
            skip_export_probes=skip_export_probes,
        )

    # NOW apply the template — license has already been accepted
    # (either by --i-accept-license or interactive prompt), so pass
    # the acceptance through. apply_template enforces the license
    # contract at its boundary.
    applied_result = None
    if template is not None:
        from dlm.templates import TemplateError, apply_template

        try:
            applied_result = apply_template(template, path, force=force, accept_license=True)
        except TemplateError as exc:
            console.print(f"[red]init:[/red] {exc}")
            raise typer.Exit(code=1) from exc

    # Record the license acceptance (or None for non-gated specs). We
    # know `resolve_base_model` already validated the flag/prompt chain
    # — `accept_license=True` means either the user passed the flag or
    # answered the interactive prompt. Either path is a real
    # acceptance; persist the record now so subsequent `dlm train` /
    # `dlm export` don't re-prompt.
    acceptance_via: Literal["cli_flag", "interactive"] = (
        "cli_flag" if i_accept_license else "interactive"
    )
    acceptance = (
        require_acceptance(spec, accept_license=True, via=acceptance_via)
        if is_gated(spec)
        else None
    )

    # Media flags require a matching-modality base. Check after resolve
    # so users pointing at an unknown or wrong-modality hf:org/name get
    # a clear explanation rather than a schema error deep in parse time.
    if multimodal and spec.modality != "vision-language":
        console.print(
            f"[red]init:[/red] --multimodal requires a vision-language base; "
            f"{spec.key!r} is modality='{spec.modality}'. "
            "Pick --base paligemma-3b-mix-224 or drop --multimodal."
        )
        raise typer.Exit(code=2)
    if audio and spec.modality != "audio-language":
        console.print(
            f"[red]init:[/red] --audio requires an audio-language base; "
            f"{spec.key!r} is modality='{spec.modality}'. "
            "Pick --base qwen2-audio-7b-instruct or drop --audio."
        )
        raise typer.Exit(code=2)

    # Templates mint their own ULID during apply_template; the plain
    # scaffold paths mint one here.
    if applied_result is not None:
        dlm_id = applied_result.dlm_id
    else:
        dlm_id = mint_ulid()
        if multimodal:
            _write_init_scaffold_multimodal(path, spec.key, dlm_id)
        elif audio:
            _write_init_scaffold_audio(path, spec.key, dlm_id)
        else:
            _write_init_scaffold(path, spec.key, dlm_id)

    # Create the store + write the initial manifest so `dlm show` sees
    # the license record and `dlm train` has a prior manifest to diff
    # against.
    from dlm.store.manifest import Manifest, save_manifest
    from dlm.store.paths import for_dlm

    store = for_dlm(dlm_id)
    store.ensure_layout()
    save_manifest(
        store.manifest,
        Manifest(
            dlm_id=dlm_id,
            base_model=spec.key,
            base_model_revision=spec.revision,
            source_path=path.resolve(),
            license_acceptance=acceptance,
        ),
    )
    if applied_result is not None:
        meta = applied_result.template.meta
        console.print(
            f"[green]init:[/green] wrote {path} from template "
            f"[bold]{meta.name}[/bold] ({meta.title}) — base {spec.key}."
        )
    else:
        console.print(f"[green]init:[/green] wrote {path}")
277
278
279 def _previously_accepted(store_manifest_path: Path) -> bool:
280 """Return True iff the store manifest already holds a LicenseAcceptance.
281
282 `dlm prompt` and `dlm export` operate on an already-trained
283 adapter; the gated-base license was accepted during training and
284 persisted into `manifest.license_acceptance`. Replaying that
285 acceptance here is correct; silently hardcoding
286 `accept_license=True` is not — it would let a never-accepted
287 gated base slip through.
288 """
289 if not store_manifest_path.exists():
290 return False
291 from dlm.store.errors import ManifestCorruptError
292 from dlm.store.manifest import load_manifest
293
294 try:
295 manifest = load_manifest(store_manifest_path)
296 except (ManifestCorruptError, OSError):
297 # Narrow from bare `Exception` so programmer bugs propagate
298 # instead of being silently treated as "no acceptance."
299 return False
300 return manifest.license_acceptance is not None
301
302
def _prompt_accept_license(console: object, base: str, license_url: str | None) -> bool:
    """Ask, on the terminal, whether to accept a gated base-model license.

    Returns False straight away when stdin is not a TTY (non-interactive
    runs) — the caller then surfaces the `--i-accept-license` flag
    pointer instead.
    """
    import sys

    from rich.console import Console

    # Narrow the loosely-typed parameter for the type checker.
    assert isinstance(console, Console)

    if not sys.stdin.isatty():
        return False

    console.print(
        f"[yellow]This base model ({base}) requires accepting the upstream license.[/yellow]"
    )
    if license_url:
        console.print(f" Review the license at: {license_url}")
    console.print("Accept and continue? [y/N]: ", end="")

    try:
        reply = input()
    except EOFError:
        return False
    return reply.strip().lower() in {"y", "yes"}
329
330
331 def _write_init_scaffold(path: Path, base_model_key: str, dlm_id: str) -> None:
332 """Write a minimal-but-valid .dlm file at `path`.
333
334 Body has one PROSE paragraph + a commented instruction section so
335 users see both section shapes on first open.
336 """
337 scaffold = f"""---
338 dlm_id: {dlm_id}
339 dlm_version: 1
340 base_model: {base_model_key}
341 ---
342
343 # Your document title
344
345 Write prose here. It will train via continued pretraining (CPT) loss.
346
347 ::instruction::
348
349 ### Q
350 Your example question.
351
352 ### A
353 Your example answer.
354 """
355 path.write_text(scaffold, encoding="utf-8")
356
357
358 def _write_init_scaffold_multimodal(path: Path, base_model_key: str, dlm_id: str) -> None:
359 """Write a VL-shaped .dlm file at `path`.
360
361 Body shows the `::image::` attribute fence + a caption so users
362 see the v10 grammar on first open. The placeholder path
363 `figures/your-image.png` is deliberately non-existent — first
364 `dlm train` will refuse with a clear file-missing error, prompting
365 the user to drop a real image in. This is friendlier than
366 committing an inert sample that users might not notice isn't theirs.
367
368 `dlm_version: 10` because IMAGE sections require schema v10.
369 """
370 scaffold = f"""---
371 dlm_id: {dlm_id}
372 dlm_version: 10
373 base_model: {base_model_key}
374 ---
375
376 # Your document title
377
378 Write prose here. It will train via continued pretraining (CPT) loss.
379
380 ::image path="figures/your-image.png" alt="short description"::
381 Caption text describing the image. Training rows bundle the image
382 with this caption as `<image>\\n<caption>`.
383
384 ::instruction::
385
386 ### Q
387 What is in this image?
388
389 ### A
390 Describe what the image shows.
391 """
392 path.write_text(scaffold, encoding="utf-8")
393
394
395 def _write_init_scaffold_audio(path: Path, base_model_key: str, dlm_id: str) -> None:
396 """Write an audio-shaped .dlm file at `path`.
397
398 Body shows the `::audio::` attribute fence with the sibling-
399 transcript-friendly `transcript="..."` form so users see the v11
400 grammar on first open. The placeholder path `clips/your-clip.wav`
401 is deliberately non-existent — first `dlm train` refuses with a
402 clear file-missing error rather than silently training on an inert
403 sample.
404
405 `dlm_version: 11` because AUDIO sections require schema v11.
406 """
407 scaffold = f"""---
408 dlm_id: {dlm_id}
409 dlm_version: 11
410 base_model: {base_model_key}
411 ---
412
413 # Your document title
414
415 Write prose here. It will train via continued pretraining (CPT) loss.
416
417 ::audio path="clips/your-clip.wav" transcript="Transcript of the audio clip."::
418
419 ::instruction::
420
421 ### Q
422 What was said in this recording?
423
424 ### A
425 Describe what you hear in the audio.
426 """
427 path.write_text(scaffold, encoding="utf-8")
428
429
430 def train_cmd(
431 path: Annotated[
432 Path,
433 typer.Argument(
434 help=(
435 ".dlm file to train. Or a directory — when passed a directory, "
436 "`dlm train` auto-scaffolds `<dir>/.dlm/corpus.dlm` on first run "
437 "(with --base) and reuses it on subsequent runs."
438 ),
439 ),
440 ],
441 resume: Annotated[bool, typer.Option("--resume", help="Resume from last checkpoint.")] = False,
442 fresh: Annotated[bool, typer.Option("--fresh", help="Discard prior adapter state.")] = False,
443 seed: Annotated[int | None, typer.Option("--seed", help="Override training seed.")] = None,
444 max_steps: Annotated[int | None, typer.Option("--max-steps", help="Cap step count.")] = None,
445 phase: Annotated[
446 str,
447 typer.Option(
448 "--phase",
449 help=(
450 "Which training phases to run: 'sft' (supervised only), "
451 "'preference' (DPO/ORPO only — requires a prior SFT "
452 "adapter), or 'all' (SFT then preference when enabled). "
453 "The preference method (dpo / orpo) comes from "
454 "training.preference.method in the frontmatter."
455 ),
456 ),
457 ] = "all",
458 i_accept_license: Annotated[
459 bool,
460 typer.Option(
461 "--i-accept-license",
462 help="Accept the base model's license (required for gated bases like llama-3.2).",
463 ),
464 ] = False,
465 strict_lock: Annotated[
466 bool,
467 typer.Option(
468 "--strict-lock",
469 help="Fail on any dlm.lock drift, including version warns.",
470 ),
471 ] = False,
472 update_lock: Annotated[
473 bool,
474 typer.Option(
475 "--update-lock",
476 help="Overwrite dlm.lock without validating prior entries.",
477 ),
478 ] = False,
479 ignore_lock: Annotated[
480 bool,
481 typer.Option(
482 "--ignore-lock",
483 help="Skip dlm.lock validation and don't write a new lock.",
484 ),
485 ] = False,
486 strict_metrics: Annotated[
487 bool,
488 typer.Option(
489 "--strict-metrics",
490 help="Promote metrics SQLite write failures to hard errors.",
491 ),
492 ] = False,
493 no_mined: Annotated[
494 bool,
495 typer.Option(
496 "--no-mined",
497 help=(
498 "Exclude auto-mined preference sections from the preference "
499 "phase, including replay-sampled mined pairs. Hand-authored "
500 "`::preference::` sections still train normally."
501 ),
502 ),
503 ] = False,
504 gpus: Annotated[
505 str | None,
506 typer.Option(
507 "--gpus",
508 help=(
509 "Multi-GPU training. `all` uses every visible CUDA device; "
510 "`N` uses the first N; `0,1` selects exact device ids. "
511 "Dispatches to `accelerate launch` when >1 device is "
512 "selected. Omit for single-process training."
513 ),
514 ),
515 ] = None,
516 watch: Annotated[
517 bool,
518 typer.Option(
519 "--watch",
520 help=(
521 "Save-to-train mode. After an initial train, block on "
522 "filesystem events and run incremental retrains "
523 "(mode=resume, step-capped) on each settled save. Ctrl-C "
524 "exits cleanly between cycles."
525 ),
526 ),
527 ] = False,
528 watch_max_steps: Annotated[
529 int,
530 typer.Option(
531 "--watch-max-steps",
532 help="Per-cycle step cap for --watch. Default 100 keeps cycles responsive.",
533 ),
534 ] = 100,
535 watch_debounce_ms: Annotated[
536 int,
537 typer.Option(
538 "--watch-debounce-ms",
539 help="Quiet interval (ms) before a burst of saves triggers a retrain.",
540 ),
541 ] = 400,
542 watch_repl: Annotated[
543 bool,
544 typer.Option(
545 "--repl",
546 help=(
547 "With --watch: also open the REPL so prompts reflect the "
548 "latest adapter. **Scaffolded** — threading integration "
549 "is untestable without a two-process harness; emit a "
550 "not-yet-implemented refusal and exit 2."
551 ),
552 ),
553 ] = False,
554 base: Annotated[
555 str | None,
556 typer.Option(
557 "--base",
558 help=(
559 "Base model key for auto-scaffold. Required on first run when "
560 "`path` is a directory without an existing .dlm/ config. "
561 "Accepts registry keys (smollm2-135m, qwen2.5-coder-1.5b, ...) "
562 "or `hf:<org>/<name>` for off-registry models."
563 ),
564 ),
565 ] = None,
566 include: Annotated[
567 list[str] | None,
568 typer.Option(
569 "--include",
570 help=(
571 "Glob pattern for files to train on (auto-scaffold only). "
572 "Repeatable. Default: '**/*' with --recursive, '*' without. "
573 "Examples: '**/*.py', '**/*.f90', '**/*.{md,rst}'."
574 ),
575 ),
576 ] = None,
577 exclude: Annotated[
578 list[str] | None,
579 typer.Option(
580 "--exclude",
581 help=(
582 "Glob pattern for files to skip (auto-scaffold only). "
583 "Repeatable. Defaults (secrets, VCS, lockfiles, binaries) "
584 "apply on top via the descent protocol."
585 ),
586 ),
587 ] = None,
588 recursive: Annotated[
589 bool,
590 typer.Option(
591 "--recursive/--no-recursive",
592 "-r/-R",
593 help=(
594 "Auto-scaffold include patterns descend into subdirectories. "
595 "Default True. --no-recursive limits the default include to "
596 "top-level files only."
597 ),
598 ),
599 ] = True,
600 name: Annotated[
601 str,
602 typer.Option(
603 "--name",
604 help=(
605 "Adapter name for auto-scaffold → `<dir>/.dlm/<name>.dlm`. "
606 "Default 'corpus'. Lets a single tree host multiple adapters."
607 ),
608 ),
609 ] = "corpus",
610 policy: Annotated[
611 str,
612 typer.Option(
613 "--policy",
614 help=(
615 "Auto-scaffold sources_policy: 'strict' (default; confines "
616 "training to the target directory) or 'permissive' (allows "
617 "absolute paths anywhere)."
618 ),
619 ),
620 ] = "strict",
621 rescaffold: Annotated[
622 bool,
623 typer.Option(
624 "--rescaffold",
625 help=(
626 "Rewrite an existing scaffolded .dlm in place with the new "
627 "--base/--include/--exclude/--policy flags. Keeps the same "
628 "dlm_id (store stays intact). Without it, re-running with "
629 "frontmatter-editing flags refuses to shadow-edit."
630 ),
631 ),
632 ] = False,
633 listen_rpc: Annotated[
634 str | None,
635 typer.Option(
636 "--listen-rpc",
637 help=(
638 "Open a JSON-RPC endpoint at <host:port> (e.g. `127.0.0.1:7429`) "
639 "that accepts `inject_probe` pushes from sway-style eval "
640 "harnesses. Probes enter the queue and drain at the next "
641 "training-cycle boundary. Requires --watch or --max-cycles. "
642 "Bearer token from DLM_PROBE_TOKEN."
643 ),
644 ),
645 ] = None,
646 max_cycles: Annotated[
647 int,
648 typer.Option(
649 "--max-cycles",
650 help=(
651 "Convergence stop for --listen-rpc without --watch: cap the "
652 "probe-driven retrain loop at N cycles. Ignored without "
653 "--listen-rpc."
654 ),
655 ),
656 ] = 0,
657 no_cache: Annotated[
658 bool,
659 typer.Option(
660 "--no-cache",
661 help=(
662 "Opt out of the tokenized-section cache for this run. By "
663 "default, `dlm train` pre-tokenizes directive-sourced rows "
664 "via ~/.dlm/store/<id>/tokenized-cache/ so subsequent runs "
665 "on the same corpus skip re-tokenization. Use this to "
666 "bypass the cache for debugging or to compare cached vs "
667 "uncached training determinism."
668 ),
669 ),
670 ] = False,
671 skip_export_probes: Annotated[
672 bool,
673 typer.Option(
674 "--skip-export-probes",
675 help=(
676 "Skip the llama.cpp / GGUF-conversion probes so brand-new "
677 "architectures (not yet in our vendored llama.cpp) can still "
678 "be used for training + HF inference. Forfeits `dlm export` "
679 "to Ollama until the vendored copy catches up. Mirrors the "
680 "flag of the same name on `dlm init`."
681 ),
682 ),
683 ] = False,
684 ) -> None:
685 """Train / retrain a .dlm against its base model."""
686 import sqlite3
687 import sys
688
689 from rich.console import Console
690
691 from dlm.base_models import GatedModelError
692 from dlm.base_models import resolve as resolve_base_model
693 from dlm.doc.errors import DlmParseError
694 from dlm.doc.parser import parse_file
695 from dlm.hardware import doctor
696 from dlm.lock import LockMode, LockValidationError
697 from dlm.store.paths import for_dlm
698 from dlm.train import (
699 DiskSpaceError,
700 OOMError,
701 ResumeIntegrityError,
702 TrainingError,
703 )
704 from dlm.train.preference import (
705 DpoPhaseError,
706 NoPreferenceContentError,
707 PriorAdapterRequiredError,
708 )
709 from dlm.train.preference.phase_orchestrator import Phase, run_phases
710
711 console = Console(stderr=True)
712
713 if phase not in ("sft", "preference", "all"):
714 console.print(f"[red]error:[/red] --phase must be one of sft|preference|all, got {phase!r}")
715 raise typer.Exit(code=2)
716 phase_literal: Phase = phase # type: ignore[assignment]
717
718 if resume and fresh:
719 console.print("[red]error:[/red] --resume and --fresh are mutually exclusive")
720 raise typer.Exit(code=2)
721 mode: Literal["fresh", "resume"] = "resume" if resume else "fresh"
722
723 # --gpus dispatches to accelerate launch when >1 device is
724 # selected. The single-GPU path falls through to the existing
725 # in-process trainer; a bare `--gpus 1` is a no-op (users can use
726 # it to lock the visible device set via CUDA_VISIBLE_DEVICES
727 # without spawning a subprocess).
728 if gpus is not None:
729 # Resolve mixed_precision from capabilities so bf16-incapable
730 # CUDA GPUs (SM<8.0) don't trip the `accelerate launch`
731 # default. `probe()` is cheap and runs in the launcher-side
732 # process only; each rank re-probes via `doctor()` later.
733 from dlm.hardware.capabilities import probe as _probe_caps
734
735 _caps = _probe_caps()
736 _mp = "bf16" if _caps.supports_bf16 else "fp16"
737 exit_code = _maybe_dispatch_multi_gpu(gpus, sys.argv, console, mixed_precision=_mp)
738 if exit_code is not None:
739 raise typer.Exit(code=exit_code)
740
741 # Mutual-exclusion gate for the three lock flags. Exactly one (or
742 # zero) may be set — silently ignoring a conflicting pair would
743 # mask operator intent.
744 lock_flag_count = sum((strict_lock, update_lock, ignore_lock))
745 if lock_flag_count > 1:
746 console.print(
747 "[red]error:[/red] --strict-lock / --update-lock / --ignore-lock "
748 "are mutually exclusive",
749 )
750 raise typer.Exit(code=2)
751 lock_mode: LockMode = "default"
752 if strict_lock:
753 lock_mode = "strict"
754 elif update_lock:
755 lock_mode = "update"
756 elif ignore_lock:
757 lock_mode = "ignore"
758
759 # `--no-cache` bypasses the tokenized-section cache for this run.
760 # Plumbed as an env var because the trainer's pre-tokenize helper
761 # already reads one — the CLI flag is a discoverable surface over
762 # the same switch. Rolling the flag into `TrainingPlan` is a
763 # deferred refactor; the env var is sufficient for the user-facing
764 # contract and survives `accelerate launch` re-invocations.
765 if no_cache:
766 from dlm.train.cache import set_disable_flag
767
768 set_disable_flag("--no-cache")
769
770 if policy not in ("permissive", "strict"):
771 console.print(
772 f"[red]error:[/red] --policy must be 'permissive' or 'strict', got {policy!r}"
773 )
774 raise typer.Exit(code=2)
775 policy_literal: Literal["permissive", "strict"] = policy # type: ignore[assignment]
776
777 # --listen-rpc requires a loop to drain the queue — either --watch
778 # (file-change cycles) or --max-cycles N (bounded retrain loop).
779 # Without one, the server would accept probes that never train. We
780 # also need the bearer token up front so the user sees the refusal
781 # before we spend time downloading weights.
782 rpc_config: tuple[str, int, str] | None = None
783 if listen_rpc is not None:
784 if not watch and max_cycles <= 0:
785 console.print(
786 "[red]error:[/red] --listen-rpc requires --watch or --max-cycles N "
787 "(the probe queue needs a drain cadence)"
788 )
789 raise typer.Exit(code=2)
790 token = os.environ.get("DLM_PROBE_TOKEN", "").strip()
791 if not token:
792 console.print(
793 "[red]error:[/red] --listen-rpc needs a bearer token; "
794 "export DLM_PROBE_TOKEN=<secret>"
795 )
796 raise typer.Exit(code=2)
797 host, _, port_s = listen_rpc.rpartition(":")
798 if not host or not port_s:
799 console.print(f"[red]error:[/red] --listen-rpc expects host:port, got {listen_rpc!r}")
800 raise typer.Exit(code=2)
801 try:
802 port = int(port_s)
803 except ValueError:
804 console.print(f"[red]error:[/red] --listen-rpc port must be an integer, got {port_s!r}")
805 raise typer.Exit(code=2) from None
806 rpc_config = (host, port, token)
807
808 # Directory targets auto-scaffold `<dir>/.dlm/corpus.dlm` (or
809 # reuse an existing one). After this block, `path` always points
810 # at an actual `.dlm` file that the rest of the flow can parse.
811 if path.is_dir():
812 from dlm.cli.scaffold import ScaffoldError, scaffold_train_target
813
814 try:
815 scaffold_result = scaffold_train_target(
816 path,
817 base=base,
818 include=tuple(include or ()),
819 exclude=tuple(exclude or ()),
820 recursive=recursive,
821 name=name,
822 policy=policy_literal,
823 rescaffold=rescaffold,
824 )
825 except ScaffoldError as exc:
826 console.print(f"[red]scaffold:[/red] {exc.message}")
827 raise typer.Exit(code=1) from exc
828
829 if scaffold_result.scaffolded:
830 console.print(
831 f"[cyan]scaffolded:[/cyan] {scaffold_result.dlm_path} "
832 f"(dlm_id={scaffold_result.dlm_id})"
833 )
834 path = scaffold_result.dlm_path
835
836 try:
837 parsed = parse_file(path)
838 except (DlmParseError, OSError) as exc:
839 console.print(f"[red]error:[/red] {exc}")
840 raise typer.Exit(code=1) from exc
841 try:
842 spec = resolve_base_model(
843 parsed.frontmatter.base_model,
844 accept_license=i_accept_license,
845 skip_export_probes=skip_export_probes,
846 )
847 except GatedModelError as exc:
848 console.print(f"[red]license:[/red] base model {parsed.frontmatter.base_model!r} is gated.")
849 if exc.license_url:
850 console.print(f" review the license at: {exc.license_url}")
851 console.print(
852 " re-run with [bold]--i-accept-license[/bold] once you have accepted. "
853 "Acceptance will be persisted in the store manifest."
854 )
855 raise typer.Exit(code=1) from exc
856 # Detect the DDP world_size set by `accelerate launch`
857 # (WORLD_SIZE env var) and thread it into the doctor so the plan's
858 # effective_batch_size reflects the rank count. Single-process
859 # runs read 1 and the plan math is unchanged.
860 from dlm.train.distributed import detect_world_size
861
862 ws = detect_world_size()
863 doctor_result = doctor(
864 training_config=parsed.frontmatter.training,
865 base_params=spec.params,
866 seq_len=min(parsed.frontmatter.training.sequence_len, spec.effective_context_length),
867 world_size=ws,
868 )
869 plan = doctor_result.plan
870 if plan is None:
871 console.print(
872 "[red]doctor:[/red] no viable training plan for this host. "
873 "Run `dlm doctor` for details."
874 )
875 raise typer.Exit(code=1)
876
877 store = for_dlm(parsed.frontmatter.dlm_id)
878 store.ensure_layout()
879
880 # `dlm init` writes a manifest as part of store provisioning. Mirror
881 # that manifest write here when the store layout exists but has no
882 # manifest yet — covers two flows:
883 # - auto-scaffold via `dlm train <dir>` on a fresh directory
884 # - hand-authored .dlm with a fresh ULID that never went through
885 # `dlm init` (e.g. authored via the LSP / VSCode extension)
886 # License acceptance has already been validated upstream by this
887 # point, so we just record it.
888 if not store.manifest.exists():
889 from dlm.base_models import is_gated
890 from dlm.base_models.license import require_acceptance
891 from dlm.store.manifest import Manifest, save_manifest
892
893 acceptance = (
894 require_acceptance(spec, accept_license=True, via="cli_flag")
895 if is_gated(spec)
896 else None
897 )
898 save_manifest(
899 store.manifest,
900 Manifest(
901 dlm_id=parsed.frontmatter.dlm_id,
902 base_model=spec.key,
903 base_model_revision=spec.revision,
904 source_path=path.resolve(),
905 license_acceptance=acceptance,
906 ),
907 )
908
909 from dlm.modality import ModalityError
910
911 try:
912 phase_results = run_phases(
913 store,
914 parsed,
915 spec,
916 plan,
917 phase=phase_literal,
918 mode=mode,
919 seed=seed,
920 max_steps=max_steps,
921 lock_mode=lock_mode,
922 capabilities=doctor_result.capabilities,
923 world_size=ws,
924 strict_metrics=strict_metrics,
925 include_auto_mined=not no_mined,
926 )
927 except sqlite3.Error as exc:
928 console.print(f"[red]metrics:[/red] {exc}")
929 raise typer.Exit(code=1) from exc
930 except LockValidationError as exc:
931 console.print(f"[red]lock:[/red] {exc}")
932 console.print(
933 " Re-run with [bold]--update-lock[/bold] to accept the drift or "
934 "[bold]--ignore-lock[/bold] to continue without persisting a new lock."
935 )
936 raise typer.Exit(code=1) from exc
937 except DiskSpaceError as exc:
938 console.print(f"[red]disk:[/red] {exc}")
939 raise typer.Exit(code=1) from exc
940 except OOMError as exc:
941 from dlm.train import format_oom_message
942
943 console.print(
944 format_oom_message(
945 step=exc.step,
946 peak_bytes=exc.peak_bytes,
947 free_at_start_bytes=exc.free_at_start_bytes,
948 current_grad_accum=exc.current_grad_accum,
949 recommended_grad_accum=exc.recommended_grad_accum,
950 )
951 )
952 raise typer.Exit(code=1) from exc
953 except ResumeIntegrityError as exc:
954 console.print(f"[red]resume:[/red] {exc}")
955 raise typer.Exit(code=1) from exc
956 except (NoPreferenceContentError, PriorAdapterRequiredError) as exc:
957 console.print(f"[red]dpo:[/red] {exc}")
958 raise typer.Exit(code=1) from exc
959 except DpoPhaseError as exc:
960 console.print(f"[red]dpo:[/red] {exc}")
961 raise typer.Exit(code=1) from exc
962 except TrainingError as exc:
963 console.print(f"[red]training:[/red] {exc}")
964 raise typer.Exit(code=1) from exc
965 except ModalityError as exc:
966 console.print(f"[red]training:[/red] {exc}")
967 raise typer.Exit(code=1) from exc
968
969 if not phase_results:
970 console.print(
971 "[yellow]no-op:[/yellow] nothing to train for the requested phase. "
972 "Check that the document has the section types the phase consumes "
973 "(prose/instruction for SFT, preference for DPO)."
974 )
975 raise typer.Exit(code=0)
976
977 for pr in phase_results:
978 result = pr.result
979 console.print(
980 f"[green]{pr.phase}:[/green] v{result.adapter_version:04d} "
981 f"({result.steps} steps, seed={result.seed}, "
982 f"determinism={result.determinism.class_})"
983 )
984 console.print(f"adapter: {result.adapter_path}")
985 console.print(f"log: {result.log_path}")
986 # Final-train-loss stdout line mirrors the last phase so existing
987 # downstream scripts keep working.
988 result = phase_results[-1].result
989 if result.final_train_loss is not None:
990 sys.stdout.write(f"{result.final_train_loss}\n")
991
992 # --watch keeps the training context alive and re-runs incremental
993 # cycles on file change. Entered AFTER the initial train so the
994 # loop resumes from a real committed adapter.
995 if watch:
996 if watch_repl:
997 console.print(
998 "[red]train:[/red] --watch --repl is scaffolded but not yet "
999 "implemented. The threaded REPL bridge needs a test "
1000 "harness we don't have in CI today."
1001 )
1002 raise typer.Exit(code=2)
1003
1004 from dlm.watch.loop import run_watch
1005 from dlm.watch.status import WatchStatus, render_status
1006
1007 status = WatchStatus(doc_path=str(path), sections=len(parsed.sections))
1008
1009 # Start the probe-RPC server if --listen-rpc was requested. The
1010 # queue is exposed; end-to-end flow into `build_dataset` at the
1011 # next cycle boundary is the follow-up consumer task — for now
1012 # the server accepts and buffers probes so sway sinks can be
1013 # wired + tested against a live endpoint.
1014 rpc_server = None
1015 probe_queue = None
1016 if rpc_config is not None:
1017 from dlm.train.inject import InjectedProbeQueue
1018 from dlm.train.rpc import ProbeRpcServer
1019
1020 rpc_host, rpc_port, rpc_token = rpc_config
1021 probe_queue = InjectedProbeQueue()
1022 rpc_server = ProbeRpcServer(
1023 host=rpc_host, port=rpc_port, token=rpc_token, queue=probe_queue
1024 )
1025 rpc_server.start()
1026 bound_host, bound_port = rpc_server.address
1027 console.print(
1028 f"[dim]rpc:[/dim] listening on {bound_host}:{bound_port} "
1029 f"(queue capacity {probe_queue.capacity})"
1030 )
1031
1032 console.print(
1033 f"[dim]watch:[/dim] {render_status(status)}; "
1034 f"max_steps={watch_max_steps}, debounce_ms={watch_debounce_ms}"
1035 )
1036
1037 def _log_cycle(result_: object) -> None:
1038 from dlm.watch.loop import CycleResult
1039
1040 assert isinstance(result_, CycleResult)
1041 if result_.ran and result_.run_result is not None:
1042 status.mark_cycle_done(
1043 train_loss=result_.run_result.final_train_loss,
1044 val_loss=result_.run_result.final_val_loss,
1045 steps=result_.run_result.steps,
1046 coalesced=1,
1047 )
1048 console.print(f"[dim]watch:[/dim] {render_status(status)}")
1049 else:
1050 console.print("[dim]watch:[/dim] no new content, skipping retrain")
1051
1052 try:
1053 exit_code = run_watch(
1054 doc_path=path,
1055 store=store,
1056 spec=spec,
1057 plan=plan,
1058 max_steps=watch_max_steps,
1059 debounce_ms=watch_debounce_ms,
1060 on_cycle=_log_cycle,
1061 drain_probes=probe_queue.drain if probe_queue is not None else None,
1062 )
1063 except KeyboardInterrupt:
1064 if rpc_server is not None:
1065 rpc_server.stop()
1066 console.print("[dim]watch:[/dim] Ctrl-C received, exiting")
1067 raise typer.Exit(code=0) # noqa: B904
1068 finally:
1069 if rpc_server is not None:
1070 rpc_server.stop()
1071 raise typer.Exit(code=exit_code)
1072
1073 # --max-cycles without --watch: the bounded-loop cycle driver is
1074 # the next consumer-side integration step. Accept the flags, refuse
1075 # execution until the loop lands.
1076 if rpc_config is not None and not watch:
1077 console.print(
1078 "[red]train:[/red] --listen-rpc --max-cycles (without --watch) is "
1079 "scaffolded; the bounded cycle loop is the follow-up. Use "
1080 "--watch for now."
1081 )
1082 raise typer.Exit(code=2)
1083
1084
def _maybe_dispatch_multi_gpu(
    gpus_flag: str,
    argv: list[str],
    console: object,
    *,
    mixed_precision: str = "bf16",
) -> int | None:
    """Resolve `--gpus` and, for world_size >= 2, run via accelerate launch.

    Returns the launcher's exit code when a multi-GPU subprocess was
    spawned (caller should `raise typer.Exit(code=...)`), or None when
    only a single device was resolved and the caller should fall
    through to the in-process trainer. Spec parse/resolve failures
    print an error and return 2.
    """
    from rich.console import Console

    from dlm.train.distributed import UnsupportedGpuSpecError, launch_multi_gpu, parse_gpus

    assert isinstance(console, Console)

    try:
        gpu_spec = parse_gpus(gpus_flag)
    except UnsupportedGpuSpecError as exc:
        console.print(f"[red]train:[/red] {exc}")
        return 2

    # Probe CUDA for the visible device count; any torch failure is
    # treated as "no GPUs available".
    try:
        import torch

        visible_devices = int(torch.cuda.device_count())
    except Exception:  # pragma: no cover - torch probing has many failure modes
        visible_devices = 0

    try:
        resolved_ids = gpu_spec.resolve(visible_devices)
    except UnsupportedGpuSpecError as exc:
        console.print(f"[red]train:[/red] {exc}")
        return 2

    if len(resolved_ids) < 2:
        # One device (or --gpus 1): no subprocess needed, stay in-process.
        return None

    # Strip `--gpus` / `--gpus=...` before forwarding so the launched
    # accelerate command carries exactly the intended args; the worker
    # entry strips it defensively as well.
    forwarded_args = _strip_gpus_from_argv(argv)
    console.print(
        f"[dim]train:[/dim] dispatching to accelerate launch on devices {list(resolved_ids)} "
        f"(mixed_precision={mixed_precision})"
    )
    return launch_multi_gpu(resolved_ids, forwarded_args, mixed_precision=mixed_precision)
1137
1138
def _strip_gpus_from_argv(argv: list[str]) -> list[str]:
    """Return the forwarding args with `--gpus <v>` / `--gpus=<v>` removed.

    argv[0] (the script path) is skipped because `accelerate launch -m
    <entry>` supplies the rank entrypoint on its own; everything else is
    delegated to the shared `strip_gpus_flag` helper, which understands
    both spellings of the flag.
    """
    from dlm.train.distributed.gpus import strip_gpus_flag

    forwarded: list[str] = strip_gpus_flag(argv, skip_argv0=True)
    return forwarded
1150
1151
def prompt_cmd(
    ctx: typer.Context,
    path: Annotated[Path, typer.Argument(help=".dlm file to query.")],
    query: Annotated[str | None, typer.Argument(help="One-shot prompt (omit for stdin).")] = None,
    max_tokens: Annotated[
        int,
        typer.Option("--max-tokens", help="Max new tokens to generate."),
    ] = 256,
    temp: Annotated[
        float,
        typer.Option("--temp", help="Sampling temperature. `0.0` = greedy decoding."),
    ] = 0.7,
    top_p: Annotated[
        float | None,
        typer.Option(
            "--top-p",
            help="Top-p sampling cutoff. Omit to disable nucleus sampling.",
        ),
    ] = None,
    verbose: Annotated[bool, typer.Option("--verbose", help="Log resolved InferencePlan.")] = False,
    adapter: Annotated[
        str | None,
        typer.Option(
            "--adapter",
            help=(
                "Named adapter to prompt against. Required on multi-adapter "
                "documents; rejected on single-adapter documents."
            ),
        ),
    ] = None,
    gate: Annotated[
        str,
        typer.Option(
            "--gate",
            help=(
                "Learned adapter gate. `auto` (default) uses the "
                "gate when one exists in the store; `off` forces uniform "
                "weights across declared adapters. Ignored when --adapter "
                "explicitly pins a single adapter."
            ),
        ),
    ] = "auto",
    backend: Annotated[
        str,
        typer.Option(
            "--backend",
            help=(
                "Inference backend: `auto` (default) picks MLX on Apple "
                "Silicon, else PyTorch. Force with `pytorch` or `mlx`. "
                "MLX requires `uv sync --extra mlx` on darwin-arm64."
            ),
        ),
    ] = "auto",
    image: Annotated[
        list[Path] | None,
        typer.Option(
            "--image",
            help=(
                "Attach an image file to the prompt. Repeat for multiple "
                "images; each expands to the base's image-token placeholder. "
                "Requires a vision-language base."
            ),
        ),
    ] = None,
    audio: Annotated[
        list[Path] | None,
        typer.Option(
            "--audio",
            help=(
                "Attach an audio file (.wav/.flac/.ogg) to the prompt. "
                "Repeat for multiple clips; each expands to the base's "
                "audio-token placeholder. Requires an audio-language base "
                "(for example Qwen2-Audio-7B-Instruct)."
            ),
        ),
    ] = None,
) -> None:
    """Run inference against the trained adapter.

    Flow: validate the flag surface first (usage errors exit with code
    2), resolve the base model from the document's frontmatter (a gated
    base with no recorded license acceptance exits with code 1), then
    dispatch by modality — vision-language bases go to
    `_dispatch_vl_prompt`, audio-language bases to
    `_dispatch_audio_prompt`, and text bases run through the selected
    inference backend in this function. When `query` is omitted, the
    prompt is read from stdin. The generated text is written to stdout;
    all diagnostics go to stderr via the Rich console.
    """
    import sys

    from rich.console import Console

    from dlm.base_models import resolve as resolve_base_model
    from dlm.doc.parser import parse_file
    from dlm.hardware import doctor
    from dlm.inference import AdapterNotFoundError
    from dlm.inference.backends import (
        UnsupportedBackendError,
        build_backend,
        select_backend,
    )
    from dlm.store.paths import for_dlm

    # Diagnostics go to stderr so stdout carries only the generated text.
    console = Console(stderr=True)

    if backend not in ("auto", "pytorch", "mlx"):
        console.print(
            f"[red]prompt:[/red] --backend must be `auto`, `pytorch`, or `mlx` (got {backend!r})."
        )
        raise typer.Exit(code=2)

    # Typer passes None when the option was never given; normalize early so
    # downstream branching can just check truthiness + len().
    image_paths: list[Path] = list(image or [])
    audio_paths: list[Path] = list(audio or [])
    if image_paths and audio_paths:
        console.print(
            "[red]prompt:[/red] --image and --audio cannot be combined "
            "(each targets a different modality)."
        )
        raise typer.Exit(code=2)

    from dlm.base_models import GatedModelError

    # Frontmatter drives adapter validation, base-model resolution, and
    # (for audio) the auto-resample setting passed to the dispatcher.
    parsed = parse_file(path)
    adapters_declared = parsed.frontmatter.training.adapters
    if adapter is not None:
        if adapters_declared is None:
            console.print(
                "[red]prompt:[/red] --adapter is only valid on multi-adapter "
                "documents (this doc does not declare `training.adapters`)."
            )
            raise typer.Exit(code=2)
        if adapter not in adapters_declared:
            declared = sorted(adapters_declared)
            console.print(
                f"[red]prompt:[/red] --adapter {adapter!r} is not declared (declared: {declared})."
            )
            raise typer.Exit(code=2)

    if gate not in ("auto", "off"):
        console.print(f"[red]prompt:[/red] --gate must be `auto` or `off`, got {gate!r}.")
        raise typer.Exit(code=2)
    # --adapter explicitly pins a single adapter — gate routing is moot.
    # We silently ignore --gate in that case (the flag has a non-default
    # value only when the user cares, and pairing it with --adapter is
    # not an error, just a no-op).

    store = for_dlm(parsed.frontmatter.dlm_id)
    already_accepted = _previously_accepted(store.manifest)
    try:
        spec = resolve_base_model(parsed.frontmatter.base_model, accept_license=already_accepted)
    except GatedModelError as exc:
        console.print(
            f"[red]license:[/red] base {parsed.frontmatter.base_model!r} is gated and has "
            "no recorded acceptance in this store; run `dlm train --i-accept-license` first."
        )
        raise typer.Exit(code=1) from exc
    # Hardware capability probe — consumed by backend selection below and
    # by the VL/audio loaders in the dispatch helpers.
    caps = doctor().capabilities

    # --- VL path -------------------------------------------------------
    # The VL branch has its own model / processor / adapter loader and
    # its own generate function. `--image` and vision-language bases
    # must appear together; each alone is a usage error.
    from dlm.modality import modality_for

    dispatch = modality_for(spec)
    from click.core import ParameterSource

    # Only substitute the base's suggested temperature when the user left
    # --temp at its default; an explicit --temp always wins.
    if ctx.get_parameter_source("temp") == ParameterSource.DEFAULT:
        temp = spec.suggested_prompt_temperature
    if image_paths and not dispatch.accepts_images:
        console.print(
            f"[red]prompt:[/red] --image is only valid with vision-language bases; "
            f"base {spec.key!r} is modality='{spec.modality}'."
        )
        raise typer.Exit(code=2)
    if dispatch.accepts_images and not image_paths:
        console.print(
            f"[red]prompt:[/red] base {spec.key!r} is vision-language; "
            "pass at least one --image PATH to prompt it."
        )
        raise typer.Exit(code=2)
    if dispatch.accepts_images:
        _dispatch_vl_prompt(
            console=console,
            spec=spec,
            store=store,
            caps=caps,
            adapter_name=adapter,
            image_paths=image_paths,
            query=query,
            max_tokens=max_tokens,
            temp=temp,
            top_p=top_p,
            verbose=verbose,
        )
        return

    # --- Audio path ----------------------------------------------------
    if audio_paths and not dispatch.accepts_audio:
        console.print(
            f"[red]prompt:[/red] --audio is only valid with audio-language bases; "
            f"base {spec.key!r} is modality='{spec.modality}'."
        )
        raise typer.Exit(code=2)
    if dispatch.accepts_audio and not audio_paths:
        console.print(
            f"[red]prompt:[/red] base {spec.key!r} is audio-language; "
            "pass at least one --audio PATH to prompt it."
        )
        raise typer.Exit(code=2)
    if dispatch.accepts_audio:
        _dispatch_audio_prompt(
            console=console,
            spec=spec,
            store=store,
            caps=caps,
            adapter_name=adapter,
            audio_paths=audio_paths,
            query=query,
            max_tokens=max_tokens,
            temp=temp,
            top_p=top_p,
            verbose=verbose,
            auto_resample=parsed.frontmatter.training.audio.auto_resample,
        )
        return

    # --- Text path -----------------------------------------------------
    try:
        backend_name = select_backend(backend, caps)  # type: ignore[arg-type]
    except UnsupportedBackendError as exc:
        console.print(f"[red]prompt:[/red] {exc}")
        raise typer.Exit(code=2) from exc
    backend_obj = build_backend(backend_name, caps)

    if verbose:
        console.print(f"[dim]backend:[/dim] {backend_name}")

    try:
        backend_obj.load(spec, store, adapter_name=adapter)
    except AdapterNotFoundError as exc:
        console.print(f"[red]prompt:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    if query is None:
        query = sys.stdin.read().strip()
    if not query:
        console.print("[red]prompt:[/red] empty query (pass a string or pipe on stdin)")
        raise typer.Exit(code=2)

    response = backend_obj.generate(
        query,
        max_new_tokens=max_tokens,
        temperature=temp,
        top_p=top_p,
    )
    sys.stdout.write(response + "\n")
1400
1401
def _dispatch_vl_prompt(  # pragma: no cover
    *,
    console: Any,
    spec: Any,
    store: Any,
    caps: Any,
    adapter_name: str | None,
    image_paths: list[Path],
    query: str | None,
    max_tokens: int,
    temp: float,
    top_p: float | None,
    verbose: bool,
) -> None:
    """Generate against a vision-language base and print to stdout.

    Split out of `prompt_cmd` so the text path stays readable. Excluded
    from unit coverage because it drives the VL HF stack; exercised by
    the slow-marked vision-language integration test (T12).
    """
    import sys

    import typer

    from dlm.inference import (
        AdapterNotFoundError,
        generate_vl,
        load_for_vl_inference,
        load_images,
    )
    from dlm.modality import ProcessorContractError

    if verbose:
        console.print("[dim]vl-backend:[/dim] pytorch (AutoModelForImageTextToText)")

    # Model/processor/adapter load failures are runtime errors (exit 1);
    # a missing image file is a usage error (exit 2).
    try:
        loaded = load_for_vl_inference(store, spec, caps, adapter_name=adapter_name)
    except (AdapterNotFoundError, ProcessorContractError) as exc:
        console.print(f"[red]prompt:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    try:
        images = load_images(image_paths)
    except FileNotFoundError as exc:
        console.print(f"[red]prompt:[/red] {exc}")
        raise typer.Exit(code=2) from exc

    if query is None:
        query = sys.stdin.read().strip()
    if not query:
        console.print("[red]prompt:[/red] empty query (pass a string or pipe on stdin)")
        raise typer.Exit(code=2)

    # Registry VL specs must declare a preprocessor plan (schema
    # validator); the literal fallback is defensive for the hf: escape
    # hatch, which could in principle skip one.
    plan = spec.vl_preprocessor_plan
    image_token = "<image>" if plan is None else plan.image_token

    response = generate_vl(
        loaded.model,
        loaded.processor,
        query,
        images,
        image_token=image_token,
        max_new_tokens=max_tokens,
        temperature=temp,
        top_p=top_p,
    )
    sys.stdout.write(response + "\n")
1475
1476
def _dispatch_audio_prompt(  # pragma: no cover
    *,
    console: Any,
    spec: Any,
    store: Any,
    caps: Any,
    adapter_name: str | None,
    audio_paths: list[Path],
    query: str | None,
    max_tokens: int,
    temp: float,
    top_p: float | None,
    verbose: bool,
    auto_resample: bool = False,
) -> None:
    """Generate against an audio-language base and print to stdout.

    Split out of `prompt_cmd` so the text path stays readable. Excluded
    from unit coverage because it drives the audio HF stack; exercised
    by the slow-marked audio integration test (T12).
    """
    import sys

    import typer

    from dlm.inference import (
        AdapterNotFoundError,
        generate_audio,
        load_audios,
        load_for_audio_inference,
    )

    if verbose:
        console.print(f"[dim]audio-backend:[/dim] pytorch ({spec.architecture})")

    try:
        loaded = load_for_audio_inference(store, spec, caps, adapter_name=adapter_name)
    except AdapterNotFoundError as exc:
        console.print(f"[red]prompt:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    plan = spec.audio_preprocessor_plan
    if plan is None:
        # Defensive: every registry audio spec carries the plan, but the
        # hf: escape hatch could skip it.
        console.print(
            f"[red]prompt:[/red] base {spec.key!r} is audio-language "
            "but has no audio_preprocessor_plan; cannot resolve sample rate."
        )
        raise typer.Exit(code=2)

    target_sr = plan.sample_rate
    # Missing files and sample-rate mismatches (the latter surfaces an
    # actionable ffmpeg hint) are both usage errors — exit code 2.
    try:
        waveforms = load_audios(
            audio_paths,
            target_sample_rate=target_sr,
            auto_resample=auto_resample,
        )
    except (FileNotFoundError, ValueError) as exc:
        console.print(f"[red]prompt:[/red] {exc}")
        raise typer.Exit(code=2) from exc

    if query is None:
        query = sys.stdin.read().strip()
    if not query:
        console.print("[red]prompt:[/red] empty query (pass a string or pipe on stdin)")
        raise typer.Exit(code=2)

    response = generate_audio(
        loaded.model,
        loaded.processor,
        query,
        waveforms,
        audio_token=plan.audio_token,
        sample_rate=target_sr,
        max_new_tokens=max_tokens,
        temperature=temp,
        top_p=top_p,
    )
    sys.stdout.write(response + "\n")
1561
1562
1563 def export_cmd(
1564 path: Annotated[Path, typer.Argument(help=".dlm file to export.")],
1565 target: Annotated[
1566 str,
1567 typer.Option(
1568 "--target",
1569 help="Export destination. Currently supported: ollama, llama-server, vllm, mlx-serve.",
1570 ),
1571 ] = "ollama",
1572 quant: Annotated[
1573 str | None,
1574 typer.Option("--quant", help="GGUF quant level (defaults to frontmatter)."),
1575 ] = None,
1576 merged: Annotated[
1577 bool,
1578 typer.Option("--merged", help="Merge the adapter into the base before export."),
1579 ] = False,
1580 dequantize: Annotated[
1581 bool,
1582 typer.Option(
1583 "--dequantize",
1584 help="Dequantize a QLoRA base to fp16 before merging.",
1585 ),
1586 ] = False,
1587 name: Annotated[str | None, typer.Option("--name", help="Ollama model name.")] = None,
1588 no_template: Annotated[
1589 bool,
1590 typer.Option("--no-template", help="Skip writing TEMPLATE into the Modelfile."),
1591 ] = False,
1592 no_smoke: Annotated[
1593 bool,
1594 typer.Option("--no-smoke", help="Register the export but skip the smoke prompt."),
1595 ] = False,
1596 no_imatrix: Annotated[
1597 bool,
1598 typer.Option(
1599 "--no-imatrix",
1600 help=(
1601 "Skip importance-matrix calibration. Default uses the "
1602 "replay corpus to calibrate k-quant quantization."
1603 ),
1604 ),
1605 ] = False,
1606 draft: Annotated[
1607 str | None,
1608 typer.Option(
1609 "--draft",
1610 help=(
1611 "Speculative-decoding draft model Ollama tag "
1612 "(e.g. qwen2.5:0.5b). Default uses the registered pair "
1613 "for this base; override here to pick a custom draft."
1614 ),
1615 ),
1616 ] = None,
1617 no_draft: Annotated[
1618 bool,
1619 typer.Option(
1620 "--no-draft",
1621 help="Suppress PARAMETER draft_model emission even when a pair is registered.",
1622 ),
1623 ] = False,
1624 skip_ollama: Annotated[
1625 bool,
1626 typer.Option(
1627 "--skip-ollama",
1628 help="Emit GGUFs + manifest only; do not touch the Ollama binary.",
1629 ),
1630 ] = False,
1631 adapter: Annotated[
1632 str | None,
1633 typer.Option(
1634 "--adapter",
1635 help=(
1636 "Named adapter to export. Required on multi-adapter "
1637 "documents; rejected on single-adapter documents."
1638 ),
1639 ),
1640 ] = None,
1641 adapter_mix: Annotated[
1642 str | None,
1643 typer.Option(
1644 "--adapter-mix",
1645 help=(
1646 "Weighted composition of named adapters, e.g. "
1647 "`knowledge:1.0,tone:0.5`. Mutually exclusive with --adapter. "
1648 "Multi-adapter docs only. LoRA-only; QLoRA requires "
1649 "--dequantize."
1650 ),
1651 ),
1652 ] = None,
1653 adapter_mix_method: Annotated[
1654 str,
1655 typer.Option(
1656 "--adapter-mix-method",
1657 help=(
1658 "PEFT combination strategy for --adapter-mix. `linear` "
1659 "(default) sums LoRA deltas; `svd` recomposes via SVD "
1660 "(higher fidelity, heavier compute). Only meaningful "
1661 "with --adapter-mix."
1662 ),
1663 ),
1664 ] = "linear",
1665 verbose: Annotated[
1666 bool,
1667 typer.Option("--verbose", help="Log each subprocess command as it launches."),
1668 ] = False,
1669 emit_sway_json: Annotated[
1670 bool,
1671 typer.Option(
1672 "--emit-sway-json",
1673 help=(
1674 "After the export, also write a ready-to-run sway.yaml "
1675 "(via dlm-sway autogen) into the export dir. Requires the "
1676 "[sway] extra: pip install 'dlm[sway]'."
1677 ),
1678 ),
1679 ] = False,
1680 ) -> None:
1681 """Export the adapter to a runtime target."""
1682
1683 from rich.console import Console
1684
1685 from dlm.base_models import GatedModelError, download_spec
1686 from dlm.base_models import resolve as resolve_base_model
1687 from dlm.doc.parser import parse_file
1688 from dlm.export import (
1689 ExportError,
1690 PreflightError,
1691 SubprocessError,
1692 UnknownExportTargetError,
1693 UnsafeMergeError,
1694 VendoringError,
1695 resolve_export_plan,
1696 run_export,
1697 )
1698 from dlm.export.ollama import (
1699 OllamaBinaryNotFoundError,
1700 OllamaCreateError,
1701 OllamaError,
1702 OllamaSmokeError,
1703 OllamaVersionError,
1704 )
1705 from dlm.export.quantize import run_checked
1706 from dlm.export.targets import (
1707 finalize_mlx_serve_export,
1708 finalize_vllm_export,
1709 prepare_llama_server_export,
1710 prepare_mlx_serve_export,
1711 prepare_vllm_export,
1712 resolve_target,
1713 )
1714 from dlm.store.paths import for_dlm
1715
1716 console = Console(stderr=True)
1717
1718 if draft is not None and no_draft:
1719 console.print("[red]error:[/red] --draft and --no-draft are mutually exclusive; pick one.")
1720 raise typer.Exit(code=2)
1721 if adapter is not None and adapter_mix is not None:
1722 console.print(
1723 "[red]export:[/red] --adapter and --adapter-mix are mutually exclusive; pick one."
1724 )
1725 raise typer.Exit(code=2)
1726 try:
1727 resolved_target = resolve_target(target)
1728 except UnknownExportTargetError as exc:
1729 console.print(f"[red]export:[/red] {exc}")
1730 raise typer.Exit(code=2) from exc
1731 parsed = parse_file(path)
1732 adapters_declared = parsed.frontmatter.training.adapters
1733 if adapter is not None:
1734 if adapters_declared is None:
1735 console.print(
1736 "[red]export:[/red] --adapter is only valid on multi-adapter "
1737 "documents (this doc does not declare `training.adapters`)."
1738 )
1739 raise typer.Exit(code=2)
1740 if adapter not in adapters_declared:
1741 declared = sorted(adapters_declared)
1742 console.print(
1743 f"[red]export:[/red] --adapter {adapter!r} is not declared (declared: {declared})."
1744 )
1745 raise typer.Exit(code=2)
1746
1747 mix_entries: list[tuple[str, float]] | None = None
1748 if adapter_mix is not None:
1749 from dlm.export.weighted_merge import (
1750 InvalidMixSpecError,
1751 parse_mix_spec,
1752 validate_mix_against_declared,
1753 )
1754
1755 if adapters_declared is None:
1756 console.print(
1757 "[red]export:[/red] --adapter-mix is only valid on multi-adapter "
1758 "documents (this doc does not declare `training.adapters`)."
1759 )
1760 raise typer.Exit(code=2)
1761 if adapter_mix_method not in ("linear", "svd"):
1762 console.print(
1763 f"[red]export:[/red] --adapter-mix-method must be "
1764 f"`linear` or `svd`, got {adapter_mix_method!r}."
1765 )
1766 raise typer.Exit(code=2)
1767 try:
1768 entries = parse_mix_spec(adapter_mix)
1769 validate_mix_against_declared(entries, set(adapters_declared))
1770 except InvalidMixSpecError as exc:
1771 console.print(f"[red]export:[/red] {exc}")
1772 raise typer.Exit(code=2) from exc
1773 mix_entries = [(e.name, e.weight) for e in entries]
1774
1775 store = for_dlm(parsed.frontmatter.dlm_id)
1776
1777 # Gate-driven static mix: when the doc has an enabled gate and the
1778 # user didn't pass --adapter-mix / --adapter, freeze the learned
1779 # gate to per-adapter weights for the GGUF export path. Dynamic
1780 # routing only lives in the `dlm prompt` flow; the runtime can't
1781 # evaluate the torch gate, so we substitute the prior here. A CLI
1782 # --adapter-mix wins — users who know what they want get full
1783 # control.
1784 if mix_entries is None and adapter is None:
1785 from dlm.export.gate_fallback import resolve_and_announce
1786
1787 resolution = resolve_and_announce(store, parsed)
1788 if resolution.entries is not None:
1789 mix_entries = resolution.entries
1790 for line in resolution.banner_lines:
1791 console.print(line)
1792
1793 already_accepted = _previously_accepted(store.manifest)
1794 try:
1795 spec = resolve_base_model(parsed.frontmatter.base_model, accept_license=already_accepted)
1796 except GatedModelError as exc:
1797 console.print(f"[red]license:[/red] base model {parsed.frontmatter.base_model!r} is gated.")
1798 if exc.license_url:
1799 console.print(f" review the license at: {exc.license_url}")
1800 console.print(" accept via `dlm train --i-accept-license` before exporting.")
1801 raise typer.Exit(code=1) from exc
1802
1803 # Audio bases take HF-snapshot unconditionally — llama.cpp has no
1804 # audio-arch roadmap at our pinned tag — so branch early without
1805 # resolving a GGUF plan.
1806 from dlm.modality import modality_for
1807
1808 export_dispatch = modality_for(spec)
1809 if resolved_target.name == "vllm" and export_dispatch.accepts_audio:
1810 console.print(
1811 "[red]export:[/red] --target vllm is not wired for audio-language "
1812 "documents yet; the current vllm export path only supports text bases."
1813 )
1814 raise typer.Exit(code=2)
1815 if resolved_target.name == "mlx-serve" and export_dispatch.accepts_audio:
1816 console.print(
1817 "[red]export:[/red] --target mlx-serve is not wired for audio-language "
1818 "documents yet; the current mlx-serve export path only supports text bases."
1819 )
1820 raise typer.Exit(code=2)
1821 if export_dispatch.accepts_audio:
1822 try:
1823 dispatch_result = export_dispatch.dispatch_export(
1824 store=store,
1825 spec=spec,
1826 adapter_name=adapter,
1827 quant=quant,
1828 merged=merged,
1829 adapter_mix_raw=adapter_mix,
1830 )
1831 except ExportError as exc:
1832 console.print(f"[red]export:[/red] {exc}")
1833 raise typer.Exit(code=1) from exc
1834 assert dispatch_result is not None # audio modality always returns a result
1835 for line in dispatch_result.banner_lines:
1836 console.print(line)
1837 return
1838
1839 try:
1840 plan = resolve_export_plan(
1841 cli_quant=quant,
1842 cli_merged=merged,
1843 cli_dequantize=dequantize,
1844 cli_no_template=no_template,
1845 cli_ollama_name=name,
1846 cli_no_imatrix=no_imatrix,
1847 frontmatter_default_quant=parsed.frontmatter.export.default_quant,
1848 )
1849 except ValueError as exc:
1850 console.print(f"[red]export:[/red] {exc}")
1851 raise typer.Exit(code=2) from exc
1852
1853 store.ensure_layout()
1854
1855 # VL bases: arch-probe + try single-file GGUF on SUPPORTED (with
1856 # fallback to HF-snapshot on refusal or subprocess failure). A
1857 # missing local base snapshot should not hard-fail the whole
1858 # export — the dispatcher can still emit the HF-snapshot path
1859 # without GGUF context.
1860 if resolved_target.name == "vllm" and export_dispatch.accepts_images:
1861 console.print(
1862 "[red]export:[/red] --target vllm is not wired for vision-language "
1863 "documents yet; the current vllm export path only supports text bases."
1864 )
1865 raise typer.Exit(code=2)
1866 if resolved_target.name == "mlx-serve" and export_dispatch.accepts_images:
1867 console.print(
1868 "[red]export:[/red] --target mlx-serve is not wired for vision-language "
1869 "documents yet; the current mlx-serve export path only supports text bases."
1870 )
1871 raise typer.Exit(code=2)
1872 if export_dispatch.accepts_images:
1873 gguf_emission_context = None
1874 try:
1875 cached_vl = download_spec(spec, local_files_only=True)
1876 except RuntimeError as exc:
1877 _ = exc
1878 else:
1879 gguf_emission_context = {
1880 "plan": plan,
1881 "cached_base_dir": cached_vl.path,
1882 "source_dlm_path": path.resolve(),
1883 "training_sequence_len": parsed.frontmatter.training.sequence_len,
1884 "dlm_version": f"v{parsed.frontmatter.dlm_version}",
1885 }
1886 try:
1887 dispatch_result = export_dispatch.dispatch_export(
1888 store=store,
1889 spec=spec,
1890 adapter_name=adapter,
1891 quant=quant,
1892 merged=merged,
1893 adapter_mix_raw=adapter_mix,
1894 gguf_emission_context=gguf_emission_context,
1895 )
1896 except ExportError as exc:
1897 console.print(f"[red]export:[/red] {exc}")
1898 raise typer.Exit(code=1) from exc
1899 assert dispatch_result is not None # VL modality always returns a result
1900 for line in dispatch_result.banner_lines:
1901 console.print(line)
1902 return
1903
1904 try:
1905 cached = download_spec(spec, local_files_only=True)
1906 except RuntimeError as exc:
1907 console.print(
1908 f"[red]export:[/red] base model not in local cache — run `dlm train` first.\n {exc}"
1909 )
1910 raise typer.Exit(code=1) from exc
1911
1912 def _verbose_runner(cmd: Sequence[str]) -> object:
1913 console.print(f"[dim]$ {' '.join(cmd)}[/dim]")
1914 return run_checked(cmd)
1915
1916 adapter_path_override = None
1917 if mix_entries is not None: # pragma: no cover - heavy path
1918 # Build the weighted-merged adapter into an ephemeral dir,
1919 # then feed the path to run_export as an override. The tmp
1920 # dir lives under the store's cache/ so it cleans up with
1921 # the rest of the store on `dlm pack`.
1922 from dlm.export.weighted_merge import MixEntry, build_and_stage
1923
1924 entries_typed = [MixEntry(name=n, weight=w) for (n, w) in mix_entries]
1925 adapter_path_override = build_and_stage(
1926 store=store,
1927 spec=spec,
1928 cached_base_dir=cached.path,
1929 entries=entries_typed,
1930 combination_type=adapter_mix_method, # type: ignore[arg-type]
1931 )
1932
1933 if resolved_target.name == "vllm":
1934 ignored_flags: list[str] = []
1935 if quant is not None:
1936 ignored_flags.append("--quant")
1937 if merged:
1938 ignored_flags.append("--merged")
1939 if dequantize:
1940 ignored_flags.append("--dequantize")
1941 if no_template:
1942 ignored_flags.append("--no-template")
1943 if skip_ollama:
1944 ignored_flags.append("--skip-ollama")
1945 if no_imatrix:
1946 ignored_flags.append("--no-imatrix")
1947 if draft is not None:
1948 ignored_flags.append("--draft")
1949 if no_draft:
1950 ignored_flags.append("--no-draft")
1951 if ignored_flags:
1952 console.print(
1953 "[yellow]export:[/yellow] ignoring flags not applicable to "
1954 f"`--target vllm`: {', '.join(ignored_flags)}"
1955 )
1956
1957 declared_adapter_names = tuple(adapters_declared.keys()) if adapters_declared else None
1958 try:
1959 vllm_result = prepare_vllm_export(
1960 store=store,
1961 spec=spec,
1962 served_model_name=name or f"dlm-{parsed.frontmatter.dlm_id.lower()}",
1963 training_sequence_len=parsed.frontmatter.training.sequence_len,
1964 adapter_name=adapter,
1965 adapter_path_override=adapter_path_override,
1966 declared_adapter_names=declared_adapter_names,
1967 )
1968 except ExportError as exc:
1969 console.print(f"[red]export:[/red] {exc}")
1970 raise typer.Exit(code=1) from exc
1971
1972 vllm_smoke = None if no_smoke else resolved_target.smoke_test(vllm_result)
1973 if vllm_smoke is not None and not vllm_smoke.ok:
1974 console.print(
1975 f"[red]smoke:[/red] {vllm_smoke.detail}\n"
1976 " re-run with `--no-smoke` to skip the smoke test."
1977 )
1978 raise typer.Exit(code=1)
1979
1980 manifest_path = finalize_vllm_export(
1981 store=store,
1982 spec=spec,
1983 prepared=vllm_result,
1984 smoke_output_first_line=None if vllm_smoke is None else vllm_smoke.detail,
1985 adapter_name=adapter,
1986 adapter_mix=mix_entries,
1987 )
1988 console.print(f"[green]exported:[/green] {vllm_result.export_dir}")
1989 console.print("target: vllm")
1990 assert vllm_result.launch_script_path is not None
1991 assert vllm_result.config_path is not None
1992 console.print(f"launch: {vllm_result.launch_script_path.name}")
1993 console.print(f"config: {vllm_result.config_path.name}")
1994 console.print(f"manifest: {manifest_path.name}")
1995 if vllm_smoke is not None and vllm_smoke.detail:
1996 console.print(f"smoke: {vllm_smoke.detail}")
1997 return
1998
1999 if resolved_target.name == "mlx-serve":
2000 mlx_ignored_flags: list[str] = []
2001 if quant is not None:
2002 mlx_ignored_flags.append("--quant")
2003 if merged:
2004 mlx_ignored_flags.append("--merged")
2005 if dequantize:
2006 mlx_ignored_flags.append("--dequantize")
2007 if name is not None:
2008 mlx_ignored_flags.append("--name")
2009 if no_template:
2010 mlx_ignored_flags.append("--no-template")
2011 if skip_ollama:
2012 mlx_ignored_flags.append("--skip-ollama")
2013 if no_imatrix:
2014 mlx_ignored_flags.append("--no-imatrix")
2015 if draft is not None:
2016 mlx_ignored_flags.append("--draft")
2017 if no_draft:
2018 mlx_ignored_flags.append("--no-draft")
2019 if mlx_ignored_flags:
2020 console.print(
2021 "[yellow]export:[/yellow] ignoring flags not applicable to "
2022 f"`--target mlx-serve`: {', '.join(mlx_ignored_flags)}"
2023 )
2024
2025 declared_adapter_names = tuple(adapters_declared.keys()) if adapters_declared else None
2026 try:
2027 mlx_serve_result = prepare_mlx_serve_export(
2028 store=store,
2029 spec=spec,
2030 adapter_name=adapter,
2031 adapter_path_override=adapter_path_override,
2032 declared_adapter_names=declared_adapter_names,
2033 )
2034 except ExportError as exc:
2035 console.print(f"[red]export:[/red] {exc}")
2036 raise typer.Exit(code=1) from exc
2037
2038 mlx_serve_smoke = None if no_smoke else resolved_target.smoke_test(mlx_serve_result)
2039 if mlx_serve_smoke is not None and not mlx_serve_smoke.ok:
2040 console.print(
2041 f"[red]smoke:[/red] {mlx_serve_smoke.detail}\n"
2042 " re-run with `--no-smoke` to skip the smoke test."
2043 )
2044 raise typer.Exit(code=1)
2045
2046 manifest_path = finalize_mlx_serve_export(
2047 store=store,
2048 spec=spec,
2049 prepared=mlx_serve_result,
2050 smoke_output_first_line=None if mlx_serve_smoke is None else mlx_serve_smoke.detail,
2051 adapter_name=adapter,
2052 adapter_mix=mix_entries,
2053 )
2054 console.print(f"[green]exported:[/green] {mlx_serve_result.export_dir}")
2055 console.print("target: mlx-serve")
2056 assert mlx_serve_result.launch_script_path is not None
2057 console.print(f"launch: {mlx_serve_result.launch_script_path.name}")
2058 console.print(f"manifest: {manifest_path.name}")
2059 if mlx_serve_smoke is not None and mlx_serve_smoke.detail:
2060 console.print(f"smoke: {mlx_serve_smoke.detail}")
2061 return
2062
2063 try:
2064 result = run_export(
2065 store,
2066 spec,
2067 plan,
2068 target=resolved_target.name,
2069 cached_base_dir=cached.path,
2070 subprocess_runner=_verbose_runner if verbose else None,
2071 skip_ollama=skip_ollama or resolved_target.name != "ollama",
2072 skip_smoke=no_smoke,
2073 source_dlm_path=path.resolve(),
2074 training_sequence_len=parsed.frontmatter.training.sequence_len,
2075 override_temperature=parsed.frontmatter.export.default_temperature,
2076 override_top_p=parsed.frontmatter.export.default_top_p,
2077 draft_override=draft,
2078 draft_disabled=no_draft,
2079 adapter_name=adapter,
2080 adapter_path_override=adapter_path_override,
2081 adapter_mix=mix_entries,
2082 )
2083 except UnsafeMergeError as exc:
2084 console.print(f"[red]merge:[/red] {exc}")
2085 raise typer.Exit(code=1) from exc
2086 except VendoringError as exc:
2087 console.print(
2088 f"[red]vendor:[/red] {exc}\n"
2089 " run `scripts/bump-llama-cpp.sh build` or "
2090 "`git submodule update --init --recursive`."
2091 )
2092 raise typer.Exit(code=1) from exc
2093 except PreflightError as exc:
2094 console.print(f"[red]preflight[{exc.probe}]:[/red] {exc.detail}")
2095 raise typer.Exit(code=1) from exc
2096 except SubprocessError as exc:
2097 console.print(f"[red]subprocess:[/red] {exc}")
2098 raise typer.Exit(code=1) from exc
2099 except OllamaBinaryNotFoundError as exc:
2100 console.print(
2101 f"[red]ollama:[/red] {exc}\n"
2102 " install from https://ollama.com/download "
2103 "or re-run with `--skip-ollama`."
2104 )
2105 raise typer.Exit(code=1) from exc
2106 except OllamaVersionError as exc:
2107 console.print(f"[red]ollama:[/red] {exc}")
2108 raise typer.Exit(code=1) from exc
2109 except OllamaCreateError as exc:
2110 console.print(f"[red]ollama create:[/red] {exc}")
2111 raise typer.Exit(code=1) from exc
2112 except OllamaSmokeError as exc:
2113 console.print(
2114 f"[red]smoke:[/red] {exc}\n re-run with `--no-smoke` to skip the smoke test."
2115 )
2116 raise typer.Exit(code=1) from exc
2117 except OllamaError as exc:
2118 console.print(f"[red]ollama:[/red] {exc}")
2119 raise typer.Exit(code=1) from exc
2120 except ExportError as exc:
2121 console.print(f"[red]export:[/red] {exc}")
2122 raise typer.Exit(code=1) from exc
2123
2124 if resolved_target.name == "llama-server":
2125 adapter_dir = adapter_path_override
2126 if adapter_dir is None:
2127 if adapter is None:
2128 adapter_dir = store.resolve_current_adapter()
2129 else:
2130 adapter_dir = store.resolve_current_adapter_for(adapter)
2131 assert adapter_dir is not None
2132 try:
2133 llama_server_result = prepare_llama_server_export(
2134 export_dir=result.export_dir,
2135 manifest_path=result.manifest_path,
2136 artifacts=result.artifacts,
2137 adapter_dir=adapter_dir,
2138 spec=spec,
2139 training_sequence_len=parsed.frontmatter.training.sequence_len,
2140 )
2141 except VendoringError as exc:
2142 console.print(
2143 f"[red]vendor:[/red] {exc}\n"
2144 " run `scripts/bump-llama-cpp.sh build --with-server` or "
2145 "`git submodule update --init --recursive`."
2146 )
2147 raise typer.Exit(code=1) from exc
2148 except ExportError as exc:
2149 console.print(f"[red]export:[/red] {exc}")
2150 raise typer.Exit(code=1) from exc
2151 llama_server_smoke = None if no_smoke else resolved_target.smoke_test(llama_server_result)
2152 if llama_server_smoke is not None and not llama_server_smoke.ok:
2153 console.print(
2154 f"[red]smoke:[/red] {llama_server_smoke.detail}\n"
2155 " re-run with `--no-smoke` to skip the smoke test."
2156 )
2157 raise typer.Exit(code=1)
2158
2159 cached_tag = " [dim](cached base)[/dim]" if result.cached else ""
2160 console.print(f"[green]exported:[/green] {result.export_dir}{cached_tag}")
2161 for artifact in result.artifacts:
2162 console.print(f" {artifact.name}")
2163
2164 # S26 X1 — also emit a sway.yaml next to the GGUF when the user
2165 # asks for it. Done AFTER the regular export so a sway-side
2166 # failure can never roll back a working GGUF deployment.
2167 if emit_sway_json:
2168 from dlm.export.sway_json import SwayJsonExportError, write_sway_json
2169
2170 try:
2171 sway_yaml_path = write_sway_json(path, result.export_dir)
2172 except SwayJsonExportError as exc:
2173 console.print(f"[red]sway-json:[/red] {exc}")
2174 raise typer.Exit(code=1) from exc
2175 console.print(f"[green]sway.yaml:[/green] {sway_yaml_path}")
2176 console.print(" next: sway run " + str(sway_yaml_path))
2177 if resolved_target.name == "llama-server":
2178 assert llama_server_result.launch_script_path is not None
2179 assert llama_server_result.config_path is not None
2180 console.print(f"target: {result.target}")
2181 console.print(f"launch: {llama_server_result.launch_script_path.name}")
2182 console.print(f"template: {llama_server_result.config_path.name}")
2183 if llama_server_smoke is not None and llama_server_smoke.detail:
2184 console.print(f"smoke: {llama_server_smoke.detail}")
2185 return
2186 if result.ollama_name:
2187 console.print(f"ollama: {result.ollama_name} (v{result.ollama_version})")
2188 if result.smoke_output_first_line:
2189 console.print(f"smoke: {result.smoke_output_first_line}")
2190
2191
def pack_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file to pack.")],
    out: Annotated[
        Path | None,
        typer.Option("--out", help="Output .dlm.pack path."),
    ] = None,
    include_exports: Annotated[
        bool,
        typer.Option("--include-exports", help="Bundle all GGUF exports into the pack."),
    ] = False,
    include_base: Annotated[
        bool,
        typer.Option(
            "--include-base",
            help="Bundle the base model snapshot (license rules still apply).",
        ),
    ] = False,
    include_logs: Annotated[
        bool,
        typer.Option("--include-logs", help="Bundle per-run JSONL logs."),
    ] = False,
    licensee: Annotated[
        str | None,
        typer.Option(
            "--i-am-the-licensee",
            help="URL acknowledging separate acceptance of a non-redistributable base (required for --include-base on gated models).",
        ),
    ] = None,
) -> None:
    """Produce a portable .dlm.pack bundle.

    Errors are printed to the stderr console and exit with code 1; on
    success the pack path and size are reported.
    """
    from rich.console import Console

    from dlm.doc.errors import DlmParseError
    from dlm.pack.errors import BaseLicenseRefusedError, PackError
    from dlm.pack.packer import pack

    console = Console(stderr=True)

    try:
        result = pack(
            path,
            out=out,
            include_exports=include_exports,
            include_base=include_base,
            include_logs=include_logs,
            licensee_acceptance_url=licensee,
        )
    # BaseLicenseRefusedError and PackError were previously handled by two
    # byte-identical except blocks; one tuple clause keeps the behavior
    # and removes the duplication.
    except (BaseLicenseRefusedError, PackError) as exc:
        console.print(f"[red]pack:[/red] {exc}")
        raise typer.Exit(code=1) from exc
    except DlmParseError as exc:
        console.print(f"[red]parse:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    size_mb = result.bytes_written / (1024 * 1024)
    console.print(
        f"[green]packed:[/green] {result.path} "
        f"({size_mb:.2f} MB, content_type={result.content_type})"
    )
2254
2255
def unpack_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm.pack to install.")],
    force: Annotated[
        bool,
        typer.Option("--force", help="Overwrite an existing store with the same dlm_id."),
    ] = False,
    out: Annotated[
        Path | None,
        typer.Option(
            "--out", help="Directory to place the restored .dlm (default: alongside the pack)."
        ),
    ] = None,
) -> None:
    """Install a .dlm.pack into the local store.

    Exit code 1 on a bad pack: wrong format version, failed integrity
    check, or malformed layout. On success, reports the restored paths
    and any pack-format migrations that were applied.
    """
    from rich.console import Console

    from dlm.pack.errors import (
        PackFormatVersionError,
        PackIntegrityError,
        PackLayoutError,
    )
    from dlm.pack.unpacker import unpack

    console = Console(stderr=True)

    try:
        result = unpack(path, force=force, out_dir=out)
    # The three pack-error types were previously handled by three
    # byte-identical except blocks; one tuple clause keeps the behavior
    # and removes the duplication.
    except (PackFormatVersionError, PackIntegrityError, PackLayoutError) as exc:
        console.print(f"[red]unpack:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    console.print(f"[green]unpacked:[/green] {result.dlm_path}")
    console.print(f" store: {result.store_path}")
    console.print(f" dlm_id: {result.dlm_id}")
    if result.applied_migrations:
        # Render the migration chain ending at the current format version
        # (one past the pack's original version).
        steps = " → ".join(
            f"v{v}" for v in (*result.applied_migrations, result.header.pack_format_version + 1)
        )
        console.print(f" migrated: {steps}")
2301
2302
def verify_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm.pack to verify.")],
    trust_on_first_use: Annotated[
        bool,
        typer.Option(
            "--trust-on-first-use",
            help=(
                "Record the signer's public key under ~/.dlm/trusted-keys/ "
                "on first verify. Without this flag an unknown signer is "
                "rejected with exit code 2."
            ),
        ),
    ] = False,
    trusted_keys_dir: Annotated[
        Path | None,
        typer.Option(
            "--trusted-keys-dir",
            help="Override ~/.dlm/trusted-keys/ (useful for scripted verify).",
            hidden=True,
        ),
    ] = None,
) -> None:
    """Verify a .dlm.pack's provenance chain.

    Reads the pack's provenance member, parses it, then checks it
    against the trusted-keys directory (optionally recording the signer
    on first use).

    Exit codes: 0 verified, 1 broken chain (or missing provenance),
    2 untrusted signer, 3 signature rejected.
    """
    from rich.console import Console

    from dlm.pack.errors import PackLayoutError
    from dlm.pack.layout import PROVENANCE_FILENAME
    from dlm.pack.unpacker import read_pack_member_bytes
    from dlm.share.errors import ShareError
    from dlm.share.provenance import (
        ProvenanceChainBroken,
        ProvenanceSchemaError,
        UnknownSignerError,
        load_provenance_json,
        verify_provenance,
    )

    # Errors go to stderr; the success report below uses a stdout Console.
    console = Console(stderr=True)
    # Hidden --trusted-keys-dir lets scripts/tests redirect trust state.
    keys_dir = trusted_keys_dir or (Path.home() / ".dlm" / "trusted-keys")

    try:
        payload = read_pack_member_bytes(path, PROVENANCE_FILENAME)
    except PackLayoutError as exc:
        console.print(f"[red]verify:[/red] {exc}")
        raise typer.Exit(code=1) from exc
    except OSError as exc:
        console.print(f"[red]verify:[/red] cannot read {path}: {exc}")
        raise typer.Exit(code=1) from exc

    # `None` payload means the provenance member is absent — an unsigned
    # pack, which maps to exit code 1 (missing provenance).
    if payload is None:
        console.print(f"[red]verify:[/red] {path} is unsigned — no {PROVENANCE_FILENAME} inside.")
        raise typer.Exit(code=1)

    # Write the in-pack JSON to a temp file so `load_provenance_json`
    # can use its normal filesystem path. Keeps the parser single-
    # sourced and the error messages consistent with the filesystem
    # call-site.
    import tempfile

    with tempfile.NamedTemporaryFile("wb", suffix=".json", delete=False) as fh:
        fh.write(payload)
        tmp_path = Path(fh.name)
    try:
        provenance = load_provenance_json(tmp_path)
    except ProvenanceSchemaError as exc:
        console.print(f"[red]verify:[/red] malformed provenance.json: {exc}")
        raise typer.Exit(code=1) from exc
    finally:
        # delete=False above means we own cleanup, on both paths.
        tmp_path.unlink(missing_ok=True)

    try:
        result = verify_provenance(
            provenance,
            trusted_keys_dir=keys_dir,
            tofu=trust_on_first_use,
        )
    except UnknownSignerError as exc:
        # Exit 2: signer not trusted (and TOFU not requested).
        console.print(f"[red]verify:[/red] {exc}")
        raise typer.Exit(code=2) from exc
    except ProvenanceChainBroken as exc:
        # Exit 1: the provenance chain itself is inconsistent.
        console.print(f"[red]verify:[/red] chain broken: {exc}")
        raise typer.Exit(code=1) from exc
    except ShareError as exc:
        # Exit 3: signature did not verify.
        console.print(f"[red]verify:[/red] signature rejected: {exc}")
        raise typer.Exit(code=3) from exc

    # Success report on stdout (fresh Console), unlike the errors above.
    out = Console()
    out.print(f"[green]verified:[/green] {path.name}")
    out.print(f" signer: {result.signer_fingerprint}")
    out.print(f" trusted-key: {result.trusted_key_path}")
    out.print(f" adapter_sha256: {provenance.adapter_sha256[:12]}...")
    out.print(f" base_revision: {provenance.base_revision}")
    out.print(f" corpus_root: {provenance.corpus_root_sha256[:12]}...")
    out.print(f" signed_at: {provenance.signed_at}")
    if result.tofu_recorded:
        out.print(
            f"[yellow]note:[/yellow] recorded new trust entry "
            f"at {result.trusted_key_path}; subsequent verifies use strict mode."
        )
2406
2407
def repl_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file to start a REPL against.")],
    adapter: Annotated[
        str | None,
        typer.Option(
            "--adapter",
            help=(
                "Named adapter to load. Required on multi-adapter "
                "documents; rejected on single-adapter documents."
            ),
        ),
    ] = None,
    backend: Annotated[
        str,
        typer.Option(
            "--backend",
            help="Inference backend: `auto`, `pytorch`, or `mlx`.",
        ),
    ] = "auto",
) -> None:
    """Interactive REPL against the trained adapter."""
    from rich.console import Console

    from dlm.base_models import GatedModelError
    from dlm.base_models import resolve as resolve_base_model
    from dlm.doc.parser import parse_file
    from dlm.hardware import doctor
    from dlm.inference import AdapterNotFoundError
    from dlm.inference.backends import (
        UnsupportedBackendError,
        build_backend,
        select_backend,
    )
    from dlm.repl.session import ReplSession
    from dlm.store.paths import for_dlm

    console = Console(stderr=True)

    # Usage errors exit 2; runtime/store failures exit 1.
    if backend not in {"auto", "pytorch", "mlx"}:
        console.print(
            f"[red]repl:[/red] --backend must be `auto`, `pytorch`, or `mlx` (got {backend!r})."
        )
        raise typer.Exit(code=2)

    doc = parse_file(path)
    adapters_declared = doc.frontmatter.training.adapters
    if adapter is not None:
        if adapters_declared is None:
            console.print(
                "[red]repl:[/red] --adapter is only valid on multi-adapter "
                "documents (this doc does not declare `training.adapters`)."
            )
            raise typer.Exit(code=2)
        if adapter not in adapters_declared:
            console.print(
                f"[red]repl:[/red] --adapter {adapter!r} is not declared "
                f"(declared: {sorted(adapters_declared)!r})."
            )
            raise typer.Exit(code=2)

    store = for_dlm(doc.frontmatter.dlm_id)
    license_already_accepted = _previously_accepted(store.manifest)
    try:
        spec = resolve_base_model(
            doc.frontmatter.base_model, accept_license=license_already_accepted
        )
    except GatedModelError as exc:
        console.print(
            f"[red]license:[/red] base {doc.frontmatter.base_model!r} is gated and has "
            "no recorded acceptance in this store; run `dlm train --i-accept-license` first."
        )
        raise typer.Exit(code=1) from exc

    caps = doctor().capabilities
    try:
        backend_name = select_backend(backend, caps)  # type: ignore[arg-type]
    except UnsupportedBackendError as exc:
        console.print(f"[red]repl:[/red] {exc}")
        raise typer.Exit(code=2) from exc
    engine = build_backend(backend_name, caps)

    try:
        engine.load(spec, store, adapter_name=adapter)
    except AdapterNotFoundError as exc:
        console.print(f"[red]repl:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    # The backend keeps its loaded bundle on a private attribute;
    # surface that bundle's tokenizer (if any) to the session.
    loaded = getattr(engine, "_loaded", None)
    tokenizer = None if loaded is None else loaded.tokenizer

    session = ReplSession(
        backend=engine,
        tokenizer=tokenizer,
        active_adapter=adapter,
        declared_adapters=tuple(sorted(adapters_declared)) if adapters_declared else (),
    )

    from dlm.repl.app import run_repl

    # The REPL's return value becomes the process exit code.
    raise typer.Exit(code=run_repl(session, console=console))
2506
2507
def metrics_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file whose store we query.")],
    json_out: Annotated[bool, typer.Option("--json", help="Emit JSON.")] = False,
    csv_out: Annotated[bool, typer.Option("--csv", help="Emit CSV.")] = False,
    run_id: Annotated[
        int | None,
        typer.Option("--run-id", help="Only show this run (drill-down)."),
    ] = None,
    phase: Annotated[
        str | None,
        typer.Option("--phase", help="Filter by phase: sft|dpo|orpo|cpt."),
    ] = None,
    since: Annotated[
        str | None,
        typer.Option(
            "--since",
            help="Time window (e.g. `24h`, `7d`, `30m`). Filters `started_at`.",
        ),
    ] = None,
    limit: Annotated[int, typer.Option("--limit")] = 20,
) -> None:
    """Query the per-store metrics database.

    Two shapes: a top-level run listing (default) and a per-run
    drill-down (`--run-id`). `--json`/`--csv` output goes to raw stdout;
    Rich text and errors go to the stderr console.
    """
    import csv
    import json
    import sys

    from rich.console import Console

    from dlm.doc.parser import parse_file
    from dlm.metrics.queries import (
        evals_for_run,
        evals_to_dict,
        preference_mining_for_run,
        preference_mining_to_dict,
        recent_runs,
        runs_to_dict,
        steps_for_run,
        steps_to_dict,
    )
    from dlm.store.paths import for_dlm

    console = Console(stderr=True)

    # At most one machine-readable format; exit 2 = usage error.
    if json_out and csv_out:
        console.print("[red]metrics:[/red] --json and --csv are mutually exclusive")
        raise typer.Exit(code=2)

    # _parse_since_arg exits 2 itself on a malformed --since value.
    since_delta = _parse_since_arg(since, console) if since else None

    parsed = parse_file(path)
    store = for_dlm(parsed.frontmatter.dlm_id)

    runs = recent_runs(store.root, limit=limit, phase=phase, since=since_delta, run_id=run_id)

    if run_id is not None:
        # Drill-down: show this run's steps + evals.
        if not runs:
            console.print(f"[red]metrics:[/red] no run with run_id={run_id}")
            raise typer.Exit(code=1)
        run = runs[0]
        steps = steps_for_run(store.root, run_id)
        evals = evals_for_run(store.root, run_id)
        preference_rows = preference_mining_for_run(store.root, run_id)

        if json_out:
            payload = {
                "run": runs_to_dict([run])[0],
                "steps": steps_to_dict(steps),
                "evals": evals_to_dict(evals),
                "preference_mining": preference_mining_to_dict(preference_rows),
            }
            # Raw stdout, not Console — Rich wraps at terminal width and
            # would corrupt machine-readable output.
            sys.stdout.write(json.dumps(payload, indent=2) + "\n")
            return
        if csv_out:
            writer = csv.writer(sys.stdout)
            writer.writerow(["step", "loss", "lr", "grad_norm", "val_loss"])
            # Join evals onto steps by step number; steps without a
            # matching eval get None (rendered as an empty cell).
            eval_by_step = {e.step: e.val_loss for e in evals}
            for s in steps:
                writer.writerow([s.step, s.loss, s.lr, s.grad_norm, eval_by_step.get(s.step)])
            return
        console.print(
            f"[green]run_id={run.run_id}[/green] phase={run.phase} "
            f"seed={run.seed} status={run.status} steps={len(steps)} "
            f"evals={len(evals)}"
        )
        if evals:
            last = evals[-1]
            console.print(
                f" last eval: step={last.step} val_loss={last.val_loss} "
                f"perplexity={last.perplexity}"
            )
        if preference_rows:
            last_pref = preference_rows[-1]
            console.print(
                " preference mining: "
                f"events={len(preference_rows)} "
                f"mined_pairs={sum(row.mined_pairs for row in preference_rows)} "
                f"skipped_prompts={sum(row.skipped_prompts for row in preference_rows)} "
                f"last_mode={last_pref.write_mode} "
                f"judge={last_pref.judge_name}"
            )
        return

    # Top-level: list runs.
    if json_out:
        sys.stdout.write(json.dumps({"runs": runs_to_dict(runs)}, indent=2) + "\n")
        return
    if csv_out:
        writer = csv.writer(sys.stdout)
        writer.writerow(["run_id", "phase", "seed", "status", "started_at", "ended_at"])
        for r in runs:
            writer.writerow([r.run_id, r.phase, r.seed, r.status, r.started_at, r.ended_at])
        return

    # Text mode: an empty result is informational, not an error.
    if not runs:
        console.print("[dim]metrics:[/dim] no runs found (hint: train first, or adjust filters)")
        return
    console.print(f"[bold]Runs: {len(runs)}[/bold]")
    for r in runs:
        console.print(
            f" run_id={r.run_id} phase={r.phase} seed={r.seed} "
            f"status={r.status} started={r.started_at}"
        )
2631
2632
def metrics_watch_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file whose store we tail.")],
    poll_seconds: Annotated[
        float,
        typer.Option("--poll-seconds", help="How often to re-read the metrics DB."),
    ] = 1.0,
) -> None:
    """Follow the metrics DB, echoing new steps and evals as they land."""
    import time

    from rich.console import Console

    from dlm.doc.parser import parse_file
    from dlm.metrics.queries import evals_for_run, latest_run_id, steps_for_run
    from dlm.store.paths import for_dlm

    console = Console()

    doc = parse_file(path)
    store = for_dlm(doc.frontmatter.dlm_id)

    console.print(
        f"[dim]metrics watch:[/dim] polling {store.root} every {poll_seconds}s (Ctrl-C to exit)"
    )

    # Cursors track the highest step already printed, per stream.
    followed_run: int | None = None
    step_cursor = 0
    eval_cursor = 0
    try:
        while True:
            run_id = latest_run_id(store.root)
            if run_id is None:
                # Nothing recorded yet — keep polling.
                time.sleep(poll_seconds)
                continue
            if run_id != followed_run:
                # A newer run appeared: re-anchor and reset both cursors.
                followed_run, step_cursor, eval_cursor = run_id, 0, 0
                console.print(f"[green]→ following run_id={run_id}[/green]")

            for step_row in steps_for_run(store.root, run_id, since_step=step_cursor):
                console.print(
                    f" step {step_row.step:>5} loss={step_row.loss} "
                    f"lr={step_row.lr} grad_norm={step_row.grad_norm}"
                )
                step_cursor = step_row.step

            for eval_row in evals_for_run(store.root, run_id, since_step=eval_cursor):
                console.print(
                    f" [yellow]eval @ step {eval_row.step}[/yellow] "
                    f"val_loss={eval_row.val_loss} perplexity={eval_row.perplexity}"
                )
                eval_cursor = eval_row.step

            time.sleep(poll_seconds)
    except KeyboardInterrupt:
        console.print("[dim]metrics watch:[/dim] bye")
2691
2692
def _parse_since_arg(since: str, console: object) -> timedelta:
    """Parse `24h` / `7d` / `30m` / `10s` into a timedelta."""
    from datetime import timedelta

    from rich.console import Console

    assert isinstance(console, Console)

    if not since:
        raise typer.Exit(code=2)

    # Dispatch table: unit suffix -> timedelta keyword argument.
    unit_to_kwarg = {"s": "seconds", "m": "minutes", "h": "hours", "d": "days"}

    suffix = since[-1].lower()
    try:
        magnitude = int(since[:-1])
    except ValueError:
        console.print(f"[red]metrics:[/red] --since {since!r} not an integer+unit")
        raise typer.Exit(code=2) from None

    kwarg = unit_to_kwarg.get(suffix)
    if kwarg is None:
        console.print(f"[red]metrics:[/red] --since {since!r} unit must be s/m/h/d")
        raise typer.Exit(code=2)
    return timedelta(**{kwarg: magnitude})
2719
2720
def doctor_cmd(
    json_out: Annotated[bool, typer.Option("--json", help="Emit machine-readable output.")] = False,
) -> None:
    """Inspect hardware and print the resolved training plan."""
    import json

    from dlm.hardware import doctor, render_text

    report = doctor()
    # Render once, emit once: JSON for machines, text for humans.
    if json_out:
        rendered = json.dumps(report.to_dict(), indent=2, default=str)
    else:
        rendered = render_text(report)
    typer.echo(rendered)
2734
2735
def show_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file to inspect.")],
    json_out: Annotated[bool, typer.Option("--json", help="Emit machine-readable JSON.")] = False,
) -> None:
    """Show training history, exports, and adapter state.

    Handles two store states: not-yet-initialized (before `dlm train`),
    where only frontmatter-derived info is reported, and an initialized
    store, where the full inspection plus cache/gate/preference-mining/
    base-security summaries are rendered. Exits 1 on parse errors or a
    corrupt manifest.
    """
    import json as _json
    import sys

    from rich.console import Console

    from dlm.doc.errors import DlmParseError
    from dlm.doc.parser import parse_file
    from dlm.store.errors import ManifestCorruptError
    from dlm.store.inspect import inspect_store
    from dlm.store.paths import for_dlm

    # Errors on stderr; the human-readable report on stdout.
    console = Console(stderr=True)
    out_console = Console()

    try:
        parsed = parse_file(path)
    except (DlmParseError, OSError) as exc:
        console.print(f"[red]show:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    store = for_dlm(parsed.frontmatter.dlm_id)
    training_sources, discovered_configs = _summarize_training_sources_and_discovered(
        parsed, path.resolve().parent
    )
    # The per-document cache config comes from frontmatter, not on-disk
    # state — report it on both the pre-train and initialized-store paths
    # so authors can sanity-check the knobs before `dlm train` runs.
    cache_cfg = parsed.frontmatter.training.cache
    training_cache_config: dict[str, object] = {
        "enabled": cache_cfg.enabled,
        "max_bytes": cache_cfg.max_bytes,
        "prune_older_than_days": cache_cfg.prune_older_than_days,
    }

    # Store may not exist yet (no `dlm train` run). Treat that as an
    # informational state rather than an error — useful after `dlm init`.
    if not store.manifest.exists():
        if json_out:
            payload: dict[str, object] = {
                "dlm_id": parsed.frontmatter.dlm_id,
                "base_model": parsed.frontmatter.base_model,
                "store_initialized": False,
                "source_path": str(path.resolve()),
                "training_cache_config": training_cache_config,
            }
            # Optional keys are omitted (not null) when there is no data.
            if training_sources is not None:
                payload["training_sources"] = training_sources
            if discovered_configs:
                payload["discovered_training_configs"] = discovered_configs
            sys.stdout.write(_json.dumps(payload, indent=2) + "\n")
        else:
            out_console.print(f"[bold]{path}[/bold]")
            out_console.print(f" dlm_id: {parsed.frontmatter.dlm_id}")
            out_console.print(f" base_model: {parsed.frontmatter.base_model}")
            out_console.print(" store: [dim]not yet initialized (run `dlm train`)[/dim]")
            if training_sources:
                _render_training_sources_text(out_console, training_sources)
        return

    try:
        inspection = inspect_store(store, source_path=path.resolve())
    except ManifestCorruptError as exc:
        console.print(f"[red]show:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    # Side summaries gathered from the store; the None checks below
    # tolerate summaries with nothing to report.
    training_cache = _summarize_training_cache(store.tokenized_cache_dir, store.root)
    gate = _summarize_gate(store)
    preference_mining = _summarize_preference_mining(store.root)
    base_security = _summarize_base_security(parsed.frontmatter.base_model)

    if json_out:
        payload_full = _inspection_to_dict(inspection)
        if training_sources is not None:
            payload_full["training_sources"] = training_sources
        if discovered_configs:
            payload_full["discovered_training_configs"] = discovered_configs
        if training_cache is not None:
            payload_full["training_cache"] = training_cache
        payload_full["training_cache_config"] = training_cache_config
        if gate is not None:
            payload_full["gate"] = gate
        if preference_mining is not None:
            payload_full["preference_mining"] = preference_mining
            # Flat convenience keys duplicated from the nested summary.
            payload_full["preference_mining_runs"] = preference_mining["run_count"]
            payload_full["total_auto_mined_pairs"] = preference_mining["total_mined_pairs"]
        if base_security is not None:
            payload_full["base_security"] = base_security
        # Write JSON to raw stdout — Rich's Console wraps lines at the
        # terminal width and would corrupt the JSON.
        sys.stdout.write(_json.dumps(payload_full, indent=2, default=str) + "\n")
        return

    _render_inspection_text(out_console, path, inspection)
    if training_sources:
        _render_training_sources_text(out_console, training_sources)
    # Only render the cache section when it actually has entries.
    if training_cache is not None and training_cache.get("entry_count", 0):
        _render_training_cache_text(out_console, training_cache)
    if gate is not None:
        _render_gate_text(out_console, gate)
    # Surface trust_remote_code bases in the text report.
    if base_security is not None and base_security.get("trust_remote_code"):
        _render_base_security_text(out_console, base_security)
2842
2843
def _inspection_to_dict(inspection: object) -> dict[str, object]:
    """Flatten a StoreInspection into a JSON-safe dict.

    Schema is the v1 contract for `dlm show --json`; any reshape is a
    version bump (recorded in tests/golden/cli-json/). Key order is
    part of that contract, so it is preserved verbatim.
    """
    from dlm.store.inspect import StoreInspection

    assert isinstance(inspection, StoreInspection)
    adapter_rows = [
        {
            "name": adapter.name,
            "has_current": adapter.has_current,
            "latest_version": adapter.latest_version,
        }
        for adapter in inspection.named_adapters
    ]
    src = inspection.source_path
    return {
        "dlm_id": inspection.dlm_id,
        "path": str(inspection.path),
        "base_model": inspection.base_model,
        "base_model_revision": inspection.base_model_revision,
        "adapter_version": inspection.adapter_version,
        "training_runs": inspection.training_runs,
        "last_trained_at": inspection.last_trained_at,
        "has_adapter_current": inspection.has_adapter_current,
        "replay_size_bytes": inspection.replay_size_bytes,
        "total_size_bytes": inspection.total_size_bytes,
        "source_path": str(src) if src else None,
        "orphaned": inspection.orphaned,
        "exports": [exp.model_dump(mode="json") for exp in inspection.exports],
        "content_hashes": dict(inspection.content_hashes),
        "pinned_versions": dict(inspection.pinned_versions),
        "named_adapters": adapter_rows,
    }
2878
2879
def _render_inspection_text(console: object, path: Path, inspection: object) -> None:
    """Human-readable `dlm show` output."""
    from rich.console import Console

    from dlm.store.inspect import StoreInspection

    assert isinstance(console, Console)
    assert isinstance(inspection, StoreInspection)

    console.print(f"[bold]{path}[/bold]")
    console.print(f" dlm_id: {inspection.dlm_id}")
    revision = inspection.base_model_revision
    revision_suffix = f" (revision {revision[:7]})" if revision else ""
    console.print(f" base_model: {inspection.base_model}{revision_suffix}")
    size_label = _human_size(inspection.total_size_bytes)
    console.print(f" store: {inspection.path} ({size_label})")
    if inspection.named_adapters:
        # Multi-adapter store: the flat adapter_version field stays 0
        # on multi-adapter docs, so render the per-adapter pointers.
        console.print(" adapters:")
        for named in inspection.named_adapters:
            if named.has_current:
                console.print(f" {named.name:16}v{named.latest_version:04d}")
            else:
                console.print(f" {named.name:16}[dim]no current pointer[/dim]")
    elif inspection.has_adapter_current:
        console.print(f" adapter: v{inspection.adapter_version:04d}")
    else:
        console.print(" adapter: [dim]none (no `dlm train` yet)[/dim]")
    trained = inspection.last_trained_at
    trained_suffix = f" — last {trained.isoformat(timespec='seconds')}" if trained else ""
    console.print(f" training runs: {inspection.training_runs}{trained_suffix}")
    console.print(f" exports: {len(inspection.exports)}")
    for export in inspection.exports:
        ollama_suffix = f" — {export.ollama_name}" if export.ollama_name else ""
        console.print(f" {export.quant}{ollama_suffix}")
    if inspection.orphaned:
        console.print(" [yellow]orphaned:[/yellow] source .dlm is missing or mismatched")
2919
2920
2921 def _human_size(n: int) -> str:
2922 for unit in ("B", "KB", "MB", "GB", "TB"):
2923 if n < 1024:
2924 return f"{n:.1f} {unit}" if unit != "B" else f"{n} B"
2925 n //= 1024
2926 return f"{n} PB"
2927
2928
def _summarize_training_sources(parsed: object, base_path: Path) -> list[dict[str, object]] | None:
    """Best-effort resolution of `training.sources` for `dlm show`.

    Returns None when the frontmatter declares no directives; returns
    a list of per-source dicts otherwise. Failures to expand (missing
    paths, policy escapes) fall back to declared-only records so the
    show output stays useful for debugging a misconfigured directive.
    """
    # Thin wrapper: drop the discovered-config half of the pair.
    sources, _discovered = _summarize_training_sources_and_discovered(parsed, base_path)
    return sources
2939
2940
def _summarize_training_sources_and_discovered(
    parsed: object, base_path: Path
) -> tuple[list[dict[str, object]] | None, list[dict[str, object]]]:
    """Like `_summarize_training_sources` but also returns the per-anchor
    `.dlm/training.yaml` + `.dlm/ignore` discovery records.

    Returns `(training_sources, discovered_configs)`. `discovered_configs`
    is always a list (empty when nothing was found or the expansion
    failed); `training_sources` matches the single-value helper's
    contract.
    """
    from dlm.directives import DirectiveError, expand_sources
    from dlm.doc.parser import ParsedDlm

    assert isinstance(parsed, ParsedDlm)
    directives = parsed.frontmatter.training.sources
    if not directives:
        return None, []

    declared: list[dict[str, object]] = [
        {
            "path": directive.path,
            "include": list(directive.include),
            "exclude": list(directive.exclude),
            "max_files": directive.max_files,
            "max_bytes_per_file": directive.max_bytes_per_file,
        }
        for directive in directives
    ]

    try:
        result = expand_sources(parsed, base_path=base_path)
    except (DirectiveError, OSError):
        # Expansion failed: report the declared directives only.
        return declared, []

    records: list[dict[str, object]] = [
        {
            **decl,
            "file_count": prov.file_count,
            "total_bytes": prov.total_bytes,
            "skipped_binary": prov.skipped_binary,
            "skipped_encoding": prov.skipped_encoding,
            "skipped_over_size": prov.skipped_over_size,
        }
        for decl, prov in zip(declared, result.provenance, strict=False)
    ]
    # Defensive: a successful expansion yields one provenance per
    # declaration, but if it ever comes back short, keep the remaining
    # declared-only entries (slice is empty when lengths match).
    records.extend(declared[len(records):])

    discovered_records: list[dict[str, object]] = [
        {
            "anchor": str(found.anchor),
            "has_training_yaml": found.config is not None,
            "has_ignore": bool(found.ignore_rules),
            "include": list(found.config.include) if found.config else [],
            "exclude": list(found.config.exclude) if found.config else [],
            "exclude_defaults": (found.config.exclude_defaults if found.config else True),
            "metadata": dict(found.config.metadata) if found.config else {},
            "ignore_rules": len(found.ignore_rules),
        }
        for found in result.discovered
    ]
    return records, discovered_records
3008
3009
def _summarize_training_cache(cache_dir: Path, store_root: Path) -> dict[str, object] | None:
    """Return a JSON-friendly snapshot of the tokenized-section cache.

    None when the cache dir doesn't exist (store never trained with
    the cache, or pre-Sprint-31 layout). Cheap — reads the manifest
    only, not the entry files.
    """
    if not cache_dir.is_dir():
        return None
    from dlm.directives.cache import TokenizedCache
    from dlm.metrics import queries as _queries

    cache = TokenizedCache.open(cache_dir)
    latest = _queries.latest_tokenization(store_root)
    snapshot: dict[str, object] = {
        "path": str(cache_dir),
        "entry_count": cache.entry_count,
        "bytes": cache.total_bytes,
        "last_run_hit_rate": None,
        "last_run_id": None,
    }
    if latest is not None:
        snapshot["last_run_hit_rate"] = latest.hit_rate
        snapshot["last_run_id"] = latest.run_id
    return snapshot
3031
3032
def _summarize_gate(store: object) -> dict[str, object] | None:
    """Return a JSON-friendly snapshot of the learned adapter gate.

    None when the store has no gate config (pre-Sprint-34 runs, or
    `training.gate.enabled` was false). Reads two sources: the
    on-disk `gate_config.json` for mode + adapter order, and the
    metrics `gate_events` table for per-adapter mean weight from the
    most recent run that recorded a gate.

    Args:
        store: a `StorePath` (typed `object` so the store import stays
            function-local; narrowed by the assert below).
    """
    import json as _json

    from dlm.store.paths import StorePath
    from dlm.train.gate.paths import gate_config_path

    assert isinstance(store, StorePath)
    cfg_path = gate_config_path(store)

    from dlm.metrics import queries as _queries
    from dlm.train.gate.module import GateMetadata

    # Events from the most recent run that recorded a gate (may be empty).
    events = _queries.latest_gate_events(store.root)
    # Divergence path: training raised before writing a config, but we
    # still emit one GateEvent per adapter with mode="diverged" so
    # operators can see the failure. Surface it even when the config
    # file is absent.
    if not cfg_path.exists():
        if events and events[0].mode == "diverged":
            return {
                "mode": "diverged",
                # No config was ever written, so the dimensions are unknown.
                "adapter_names": [e.adapter_name for e in events],
                "input_dim": None,
                "hidden_proj_dim": None,
                "last_run_id": events[0].run_id,
                "per_adapter": [
                    {
                        "adapter_name": e.adapter_name,
                        "mean_weight": e.mean_weight,
                        "sample_count": e.sample_count,
                        "mode": e.mode,
                    }
                    for e in events
                ],
            }
        # No config file and no divergence record: this store has no gate.
        return None

    # Config exists: mode + adapter order come from disk; per-adapter
    # weights come from the latest recorded events when available.
    raw = _json.loads(cfg_path.read_text(encoding="utf-8"))
    meta = GateMetadata.from_json(raw)
    per_adapter: list[dict[str, object]] = []
    run_id: int | None = None
    if events:
        run_id = events[0].run_id
        per_adapter = [
            {
                "adapter_name": e.adapter_name,
                "mean_weight": e.mean_weight,
                "sample_count": e.sample_count,
                "mode": e.mode,
            }
            for e in events
        ]
    else:
        # No recorded events yet; fall back to the config so `dlm show`
        # still reports that a gate exists and in which mode.
        per_adapter = [{"adapter_name": name} for name in meta.adapter_names]
    return {
        "mode": meta.mode,
        "adapter_names": list(meta.adapter_names),
        "input_dim": meta.input_dim,
        "hidden_proj_dim": meta.hidden_proj_dim,
        "last_run_id": run_id,
        "per_adapter": per_adapter,
    }
3105
3106
def _summarize_preference_mining(store_root: Path) -> dict[str, object] | None:
    """Return the latest preference-mine summary for `dlm show --json`."""
    from dlm.metrics import queries as _queries

    totals = _queries.preference_mining_totals(store_root)
    if totals is None:
        return None
    latest = _queries.latest_preference_mining(store_root)
    # Non-None totals imply at least one recorded mining event.
    assert latest is not None
    latest_rows = _queries.preference_mining_for_run(store_root, latest.run_id)
    return {
        "run_count": totals.run_count,
        "event_count": totals.event_count,
        "total_mined_pairs": totals.total_mined_pairs,
        "total_skipped_prompts": totals.total_skipped_prompts,
        "last_run_id": latest.run_id,
        "last_run_event_count": len(latest_rows),
        "last_event": _queries.preference_mining_to_dict([latest])[0],
    }
3126
3127
def _summarize_base_security(base_model_key: str) -> dict[str, object] | None:
    """Surface security-sensitive base-model flags for `dlm show`.

    Today that's just `trust_remote_code` — a flag that causes the HF
    loader to execute Python from the model repo. We resolve the spec
    out of the in-process registry (no network: the resolver reads a
    frozen Python dict) so users can see which bases opt in without
    grepping source. Returns None when the key doesn't resolve (an
    `hf:...` escape hatch that isn't in the registry); the caller
    silently skips in that case.
    """
    from dlm.base_models import resolve as resolve_base_model
    from dlm.base_models.errors import BaseModelError

    try:
        spec = resolve_base_model(base_model_key, accept_license=True)
    except BaseModelError:
        # Unregistered key — caller treats None as "nothing to report".
        return None
    else:
        return {
            "base_model": spec.key,
            "architecture": spec.architecture,
            "trust_remote_code": bool(spec.trust_remote_code),
        }
3151
3152
def _render_base_security_text(console: object, snap: dict[str, object]) -> None:
    """Print the trust_remote_code warning line for human `dlm show`."""
    from rich.console import Console

    assert isinstance(console, Console)
    architecture = snap.get("architecture", "?")
    warning = (
        f" [yellow]security:[/yellow] base uses [red]trust_remote_code=True[/red] "
        f"(arch={architecture}) — HF loader will execute Python from the model repo"
    )
    console.print(warning)
3162
3163
def _render_gate_text(console: object, snap: dict[str, object]) -> None:
    """Print the adapter-gate section of human `dlm show` output."""
    from rich.console import Console

    assert isinstance(console, Console)
    mode = snap.get("mode", "?")
    if mode == "diverged":
        console.print(" adapter gate ([red]diverged[/red]):")
        console.print(
            " [yellow]gate training produced a non-finite loss; "
            "store fell back to gate-less routing[/yellow]"
        )
    else:
        console.print(f" adapter gate ({mode}):")
    rows = snap.get("per_adapter", [])
    if not isinstance(rows, list):
        return
    for row in rows:
        if not isinstance(row, dict):
            continue
        name = row.get("adapter_name", "?")
        mean_weight = row.get("mean_weight")
        sample_count = row.get("sample_count")
        if mean_weight is None:
            console.print(f" {name} [dim](no recorded events)[/dim]")
            continue
        weight_val = float(mean_weight) if isinstance(mean_weight, (int, float)) else 0.0
        count_val = sample_count if isinstance(sample_count, int) else 0
        console.print(f" {name:<16} weight={weight_val:.3f} samples={count_val}")
3191
3192
def _render_training_cache_text(console: object, snap: dict[str, object]) -> None:
    """Print the tokenized-cache section of human `dlm show` output."""
    from rich.console import Console

    assert isinstance(console, Console)
    # Snapshot values come from JSON-ish dicts; coerce defensively.
    raw_entries = snap.get("entry_count", 0)
    raw_bytes = snap.get("bytes", 0)
    entries = raw_entries if isinstance(raw_entries, int) else 0
    size = raw_bytes if isinstance(raw_bytes, int) else 0
    console.print(" tokenized cache:")
    console.print(f" entries: {entries}")
    console.print(f" size: {_human_size(size)}")
    hit_rate = snap.get("last_run_hit_rate")
    if isinstance(hit_rate, (int, float)):
        console.print(f" last hit rate: {float(hit_rate):.1%}")
3207
3208
def _render_training_sources_text(console: object, records: list[dict[str, object]]) -> None:
    """Print the training-sources section of human `dlm show` output."""
    from rich.console import Console

    assert isinstance(console, Console)
    console.print(" training sources:")
    for record in records:
        src_path = record["path"]
        file_count = record.get("file_count")
        total_bytes = record.get("total_bytes")
        # A record without a file_count was never expanded (declared-only).
        if file_count is None:
            console.print(f" {src_path} [dim](not expanded)[/dim]")
            continue
        byte_count = int(total_bytes) if isinstance(total_bytes, int) else 0
        console.print(f" {src_path} {file_count} file(s), {_human_size(byte_count)}")
3223
3224
def migrate_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file to migrate.")],
    dry_run: Annotated[bool, typer.Option("--dry-run")] = False,
    no_backup: Annotated[bool, typer.Option("--no-backup")] = False,
) -> None:
    """Migrate a .dlm frontmatter to the current schema version."""
    from rich.console import Console

    from dlm.doc.errors import DlmParseError
    from dlm.doc.migrate import migrate_file

    console = Console(stderr=True)

    try:
        result = migrate_file(path, dry_run=dry_run, no_backup=no_backup)
    except DlmParseError as exc:
        console.print(f"[red]migrate:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    # Nothing applied: the doc was already current.
    if not result.applied:
        console.print(
            f"[green]migrate:[/green] {path} already at v{result.target_version} "
            "(no migrations needed)."
        )
        return

    # Render the migration chain, e.g. "v7 → v8 → v9".
    chain = " → ".join(f"v{v}" for v in (*result.applied, result.target_version))
    if dry_run:
        console.print(
            f"[yellow]dry-run:[/yellow] {path} would migrate {chain} "
            "(re-run without --dry-run to apply)."
        )
        return

    if result.backup_path is not None:
        console.print(f"[dim]backup:[/dim] {result.backup_path}")
    console.print(f"[green]migrated:[/green] {path} {chain}")
3262
3263
def templates_list_cmd(
    json_out: Annotated[
        bool,
        typer.Option("--json", help="Emit a JSON array of template metadata."),
    ] = False,
    refresh: Annotated[
        bool,
        typer.Option(
            "--refresh",
            help=(
                "Refresh from the upstream template gallery. Currently a no-op — "
                "upstream repo + signing key are deferred."
            ),
        ),
    ] = False,
    accept_unsigned: Annotated[
        bool,
        typer.Option(
            "--accept-unsigned",
            help=(
                "Bypass signed-tag verification on --refresh. Reserved; takes effect "
                "once the upstream gallery signs its releases."
            ),
        ),
    ] = False,
) -> None:
    """List the bundled (and, one day, remote) template gallery."""

    import json as _json

    from rich.console import Console

    from dlm.templates import list_bundled

    # Data on stdout, diagnostics on stderr — keeps `--json` pipeable.
    console_out = Console()
    console_err = Console(stderr=True)

    if refresh:
        from dlm.templates.fetcher import RemoteFetchUnavailable, cache_dir, fetch_all

        try:
            fetch_all(cache_dir(), remote="")
        except RemoteFetchUnavailable as exc:
            console_err.print(
                f"[yellow]templates:[/yellow] {exc} Falling back to the bundled gallery."
            )
    # --accept-unsigned is reserved for when the live fetcher lands;
    # referencing it here silences ARG001 without ceremony.
    _ = accept_unsigned

    templates = list_bundled()

    if json_out:
        payload = [
            {
                "name": tmpl.name,
                "title": tmpl.meta.title,
                "domain_tags": list(tmpl.meta.domain_tags),
                "recommended_base": tmpl.meta.recommended_base,
                "expected_steps": tmpl.meta.expected_steps,
                "expected_duration": dict(tmpl.meta.expected_duration),
                "summary": tmpl.meta.summary,
                "sample_prompts": list(tmpl.meta.sample_prompts),
            }
            for tmpl in templates
        ]
        console_out.print_json(_json.dumps(payload))
        return

    if not templates:
        console_err.print("[yellow]templates:[/yellow] no bundled templates found.")
        raise typer.Exit(code=1)

    # Pad names so titles line up in one column.
    name_width = max(len(tmpl.name) for tmpl in templates)
    for tmpl in templates:
        console_out.print(
            f"[bold]{tmpl.name:<{name_width}}[/bold] {tmpl.meta.title} "
            f"[dim]({tmpl.meta.recommended_base})[/dim]"
        )
3343
3344
def push_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm or .dlm.pack to push.")],
    to: Annotated[
        str,
        typer.Option(
            "--to",
            help=(
                "Destination. `hf:<org>/<repo>` for HuggingFace Hub, "
                "`https://...` for a generic HTTPS endpoint, or a local path."
            ),
        ),
    ],
    sign: Annotated[
        bool,
        typer.Option("--sign", help="Sign the pack with minisign before upload."),
    ] = False,
    include_exports: Annotated[bool, typer.Option("--include-exports")] = False,
    include_base: Annotated[bool, typer.Option("--include-base")] = False,
    include_logs: Annotated[bool, typer.Option("--include-logs")] = False,
    licensee: Annotated[
        str | None,
        typer.Option(
            "--i-am-the-licensee",
            help="URL ack for --include-base on non-redistributable bases.",
        ),
    ] = None,
) -> None:
    """Upload a .dlm or .dlm.pack to an HF repo, URL endpoint, or local path.

    Exits 1 (after printing to stderr) when minisign is unavailable or
    the share layer reports a push failure. On success prints the
    destination + transfer size, and — for HF pushes — the matching
    `dlm pull` install hint.
    """
    from rich.console import Console

    from dlm.share import ShareError, push
    from dlm.share.signing import MinisignNotAvailableError

    # Status goes to stderr so stdout stays clean for scripting.
    console = Console(stderr=True)

    try:
        result = push(
            path,
            to,
            sign=sign,
            include_exports=include_exports,
            include_base=include_base,
            include_logs=include_logs,
            licensee_acceptance_url=licensee,
        )
    except (MinisignNotAvailableError, ShareError) as exc:
        # Both failure modes got byte-identical handling before; one
        # tuple-except removes the duplicated print/raise blocks.
        console.print(f"[red]push:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    size_mb = result.bytes_sent / (1024 * 1024)
    console.print(f"[green]pushed:[/green] {result.destination} ({size_mb:.2f} MB)")
    if result.sink_kind.value == "hf":
        console.print(f"[dim]install:[/dim] dlm pull {result.destination}")
    if result.detail:
        console.print(f"[dim]{result.detail}[/dim]")
3403
3404
def pull_cmd(
    source: Annotated[
        str,
        typer.Argument(
            help=(
                "Source: `hf:<org>/<repo>`, `https://...`, "
                "`peer://host:port/<id>?token=...`, or a local path."
            )
        ),
    ],
    out: Annotated[
        Path | None,
        typer.Option("--out", help="Directory for the restored .dlm (default: CWD)."),
    ] = None,
    force: Annotated[
        bool,
        typer.Option("--force", help="Overwrite an existing store with the same dlm_id."),
    ] = False,
) -> None:
    """Download + verify + unpack a .dlm.pack from a remote source.

    Exits 1 on share-layer or pack-integrity failures. On success
    reports source → restored path, transfer size, and the signature
    verification status (verified / unverified / unsigned).
    """
    from rich.console import Console

    from dlm.pack.errors import PackError
    from dlm.share import ShareError, pull
    from dlm.share.signing import VerifyStatus

    console = Console(stderr=True)

    try:
        result = pull(source, out_dir=out, force=force)
    except ShareError as exc:
        console.print(f"[red]pull:[/red] {exc}")
        raise typer.Exit(code=1) from exc
    except PackError as exc:
        console.print(f"[red]pull:[/red] pack integrity: {exc}")
        raise typer.Exit(code=1) from exc

    size_mb = result.bytes_received / (1024 * 1024)
    # Fix: the original ran "{result.source}{result.dlm_path}" together
    # with no separator, fusing the two values into one unreadable token.
    console.print(
        f"[green]pulled:[/green] {result.source} → {result.dlm_path} ({size_mb:.2f} MB)"
    )

    status = result.verification.status
    if status == VerifyStatus.VERIFIED:
        console.print(
            f"[green]verified:[/green] signature matches "
            f"[bold]{result.verification.key_path}[/bold]"
        )
    elif status == VerifyStatus.UNVERIFIED:
        console.print(
            f"[yellow]unverified:[/yellow] signature present but "
            f"not matched ({result.verification.detail}); sha256 still validated"
        )
    else:
        console.print("[dim]unsigned[/dim] (sha256 integrity still validated)")
3458
3459
def serve_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file to serve.")],
    port: Annotated[int, typer.Option("--port")] = 7337,
    public: Annotated[
        bool,
        typer.Option(
            "--public",
            help="Bind 0.0.0.0 (requires --i-know-this-is-public); otherwise 127.0.0.1.",
        ),
    ] = False,
    i_know_public: Annotated[
        bool,
        typer.Option(
            "--i-know-this-is-public",
            help="Confirm binding 0.0.0.0 is safe on this network.",
        ),
    ] = False,
    max_concurrency: Annotated[
        int,
        typer.Option("--max-concurrency", help="Max concurrent connections per token."),
    ] = 4,
    rate_limit: Annotated[
        int,
        typer.Option("--rate-limit", help="Max requests per minute per token."),
    ] = 30,
    token_ttl_minutes: Annotated[
        int, typer.Option("--token-ttl-minutes", help="Token lifetime in minutes.")
    ] = 15,
) -> None:
    """Serve a .dlm's pack over LAN for peers to pull.

    Packs the document into a temp dir that lives as long as the
    server, then blocks until shutdown; the temp dir is removed in the
    `finally`. Exits 1 when the .dlm has no training state yet.
    """
    from rich.console import Console

    from dlm.doc.parser import parse_file
    from dlm.pack.packer import pack as pack_fn
    from dlm.share import ServeOptions, serve
    from dlm.store.paths import for_dlm

    console = Console(stderr=True)

    # NOTE(review): parse_file errors propagate as a raw traceback here,
    # unlike the cache/preference commands that wrap it — confirm intended.
    parsed = parse_file(path)
    dlm_id = parsed.frontmatter.dlm_id

    # pack() calls load_manifest(), which crashes with an unhelpful
    # "store manifest corrupt" error on a .dlm that's never been
    # trained. Surface the true cause instead.
    store = for_dlm(dlm_id)
    if not store.manifest.exists():
        console.print(
            f"[red]serve:[/red] no training state for {dlm_id} — run [bold]dlm train[/bold] first."
        )
        raise typer.Exit(code=1)

    # Pack into a temp file that lives as long as the server does.
    import tempfile

    tmp_dir = Path(tempfile.mkdtemp(prefix="dlm-serve-"))
    tmp_pack = tmp_dir / f"{path.stem}.dlm.pack"
    pack_fn(path, out=tmp_pack)
    console.print(f"[dim]packed:[/dim] {tmp_pack} ({tmp_pack.stat().st_size} bytes)")

    opts = ServeOptions(
        port=port,
        public=public,
        i_know_this_is_public=i_know_public,
        max_concurrency=max_concurrency,
        rate_limit_per_min=rate_limit,
        # CLI takes minutes for ergonomics; ServeOptions wants seconds.
        token_ttl_seconds=token_ttl_minutes * 60,
    )
    handle = serve(dlm_id, tmp_pack, opts)

    console.print(
        f"[green]serving:[/green] {path.name} (dlm_id {dlm_id}) on "
        f"[bold]http://{handle.bind_host}:{handle.port}/{dlm_id}[/bold]"
    )
    console.print(f"[bold]peer URL:[/bold] {handle.peer_url}")
    console.print(f"[dim]token valid for {token_ttl_minutes} min. Ctrl-C to stop.[/dim]")

    try:
        # Block until the server stops (Ctrl-C raises out of here).
        handle.wait_shutdown()
    finally:
        import shutil

        # Always remove the staged pack, even on abnormal exit.
        shutil.rmtree(tmp_dir, ignore_errors=True)
        console.print("[dim]stopped.[/dim]")
3544
3545
3546 # ---- Cache Commands --------------------------------------------------
3547
3548
def cache_show_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file to inspect the cache for.")],
    json_out: Annotated[bool, typer.Option("--json", help="Emit machine-readable JSON.")] = False,
) -> None:
    """Show tokenized-section cache size, entry count, last-run hit rate."""
    import json as _json
    import sys as _sys

    from rich.console import Console

    from dlm.directives.cache import TokenizedCache
    from dlm.doc.errors import DlmParseError
    from dlm.doc.parser import parse_file
    from dlm.metrics import queries as _queries
    from dlm.store.paths import for_dlm

    console = Console(stderr=True)
    out_console = Console()

    try:
        parsed = parse_file(path)
    except (DlmParseError, OSError) as exc:
        console.print(f"[red]cache:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    store = for_dlm(parsed.frontmatter.dlm_id)
    cache = TokenizedCache.open(store.tokenized_cache_dir)
    latest = _queries.latest_tokenization(store.root)

    if json_out:
        payload: dict[str, object] = {
            "dlm_id": parsed.frontmatter.dlm_id,
            "cache_path": str(store.tokenized_cache_dir),
            "entry_count": cache.entry_count,
            "bytes": cache.total_bytes,
            "last_run_hit_rate": latest.hit_rate if latest else None,
            "last_run_id": latest.run_id if latest else None,
        }
        # Raw stdout keeps the JSON unwrapped by terminal width.
        _sys.stdout.write(_json.dumps(payload, indent=2) + "\n")
        return

    out_console.print(f"[bold]Cache for {parsed.frontmatter.dlm_id}[/bold]")
    out_console.print(f" path: {store.tokenized_cache_dir}")
    out_console.print(f" entries: {cache.entry_count}")
    out_console.print(f" size: {_human_size(cache.total_bytes)}")
    if latest is None:
        out_console.print(" last-run hit rate: [dim]no tokenization runs yet[/dim]")
    else:
        out_console.print(
            f" last-run hit rate: {latest.hit_rate:.1%} "
            f"({latest.cache_hits}/{latest.cache_hits + latest.cache_misses})"
        )
3601
3602
def cache_prune_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file to prune the cache for.")],
    older_than: Annotated[
        str | None,
        typer.Option(
            "--older-than",
            help=(
                "Drop entries not accessed in this duration. "
                "Format: `30d`, `12h`, `45m`. When omitted, defaults to "
                "the document's `training.cache.prune_older_than_days` "
                "(90d pre-v9 docs inherit)."
            ),
        ),
    ] = None,
) -> None:
    """Remove tokenized-cache entries not accessed within a cutoff."""
    from rich.console import Console

    from dlm.directives.cache import TokenizedCache
    from dlm.doc.errors import DlmParseError
    from dlm.doc.parser import parse_file
    from dlm.store.paths import for_dlm

    console = Console(stderr=True)

    # Parse the doc first — we need it either way (for dlm_id) AND
    # for the frontmatter default when --older-than is absent.
    try:
        parsed = parse_file(path)
    except (DlmParseError, OSError) as exc:
        console.print(f"[red]cache:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    if older_than is None:
        # No CLI override: use the per-doc default. Pre-v9 docs get the
        # CacheConfig default of 90 days via the Pydantic factory on parse.
        days = parsed.frontmatter.training.cache.prune_older_than_days
        seconds = float(days) * 86400.0
        cutoff_label = f"{days}d"
    else:
        parsed_seconds = _parse_duration(older_than)
        if parsed_seconds is None:
            console.print(
                f"[red]cache:[/red] invalid --older-than {older_than!r} "
                "(expected e.g. 30d, 12h, 45m)"
            )
            raise typer.Exit(code=2)
        seconds = parsed_seconds
        cutoff_label = older_than

    store = for_dlm(parsed.frontmatter.dlm_id)
    cache = TokenizedCache.open(store.tokenized_cache_dir)
    removed = cache.prune(older_than_seconds=seconds)
    cache.save_manifest()
    console.print(f"[green]cache:[/green] pruned {removed} entr(y/ies) older than {cutoff_label}")
3658
3659
def cache_clear_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file to wipe the cache for.")],
    force: Annotated[
        bool,
        typer.Option("--force", help="Skip the confirmation prompt."),
    ] = False,
) -> None:
    """Wipe every entry in the tokenized-section cache for this store."""
    from rich.console import Console

    from dlm.directives.cache import TokenizedCache
    from dlm.doc.errors import DlmParseError
    from dlm.doc.parser import parse_file
    from dlm.store.paths import for_dlm

    console = Console(stderr=True)

    try:
        parsed = parse_file(path)
    except (DlmParseError, OSError) as exc:
        console.print(f"[red]cache:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    store = for_dlm(parsed.frontmatter.dlm_id)
    cache = TokenizedCache.open(store.tokenized_cache_dir)

    # Prompt only when there is something to lose and --force is absent.
    if cache.entry_count > 0 and not force:
        proceed = typer.confirm(
            f"wipe {cache.entry_count} entries ({_human_size(cache.total_bytes)})?"
        )
        if not proceed:
            console.print("[yellow]cache:[/yellow] clear cancelled")
            raise typer.Exit(code=0)

    removed = cache.clear()
    cache.save_manifest()
    console.print(f"[green]cache:[/green] cleared {removed} entr(y/ies)")
3697
3698
3699 def _parse_duration(spec: str) -> float | None:
3700 """Parse a duration like `30d`, `12h`, `45m` → seconds. None on
3701 malformed input."""
3702 if not spec or not spec[:-1].isdigit():
3703 return None
3704 n = int(spec[:-1])
3705 unit = spec[-1].lower()
3706 if unit == "s":
3707 return float(n)
3708 if unit == "m":
3709 return float(n) * 60
3710 if unit == "h":
3711 return float(n) * 3600
3712 if unit == "d":
3713 return float(n) * 86400
3714 return None
3715
3716
3717 # --- preference -----------------------------------------------------------
3718
3719
def preference_mine_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file to mine preferences from.")],
    samples: Annotated[
        int,
        typer.Option("--samples", help="Candidate responses to sample per prompt.", min=2),
    ] = 4,
    judge: Annotated[
        str,
        typer.Option(
            "--judge",
            help="Judge selector: sway, hf:<model>, or cli:<cmd>.",
        ),
    ] = "sway",
    threshold: Annotated[
        float | None,
        typer.Option(
            "--threshold",
            help="Minimum chosen-vs-rejected score margin. Defaults to the judge's native threshold.",
            min=0.0,
        ),
    ] = None,
    max_pairs: Annotated[
        int | None,
        typer.Option(
            "--max-pairs",
            help="Maximum mined preference pairs to keep from this run.",
            min=1,
        ),
    ] = None,
    temp: Annotated[
        float,
        typer.Option("--temp", help="Sampling temperature for candidate generation.", min=0.0),
    ] = 0.7,
    top_p: Annotated[
        float | None,
        typer.Option(
            "--top-p",
            help="Optional nucleus-sampling cutoff for candidate generation.",
            min=0.0,
            max=1.0,
        ),
    ] = None,
    backend: Annotated[
        str,
        typer.Option(
            "--backend",
            help="Generation backend: auto, pytorch, or mlx.",
        ),
    ] = "auto",
    adapter: Annotated[
        str | None,
        typer.Option(
            "--adapter",
            help=(
                "Named adapter to mine from on multi-adapter documents. "
                "Required there; invalid on single-adapter documents."
            ),
        ),
    ] = None,
    apply: Annotated[
        bool,
        typer.Option(
            "--apply",
            help=(
                "Write mined preference sections directly to the .dlm. "
                "Default stages them for `dlm preference apply`."
            ),
        ),
    ] = False,
) -> None:
    """Sample + stage auto-mined preference sections from the current adapter.

    Flow: validate flags → parse the .dlm → resolve the base model and
    generation backend → sample candidates and judge them into a mine plan →
    record a metrics event → either write the plan to the file (`--apply`)
    or stage it for `dlm preference apply`.

    Exit codes: 2 for usage/validation errors and an empty mine plan; 1 for
    parse, license, judge-availability, or adapter-load failures.
    """
    from rich.console import Console

    from dlm.base_models import GatedModelError
    from dlm.base_models import resolve as resolve_base_model
    from dlm.doc.errors import DlmParseError
    from dlm.doc.parser import parse_file
    from dlm.hardware import doctor
    from dlm.inference import AdapterNotFoundError
    from dlm.inference.backends import (
        UnsupportedBackendError,
        build_backend,
        select_backend,
    )
    from dlm.metrics import MetricsRecorder, PreferenceMineEvent
    from dlm.metrics.events import PreferenceMineWriteMode
    from dlm.modality import modality_for
    from dlm.preference import (
        InvalidJudgeSpecError,
        JudgeUnavailableError,
        build_apply_plan,
        build_judge,
        build_mine_plan,
        render_apply_plan,
        render_mine_plan,
    )
    from dlm.preference.apply import apply_plan as apply_preference_plan
    from dlm.preference.pending import clear_pending_plan, save_pending_plan
    from dlm.store.paths import for_dlm

    # Diagnostics go to stderr; plan rendering and summaries go to stdout.
    console = Console(stderr=True)
    out_console = Console()

    # Cheap flag validation first, before any parsing or model work.
    if backend not in ("auto", "pytorch", "mlx"):
        console.print(
            f"[red]preference:[/red] --backend must be `auto`, `pytorch`, or `mlx` (got {backend!r})."
        )
        raise typer.Exit(code=2)

    try:
        parsed = parse_file(path)
    except (DlmParseError, OSError) as exc:
        console.print(f"[red]preference:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    # --adapter is required on multi-adapter documents (those declaring
    # `training.adapters`) and rejected on single-adapter ones.
    adapters_declared = parsed.frontmatter.training.adapters
    if adapter is not None:
        if adapters_declared is None:
            console.print(
                "[red]preference:[/red] --adapter is only valid on multi-adapter "
                "documents (this doc does not declare `training.adapters`)."
            )
            raise typer.Exit(code=2)
        if adapter not in adapters_declared:
            declared = sorted(adapters_declared)
            console.print(
                f"[red]preference:[/red] --adapter {adapter!r} is not declared "
                f"(declared: {declared})."
            )
            raise typer.Exit(code=2)
    elif adapters_declared is not None:
        console.print(
            "[red]preference:[/red] multi-adapter documents require --adapter "
            "so mining knows which adapter to sample."
        )
        raise typer.Exit(code=2)

    # The sway judge is not wired up for named adapters yet; steer users to
    # the external judge selectors in that case.
    judge_kind = judge.split(":", 1)[0].strip()
    if adapter is not None and judge_kind == "sway":
        console.print(
            "[red]preference:[/red] --judge sway is not yet wired for named adapters; "
            "use `hf:<model>` or `cli:<cmd>` for multi-adapter mining."
        )
        raise typer.Exit(code=2)

    # Mining samples from a trained adapter, so a prior training run must exist.
    store = for_dlm(parsed.frontmatter.dlm_id)
    run_id = _latest_training_run_id(store)
    if run_id is None:
        console.print(
            "[red]preference:[/red] mining requires a prior training run (run `dlm train` first)."
        )
        raise typer.Exit(code=1)

    # Gated bases need a license acceptance already recorded in this store;
    # this command never accepts a license itself.
    already_accepted = _previously_accepted(store.manifest)
    try:
        spec = resolve_base_model(parsed.frontmatter.base_model, accept_license=already_accepted)
    except GatedModelError as exc:
        console.print(
            f"[red]license:[/red] base {parsed.frontmatter.base_model!r} is gated and has "
            "no recorded acceptance in this store; run `dlm train --i-accept-license` first."
        )
        raise typer.Exit(code=1) from exc

    # Text-only for now: image/audio bases are rejected up front.
    dispatch = modality_for(spec)
    if dispatch.accepts_images or dispatch.accepts_audio:
        console.print(
            f"[red]preference:[/red] preference mining currently supports text bases only; "
            f"base {spec.key!r} is modality='{spec.modality}'."
        )
        raise typer.Exit(code=2)

    # Resolve the concrete generation backend against detected hardware.
    caps = doctor().capabilities
    try:
        backend_name = select_backend(backend, caps)  # type: ignore[arg-type]
    except UnsupportedBackendError as exc:
        console.print(f"[red]preference:[/red] {exc}")
        raise typer.Exit(code=2) from exc
    backend_obj = build_backend(backend_name, caps)

    try:
        backend_obj.load(spec, store, adapter_name=adapter)
    except AdapterNotFoundError as exc:
        console.print(f"[red]preference:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    # Build the mine plan: sample candidates, score them with the judge,
    # and keep confident chosen/rejected pairs.
    try:
        judge_obj = build_judge(judge, dlm_path=path)
        plan = build_mine_plan(
            parsed,
            backend_obj,
            judge_obj,
            mined_run_id=run_id,
            samples=samples,
            max_pairs=max_pairs,
            threshold=threshold,
            temperature=temp,
            top_p=top_p,
        )
    except InvalidJudgeSpecError as exc:
        console.print(f"[red]preference:[/red] {exc}")
        raise typer.Exit(code=2) from exc
    except JudgeUnavailableError as exc:
        console.print(f"[red]preference:[/red] {exc}")
        raise typer.Exit(code=1) from exc
    except ValueError as exc:
        console.print(f"[red]preference:[/red] {exc}")
        raise typer.Exit(code=2) from exc
    finally:
        # Release backend resources whether or not planning succeeded.
        backend_obj.unload()

    recorder = MetricsRecorder(store.root)

    def _record_preference_mine(write_mode: PreferenceMineWriteMode) -> None:
        # One metrics event per invocation; `write_mode` distinguishes the
        # outcome ("empty" / "applied" / "staged").
        recorder.record_preference_mine(
            PreferenceMineEvent(
                run_id=run_id,
                judge_name=judge_obj.name,
                sample_count=samples,
                mined_pairs=len(plan.additions),
                skipped_prompts=len(plan.skipped),
                write_mode=write_mode,
            )
        )

    out_console.print(render_mine_plan(plan))

    # An empty plan clears any stale staged plan and exits non-zero so
    # scripts can detect "nothing mined".
    if not plan.additions:
        clear_pending_plan(store)
        _record_preference_mine("empty")
        out_console.print(
            "\n[yellow]no candidates to mine[/yellow] — either instruction prompts "
            "did not yield a confident pair, or the matching preference sections "
            "already exist in the document."
        )
        raise typer.Exit(code=2)

    sections = [addition.section for addition in plan.additions]

    if apply:
        # --apply: write directly to the document and clear any staged plan.
        apply_plan = build_apply_plan(parsed, sections)
        out_console.print("")
        out_console.print(render_apply_plan(apply_plan))
        summary = apply_preference_plan(parsed, apply_plan, target=path)
        clear_pending_plan(store)
        _record_preference_mine("applied")
        out_console.print(
            f"\n[green]preference:[/green] wrote {summary.added} section(s) to {path} "
            f"({summary.skipped} skipped)"
        )
        return

    # Default: stage the plan for a later `dlm preference apply`.
    pending = save_pending_plan(store, source_path=path.resolve(), sections=sections)
    _record_preference_mine("staged")
    out_console.print(
        f"\n[green]preference:[/green] staged {len(pending.sections)} mined preference "
        f"section(s). Run [bold]dlm preference apply {path}[/bold] to write them."
    )
3977
3978
def preference_apply_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file to apply staged preferences into.")],
) -> None:
    """Write the staged preference-mine plan into the `.dlm`."""
    from rich.console import Console

    from dlm.doc.errors import DlmParseError
    from dlm.doc.parser import parse_file
    from dlm.preference import build_apply_plan, render_apply_plan
    from dlm.preference.apply import apply_plan as apply_preference_plan
    from dlm.preference.pending import (
        PendingPreferencePlanError,
        clear_pending_plan,
        load_pending_plan,
    )
    from dlm.store.paths import for_dlm

    err_console = Console(stderr=True)
    out = Console()

    # Parse the target document up front; everything below keys off it.
    try:
        document = parse_file(path)
    except (DlmParseError, OSError) as exc:
        err_console.print(f"[red]preference:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    # Load whatever `dlm preference mine` staged for this document's store.
    store = for_dlm(document.frontmatter.dlm_id)
    try:
        staged = load_pending_plan(store)
    except PendingPreferencePlanError as exc:
        err_console.print(f"[red]preference:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    if staged is None:
        err_console.print(
            "[red]preference:[/red] no staged mined preferences found; "
            "run `dlm preference mine` first."
        )
        raise typer.Exit(code=1)

    plan = build_apply_plan(document, list(staged.sections))
    out.print(render_apply_plan(plan))

    if not plan.additions:
        # Nothing new to write: the staged sections are already in the doc.
        clear_pending_plan(store)
        out.print(
            "\n[yellow]no staged preferences to write[/yellow] — the pending plan was "
            "already present in the document."
        )
        raise typer.Exit(code=2)

    written = apply_preference_plan(document, plan, target=path)
    clear_pending_plan(store)
    out.print(
        f"\n[green]preference:[/green] wrote {written.added} section(s) to {path} "
        f"({written.skipped} skipped)"
    )
4036
4037
def preference_revert_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file to strip auto-mined preferences from.")],
) -> None:
    """Remove every `auto_mined: true` preference section from the `.dlm`."""
    from rich.console import Console

    from dlm.doc.errors import DlmParseError
    from dlm.doc.parser import parse_file
    from dlm.preference import revert_all_auto_mined

    err_console = Console(stderr=True)
    out = Console()

    # A malformed document fails cleanly before any rewrite is attempted.
    try:
        document = parse_file(path)
    except (DlmParseError, OSError) as exc:
        err_console.print(f"[red]preference:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    stripped = revert_all_auto_mined(document, target=path)
    out.print(
        f"[green]preference:[/green] stripped {len(stripped.added_section_ids)} "
        f"auto-mined preference section(s) from {path}"
    )
4062
4063
def preference_list_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file whose auto-mined preferences we list.")],
) -> None:
    """List applied + staged auto-mined preference sections.

    "Applied" sections are `auto_mined: true` preference sections already in
    the document; "pending" ones are staged in the store by
    `dlm preference mine` and not yet written.
    """
    from rich.console import Console

    from dlm.doc.errors import DlmParseError
    from dlm.doc.parser import parse_file
    from dlm.doc.sections import SectionType
    from dlm.preference.pending import PendingPreferencePlanError, load_pending_plan
    from dlm.store.paths import for_dlm

    console = Console(stderr=True)
    out_console = Console()

    try:
        parsed = parse_file(path)
    except (DlmParseError, OSError) as exc:
        console.print(f"[red]preference:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    store = for_dlm(parsed.frontmatter.dlm_id)
    try:
        pending = load_pending_plan(store)
    except PendingPreferencePlanError as exc:
        console.print(f"[red]preference:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    applied = [
        section
        for section in parsed.sections
        if section.type is SectionType.PREFERENCE and section.auto_mined
    ]

    out_console.print(f"[bold]{path}[/bold]")
    out_console.print(f" applied auto-mined: {len(applied)}")
    out_console.print(f" staged pending: {len(pending.sections) if pending else 0}")

    if not applied and pending is None:
        out_console.print(" [dim]no auto-mined preference sections yet[/dim]")
        return

    def _print_listing(heading: str, sections) -> None:
        # Shared renderer for the applied and pending listings; the two
        # loops were previously duplicated verbatim.
        out_console.print(f"\n[bold]{heading}[/bold]")
        for section in sections:
            prompt = _preference_prompt_summary(section.content, section_id=section.section_id)
            judge_name = section.judge_name or "unknown"
            run_id = section.mined_run_id if section.mined_run_id is not None else "?"
            out_console.print(
                f" - {section.section_id} judge={judge_name} run={run_id} prompt={prompt}"
            )

    if applied:
        _print_listing("Applied", applied)
    if pending is not None:
        _print_listing("Pending", pending.sections)
4125
4126
def _latest_training_run_id(store: object) -> int | None:
    """Most recent run id from metrics DB or manifest."""
    from dlm.metrics.queries import latest_run_id
    from dlm.store.errors import ManifestCorruptError
    from dlm.store.manifest import load_manifest
    from dlm.store.paths import StorePath

    assert isinstance(store, StorePath)

    # The metrics DB wins whenever it has any run recorded.
    from_metrics = latest_run_id(store.root)
    if from_metrics is not None:
        return from_metrics

    # Fall back to the manifest; a missing or corrupt one means "no runs".
    if not store.manifest.exists():
        return None
    try:
        manifest = load_manifest(store.manifest)
    except (ManifestCorruptError, OSError):
        return None
    runs = manifest.training_runs
    if not runs:
        return None
    return max(entry.run_id for entry in runs)
4148
4149
def _preference_prompt_summary(content: str, *, section_id: str) -> str:
    """Best-effort prompt summary for `preference list`.

    Returns the stripped first line of the first triple's prompt, or a
    `<...>` placeholder when the body is unparseable, empty, or blank.
    """
    from dlm.data.errors import PreferenceParseError
    from dlm.data.preference_parser import parse_preference_body

    try:
        triples = parse_preference_body(content, section_id=section_id)
    except PreferenceParseError:
        return "<unparseable>"
    if not triples:
        return "<empty>"
    # `"".splitlines()` is `[]`; guard it so an empty prompt renders as
    # "<blank>" instead of raising IndexError.
    lines = triples[0].prompt.splitlines()
    prompt = lines[0].strip() if lines else ""
    return prompt or "<blank>"
4163
4164
4165 # --- synth -----------------------------------------------------------------
4166
4167
def synth_instructions_cmd(
    path: Annotated[
        Path, typer.Argument(help=".dlm file to synthesize instruction sections from.")
    ],
    teacher: Annotated[
        str,
        typer.Option(
            "--teacher",
            help=(
                "Teacher selector: self, hf:<model>, openai:<model>, "
                "anthropic:<model>, or vllm-server:<url>."
            ),
        ),
    ] = "self",
    per_section: Annotated[
        int,
        typer.Option(
            "--per-section",
            help="Instruction pairs to generate per prose section.",
            min=1,
        ),
    ] = 3,
    strategy: Annotated[
        str,
        typer.Option(
            "--strategy",
            help="Synthesis strategy: extraction, expansion, or both.",
        ),
    ] = "extraction",
    filter_kind: Annotated[
        str,
        typer.Option(
            "--filter",
            help="Filter pipeline: sway, none, or dedup-only.",
        ),
    ] = "sway",
    threshold: Annotated[
        float | None,
        typer.Option(
            "--threshold",
            help="Optional minimum sway-judge margin when --filter=sway.",
            min=0.0,
        ),
    ] = None,
    max_pairs: Annotated[
        int | None,
        typer.Option(
            "--max-pairs",
            help="Maximum accepted synth pairs to keep from this run.",
            min=1,
        ),
    ] = None,
    max_new_tokens: Annotated[
        int,
        typer.Option(
            "--max-new-tokens",
            help="Maximum new tokens the teacher may emit per prompt.",
            min=1,
        ),
    ] = 512,
    temp: Annotated[
        float,
        typer.Option("--temp", help="Teacher sampling temperature.", min=0.0),
    ] = 0.0,
    top_p: Annotated[
        float | None,
        typer.Option(
            "--top-p",
            help="Optional top-p cutoff for teacher sampling.",
            min=0.0,
            max=1.0,
        ),
    ] = None,
    seed: Annotated[
        int | None,
        typer.Option("--seed", help="Optional teacher sampling seed."),
    ] = None,
    apply: Annotated[
        bool,
        typer.Option(
            "--apply",
            help="Write accepted auto-synth sections directly to the .dlm.",
        ),
    ] = False,
    dry_run: Annotated[
        bool,
        typer.Option(
            "--dry-run",
            help="Preview the synth plan without staging or writing anything.",
        ),
    ] = False,
) -> None:
    """Generate, stage, or apply auto-synth instruction sections.

    Flow: validate flags → parse the .dlm → build the teacher and generate a
    raw synth plan → filter it (sway judge / dedup / none) → then write
    (`--apply`), preview only (`--dry-run`), or stage for later.

    Exit codes: 2 for usage/validation errors and an empty filtered plan;
    1 for parse, teacher, or judge failures.
    """
    from rich.console import Console

    from dlm.doc.errors import DlmParseError
    from dlm.doc.parser import parse_file
    from dlm.preference import JudgeUnavailableError, build_judge
    from dlm.store.paths import for_dlm
    from dlm.synth import (
        InvalidTeacherSpecError,
        TeacherInvocationError,
        TeacherUnavailableError,
        build_synth_plan,
        build_teacher,
        clear_pending_plan,
        filter_synth_plan,
        render_filter_report,
        render_synth_plan,
        save_pending_plan,
    )
    from dlm.synth import (
        apply_plan as apply_synth_plan,
    )
    from dlm.synth import (
        build_apply_plan as build_synth_apply_plan,
    )
    from dlm.synth import (
        render_apply_plan as render_synth_apply_plan,
    )

    # Diagnostics on stderr; plan/report rendering on stdout.
    console = Console(stderr=True)
    out_console = Console()

    # Cross-flag validation before any parsing or model work.
    if strategy not in ("extraction", "expansion", "both"):
        console.print(
            "[red]synth:[/red] --strategy must be one of extraction|expansion|both "
            f"(got {strategy!r})."
        )
        raise typer.Exit(code=2)
    if filter_kind not in ("sway", "none", "dedup-only"):
        console.print(
            f"[red]synth:[/red] --filter must be one of sway|none|dedup-only (got {filter_kind!r})."
        )
        raise typer.Exit(code=2)
    if apply and dry_run:
        console.print("[red]synth:[/red] --apply and --dry-run are mutually exclusive.")
        raise typer.Exit(code=2)
    if threshold is not None and filter_kind != "sway":
        console.print("[red]synth:[/red] --threshold is only valid when --filter is `sway`.")
        raise typer.Exit(code=2)

    try:
        parsed = parse_file(path)
    except (DlmParseError, OSError) as exc:
        console.print(f"[red]synth:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    store = for_dlm(parsed.frontmatter.dlm_id)

    # Build the teacher and generate the raw (unfiltered) synth plan.
    try:
        strategy_value = cast(Literal["extraction", "expansion", "both"], strategy)
        teacher_obj = build_teacher(teacher, dlm_path=path)
        plan = build_synth_plan(
            parsed,
            teacher_obj,
            per_section=per_section,
            strategy=strategy_value,
            max_pairs=max_pairs,
            max_new_tokens=max_new_tokens,
            temperature=temp,
            top_p=top_p,
            seed=seed,
        )
    except InvalidTeacherSpecError as exc:
        console.print(f"[red]synth:[/red] {exc}")
        raise typer.Exit(code=2) from exc
    except TeacherUnavailableError as exc:
        console.print(f"[red]synth:[/red] {exc}")
        raise typer.Exit(code=1) from exc
    except TeacherInvocationError as exc:
        console.print(f"[red]synth:[/red] {exc}")
        raise typer.Exit(code=1) from exc
    except ValueError as exc:
        console.print(f"[red]synth:[/red] {exc}")
        raise typer.Exit(code=2) from exc

    # Only the sway filter needs a judge; build it lazily when requested.
    judge_obj = None
    if filter_kind == "sway":
        try:
            judge_obj = build_judge("sway", dlm_path=path)
        except JudgeUnavailableError as exc:
            console.print(f"[red]synth:[/red] {exc}")
            raise typer.Exit(code=1) from exc

    # Run the filter pipeline (dedup and/or judge) over the raw plan.
    try:
        filter_value = cast(Literal["sway", "none", "dedup-only"], filter_kind)
        filtered = filter_synth_plan(
            plan,
            filter_kind=filter_value,
            judge=judge_obj,
            threshold=threshold,
        )
    except ValueError as exc:
        console.print(f"[red]synth:[/red] {exc}")
        raise typer.Exit(code=2) from exc

    out_console.print(render_synth_plan(plan))
    out_console.print("")
    out_console.print(render_filter_report(filtered))

    # An empty filtered plan clears any stale staged plan (unless this is a
    # dry run) and exits non-zero so scripts can detect "nothing accepted".
    if not filtered.additions:
        if not dry_run:
            clear_pending_plan(store)
        out_console.print(
            "\n[yellow]no synth additions accepted[/yellow] — either generation "
            "yielded no valid pairs, dedup removed them, or the filter rejected them."
        )
        raise typer.Exit(code=2)

    sections = [addition.addition.section for addition in filtered.additions]

    if apply:
        # --apply: write accepted sections straight into the document.
        apply_plan = build_synth_apply_plan(parsed, sections)
        out_console.print("")
        out_console.print(render_synth_apply_plan(apply_plan))
        summary = apply_synth_plan(parsed, apply_plan, target=path)
        clear_pending_plan(store)
        out_console.print(
            f"\n[green]synth:[/green] wrote {summary.added} section(s) to {path} "
            f"({summary.skipped} skipped)"
        )
        return

    if dry_run:
        out_console.print("\n[green]synth:[/green] dry-run only — nothing staged.")
        return

    # Default: stage accepted sections in the store for later inspection/apply.
    pending = save_pending_plan(store, source_path=path.resolve(), sections=sections)
    out_console.print(
        f"\n[green]synth:[/green] staged {len(pending.sections)} auto-synth instruction "
        f"section(s). Run [bold]dlm synth list {path}[/bold] to inspect them."
    )
4401
4402
def synth_revert_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file to strip auto-synth instructions from.")],
) -> None:
    """Remove every `auto_synth: true` instruction section from the `.dlm`."""
    from rich.console import Console

    from dlm.doc.errors import DlmParseError
    from dlm.doc.parser import parse_file
    from dlm.synth import revert_all_auto_synth

    err_console = Console(stderr=True)
    out = Console()

    # A malformed document fails cleanly before any rewrite is attempted.
    try:
        document = parse_file(path)
    except (DlmParseError, OSError) as exc:
        err_console.print(f"[red]synth:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    stripped = revert_all_auto_synth(document, target=path)
    out.print(
        f"[green]synth:[/green] stripped {len(stripped.added_section_ids)} "
        f"auto-synth instruction section(s) from {path}"
    )
4427
4428
def synth_list_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file whose auto-synth instructions we list.")],
) -> None:
    """List applied + staged auto-synth instruction sections."""
    from rich.console import Console

    from dlm.doc.errors import DlmParseError
    from dlm.doc.parser import parse_file
    from dlm.doc.sections import SectionType
    from dlm.store.paths import for_dlm
    from dlm.synth import PendingSynthPlanError, load_pending_plan

    err_console = Console(stderr=True)
    out = Console()

    try:
        document = parse_file(path)
    except (DlmParseError, OSError) as exc:
        err_console.print(f"[red]synth:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    # Staged sections live in the document's store, written by `dlm synth`.
    store = for_dlm(document.frontmatter.dlm_id)
    try:
        staged = load_pending_plan(store)
    except PendingSynthPlanError as exc:
        err_console.print(f"[red]synth:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    # Applied = auto-synth instruction sections already in the document.
    applied = [
        entry
        for entry in document.sections
        if entry.type is SectionType.INSTRUCTION and entry.auto_synth
    ]

    out.print(f"[bold]{path}[/bold]")
    out.print(f" applied auto-synth: {len(applied)}")
    out.print(f" staged pending: {len(staged.sections) if staged else 0}")

    if not applied and staged is None:
        out.print(" [dim]no auto-synth instruction sections yet[/dim]")
        return

    if applied:
        _render_synth_listing(out, "Applied", applied)
    if staged is not None:
        _render_synth_listing(out, "Pending", staged.sections)
4475
4476
def _render_synth_listing(
    out_console: object,
    heading: str,
    sections: Sequence[object],
) -> None:
    """Print one heading's worth of synth sections, grouped three ways then
    listed individually."""
    from collections import Counter

    from rich.console import Console

    from dlm.doc.sections import Section

    assert isinstance(out_console, Console)
    rows = [entry for entry in sections if isinstance(entry, Section)]

    out_console.print(f"\n[bold]{heading}[/bold]")

    # Same three groupings as before, driven by one (label, counter) table.
    groupings = (
        ("by teacher:", Counter(row.synth_teacher or "unknown" for row in rows)),
        ("by strategy:", Counter(row.synth_strategy or "unknown" for row in rows)),
        ("by source section:", Counter(row.source_section_id or "unknown" for row in rows)),
    )
    for label, counts in groupings:
        out_console.print(f" {label}")
        for key in sorted(counts):
            out_console.print(f" - {key}: {counts[key]}")

    out_console.print(" sections:")
    for row in rows:
        prompt = _synth_prompt_summary(row.content, section_id=row.section_id)
        out_console.print(
            " - "
            f"{row.section_id} teacher={row.synth_teacher or 'unknown'} "
            f"strategy={row.synth_strategy or 'unknown'} "
            f"source={row.source_section_id or 'unknown'} "
            f"prompt={prompt}"
        )
4519
4520
def _synth_prompt_summary(content: str, *, section_id: str) -> str:
    """Best-effort prompt summary for `synth list`.

    Returns the stripped first line of the first pair's question, or a
    `<...>` placeholder when the body is unparseable, empty, or blank.
    """
    from dlm.data.errors import InstructionParseError
    from dlm.data.instruction_parser import parse_instruction_body

    try:
        pairs = parse_instruction_body(content, section_id=section_id)
    except InstructionParseError:
        return "<unparseable>"
    if not pairs:
        return "<empty>"
    # `"".splitlines()` is `[]`; guard it so an empty question renders as
    # "<blank>" instead of raising IndexError.
    lines = pairs[0].question.splitlines()
    prompt = lines[0].strip() if lines else ""
    return prompt or "<blank>"
4534
4535
4536 # --- harvest --------------------------------------------------------------
4537
4538
def harvest_cmd(
    path: Annotated[Path, typer.Argument(help=".dlm file to harvest into.")],
    sway_json: Annotated[
        Path | None,
        typer.Option(
            "--sway-json",
            help="Path to a sway JSON report. Required unless --revert is set.",
        ),
    ] = None,
    apply: Annotated[
        bool,
        typer.Option(
            "--apply",
            help="Write harvested sections to the .dlm. Default is dry-run (review only).",
        ),
    ] = False,
    tag: Annotated[
        str,
        typer.Option(
            "--tag",
            help="Prefix for the synthesized section's harvest_source metadata.",
        ),
    ] = "auto-harvest",
    min_confidence: Annotated[
        float,
        typer.Option(
            "--min-confidence",
            help="Drop candidates whose sway evidence.confidence is below this.",
            min=0.0,
            max=1.0,
        ),
    ] = 0.0,
    strict: Annotated[
        bool,
        typer.Option(
            "--strict/--lax",
            help=(
                "Strict (default): refuse if any failing probe lacks a "
                "reference. Lax: log a warning and skip those probes."
            ),
        ),
    ] = True,
    revert: Annotated[
        bool,
        typer.Option(
            "--revert",
            help=(
                "Strip every auto-harvested section from the document. "
                "Mutually exclusive with --sway-json / --apply."
            ),
        ),
    ] = False,
) -> None:
    """Adversarial replay: harvest failing sway probes back into the .dlm.

    Default mode is `--dry-run`-style preview; pass `--apply` to write.

    Exit codes: 1 for flag conflicts, parse errors, and report-reading
    failures; 2 when the plan has no candidates to harvest.
    """
    from rich.console import Console

    from dlm.doc.errors import DlmParseError
    from dlm.doc.parser import parse_file
    from dlm.harvest import (
        HarvestError,
        MalformedSwayReportError,
        NoReferenceError,
        apply_plan,
        build_plan,
        read_sway_report,
        render_plan,
        revert_all_auto_harvests,
    )

    # Diagnostics on stderr; plan rendering and summaries on stdout.
    console = Console(stderr=True)
    out_console = Console()

    # Flag validation: --revert stands alone; otherwise --sway-json is required.
    if revert and (sway_json is not None or apply):
        console.print(
            "[red]harvest:[/red] --revert is mutually exclusive with --sway-json / --apply"
        )
        raise typer.Exit(code=1)
    if not revert and sway_json is None:
        console.print(
            "[red]harvest:[/red] --sway-json is required (or pass --revert "
            "to strip auto-harvested sections)"
        )
        raise typer.Exit(code=1)

    try:
        parsed = parse_file(path)
    except (DlmParseError, OSError) as exc:
        console.print(f"[red]harvest:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    # --revert strips every auto-harvested section and exits early.
    if revert:
        summary = revert_all_auto_harvests(parsed, target=path)
        out_console.print(
            f"[green]harvest:[/green] stripped {len(summary.added_section_ids)} "
            f"auto-harvested section(s) from {path} (all harvest runs, not just last)"
        )
        return

    assert sway_json is not None  # narrowed by the check above
    # Read the sway report; strictness and confidence filtering happen here.
    try:
        candidates = read_sway_report(
            sway_json,
            strict=strict,
            min_confidence=min_confidence,
        )
    except MalformedSwayReportError as exc:
        console.print(f"[red]harvest:[/red] {exc}")
        raise typer.Exit(code=1) from exc
    except NoReferenceError as exc:
        # Strict mode refuses when a failing probe has no reference; point
        # the user at the --lax escape hatch.
        console.print(f"[red]harvest:[/red] {exc}")
        console.print("  Pass [bold]--lax[/bold] to skip probes without references instead.")
        raise typer.Exit(code=1) from exc
    except HarvestError as exc:
        console.print(f"[red]harvest:[/red] {exc}")
        raise typer.Exit(code=1) from exc

    plan = build_plan(parsed, candidates, tag=tag)
    out_console.print(render_plan(plan))

    # Empty plan exits non-zero so scripts can detect "nothing harvested".
    if not plan.additions:
        out_console.print(
            "\n[yellow]no candidates to harvest[/yellow] — either the sway "
            "report had no failing probes with references, or all matched "
            "sections already exist in the document."
        )
        raise typer.Exit(code=2)

    if not apply:
        out_console.print("\n[dim]dry-run — re-run with [bold]--apply[/bold] to write.[/dim]")
        return

    summary = apply_plan(parsed, plan, target=path)
    out_console.print(
        f"\n[green]harvest:[/green] wrote {summary.added} section(s) to {path} "
        f"({summary.skipped} skipped)"
    )