Python · 52765 bytes Raw Blame History
1 """Command implementations for the ``sway`` CLI.
2
3 Each function here is wired to a subcommand in :mod:`dlm_sway.cli.app`.
4 Commands deliberately do as little as possible themselves — the real
5 work lives in :mod:`dlm_sway.suite`, :mod:`dlm_sway.backends`, and the
6 probes package.
7 """
8
9 from __future__ import annotations
10
11 import json
12 import sys
13 from enum import StrEnum
14 from pathlib import Path
15 from typing import Annotated, Any
16
17 import typer
18 from rich.console import Console
19
20 from dlm_sway import __version__
21 from dlm_sway.core.errors import SwayError
22 from dlm_sway.core.result import SuiteResult, SwayScore, Verdict
23
24
def run_cmd(
    spec: Annotated[Path, typer.Argument(help="Path to a sway.yaml spec.")],
    json_out: Annotated[
        Path | None,
        typer.Option(
            "--json",
            "-j",
            help="Write the JSON report to this path in addition to the terminal render.",
        ),
    ] = None,
    markdown_out: Annotated[
        Path | None,
        typer.Option("--markdown", "-m", help="Write a markdown report to this path."),
    ] = None,
    weights: Annotated[
        str | None,
        typer.Option(
            "--weights",
            help=(
                "Override composite-score category weights. Format: "
                "'adherence=0.4,attribution=0.3,calibration=0.2,ablation=0.1'. "
                "Unspecified categories keep their defaults."
            ),
        ),
    ] = None,
    dry_run: Annotated[
        bool,
        typer.Option(
            "--dry-run",
            help=(
                "Validate the spec, list the probes that would run with their "
                "category, and exit 0 — no backend is built (D6)."
            ),
        ),
    ] = False,
    trace: Annotated[
        Path | None,
        typer.Option(
            "--trace",
            help=(
                "Write a forward-pass trace (JSONL) to this path — one event "
                "per backend scoring call with probe / view / cache-hit info. "
                "Useful for perf investigation; zero overhead when unset."
            ),
        ),
    ] = None,
) -> None:
    """Execute a suite and render a terminal report."""
    # The docstring above doubles as the command's --help text, so the
    # implementation notes live in comments instead.

    # --dry-run short-circuits before any weights parsing or execution.
    if dry_run:
        _print_dry_run(spec)
        return

    # The weights flag is parsed inside the ``try`` so a malformed value is
    # reported the same way as a spec/execution failure: message on stderr,
    # exit code 2.
    try:
        suite_result, composite = _execute_spec(
            spec,
            weights_override=_parse_weights_flag(weights),
            trace_path=trace,
        )
    except SwayError as exc:
        typer.secho(f"error: {exc}", err=True, fg=typer.colors.RED)
        raise typer.Exit(code=2) from exc

    from dlm_sway.suite import report

    console = Console()
    report.to_terminal(suite_result, composite, console=console)

    # Optional file outputs; each one is announced on the console once written.
    if json_out is not None:
        json_text = report.to_json(suite_result, composite)
        json_out.write_text(json_text, encoding="utf-8")
        console.print(f"\n[dim]wrote JSON → {json_out}[/dim]")

    if markdown_out is not None:
        markdown_text = report.to_markdown(suite_result, composite)
        markdown_out.write_text(markdown_text, encoding="utf-8")
        console.print(f"[dim]wrote markdown → {markdown_out}[/dim]")
94
95
def _print_dry_run(spec_path: Path) -> None:
    """Validate the spec at ``spec_path`` and print its probe table (D6).

    The spec is loaded and its probe entries validated; a ``SwayError``
    from either step is reported on stderr and becomes exit code 2. No
    backend is built on this path, which makes it a cheap way to check
    spec edits before paying for a model load.
    """
    from rich.table import Table

    from dlm_sway.probes.base import build_probe, registry, validate_all_probes
    from dlm_sway.suite.loader import load_spec

    try:
        spec = load_spec(spec_path)
        validate_all_probes(spec.suite)
    except SwayError as exc:
        typer.secho(f"error: {exc}", err=True, fg=typer.colors.RED)
        raise typer.Exit(code=2) from exc

    console = Console()
    console.print(f"[bold]dry-run for {spec_path}[/bold] — {len(spec.suite)} probe(s)")
    console.print()

    table = Table(show_header=True, header_style="bold", box=None, padding=(0, 1))
    # (header, style) pairs; ``None`` leaves the column unstyled.
    for header, column_style in (
        ("#", "dim"),
        ("name", "cyan"),
        ("kind", None),
        ("category", "dim"),
        ("enabled", "dim"),
    ):
        table.add_column(header, style=column_style)

    known_probes = registry()
    row_number = 0
    for raw_entry in spec.suite:
        row_number += 1
        probe, probe_spec = build_probe(raw_entry)
        probe_cls = known_probes.get(probe.kind)
        # A kind missing from the registry is shown with a "?" category.
        if probe_cls is None:
            category = "?"
        else:
            category = probe_cls.category
        enabled_label = "yes" if probe_spec.enabled else "no"
        table.add_row(
            str(row_number),
            probe_spec.name,
            probe.kind,
            category,
            enabled_label,
        )
    console.print(table)
137
138
def list_probes_cmd() -> None:
    """List every shipped probe kind with its category + one-line summary (D6)."""
    from rich.table import Table

    # Make sure every probe module has been imported and registered.
    import dlm_sway.probes  # noqa: F401
    from dlm_sway.probes.base import registry

    table = Table(show_header=True, header_style="bold", box=None, padding=(0, 1))
    table.add_column("kind", style="cyan")
    table.add_column("category", style="dim")
    table.add_column("summary")

    # Snapshot the registry once instead of re-calling it for the sort and
    # again for every row (same pattern as ``_print_dry_run``).
    registered = registry()
    for kind in sorted(registered):
        cls = registered[kind]
        # Prefer the class-level docstring, then fall back to the
        # defining module's module-level docstring. Most probe modules
        # lead with a solid one-liner at the top; the class body often
        # skips a docstring to avoid repeating it.
        summary = _first_doc_line(cls.__doc__)
        if not summary:
            # ``sys`` is the module-level import; ``.get`` tolerates a
            # module that is no longer present in ``sys.modules``.
            module = sys.modules.get(cls.__module__)
            summary = _first_doc_line(getattr(module, "__doc__", None))
        table.add_row(kind, cls.category, summary)
    Console().print(table)
165
166
167 def _first_doc_line(doc: str | None) -> str:
168 """Return the first non-empty line of ``doc``, stripped."""
169 if not doc:
170 return ""
171 for line in doc.splitlines():
172 stripped = line.strip()
173 if stripped:
174 return stripped
175 return ""
176
177
def gate_cmd(
    spec: Annotated[Path, typer.Argument(help="Path to a sway.yaml spec.")],
    junit_out: Annotated[
        Path | None, typer.Option("--junit", help="Write JUnit XML for CI ingestion.")
    ] = None,
    coverage_threshold: Annotated[
        float | None,
        typer.Option(
            "--threshold",
            help="Override the spec's coverage_threshold. Exit non-zero below it.",
        ),
    ] = None,
    weights: Annotated[
        str | None,
        typer.Option(
            "--weights",
            help=(
                "Override composite-score category weights. Format: "
                "'adherence=0.4,attribution=0.3,calibration=0.2,ablation=0.1'. "
                "Unspecified categories keep their defaults."
            ),
        ),
    ] = None,
) -> None:
    """Execute a suite and exit non-zero on failure (CI gate)."""
    # Exit codes: 2 when the spec/weights/execution raises ``SwayError``,
    # 1 when the gate fails, 0 (normal return) when it passes.
    try:
        weights_override = _parse_weights_flag(weights)
        result, score_obj = _execute_spec(spec, weights_override=weights_override)
    except SwayError as exc:
        typer.secho(f"error: {exc}", err=True, fg=typer.colors.RED)
        raise typer.Exit(code=2) from exc

    from dlm_sway.suite import report
    from dlm_sway.suite.loader import load_spec as _load_spec

    console = Console()
    report.to_terminal(result, score_obj, console=console)

    if junit_out is not None:
        junit_out.write_text(report.to_junit(result, score_obj), encoding="utf-8")
        console.print(f"[dim]wrote JUnit → {junit_out}[/dim]")

    # --threshold wins; otherwise fall back to the spec's own default.
    threshold = (
        coverage_threshold
        if coverage_threshold is not None
        else _load_spec(spec).defaults.coverage_threshold
    )

    # Collect every reason the gate failed. Previously only the threshold
    # message was shown when a probe FAIL coincided with a low score, which
    # hid the probe failure from the CI log.
    reasons: list[str] = []
    if score_obj.overall < threshold:
        reasons.append(f"overall={score_obj.overall:.2f} < {threshold:.2f}")
    if any(p.verdict == Verdict.FAIL for p in result.probes):
        reasons.append("at least one probe reported FAIL")

    if reasons:
        console.print("\n[red]gate FAILED[/red] — " + "; ".join(reasons))
        raise typer.Exit(code=1)
    console.print(f"\n[green]gate passed[/green] — overall={score_obj.overall:.2f}")
235
236
237 def _infer_base_from_adapter_config(adapter_dir: Path) -> str | None:
238 """Read ``base_model_name_or_path`` from ``adapter_config.json``.
239
240 Returns ``None`` when the file is missing, malformed, or doesn't
241 expose the field. Used by ``sway check`` to make ``--base`` optional
242 in the common case where PEFT already wrote the base id on training
243 (D4).
244 """
245 cfg_path = adapter_dir / "adapter_config.json"
246 if not cfg_path.exists():
247 return None
248 try:
249 data = json.loads(cfg_path.read_text(encoding="utf-8"))
250 except (OSError, json.JSONDecodeError):
251 return None
252 base = data.get("base_model_name_or_path")
253 if isinstance(base, str) and base:
254 return base
255 return None
256
257
258 def _check_banner(score_obj: SwayScore, result: SuiteResult) -> tuple[str, str]:
259 """Compute the (text, rich-style) check verdict banner (D12).
260
261 Calibrated on the delta_kl z-score: ≥3σ is green ("above noise"),
262 ≥1σ is yellow ("marginal"), and below that is red. When no z-score
263 is available (no null calibration ran), falls back to the raw
264 score band.
265 """
266 z = next(
267 (p.z_score for p in result.probes if p.kind == "delta_kl" and p.z_score is not None),
268 None,
269 )
270 if z is not None:
271 if z >= 3.0:
272 return f"✅ adapter is {z:+.2f}σ above noise", "bold green"
273 if z >= 1.0:
274 return f"⚠️ adapter is {z:+.2f}σ above noise — marginal", "bold yellow"
275 return f"❌ adapter is {z:+.2f}σ — indistinguishable from noise", "bold red"
276
277 # Fallback: composite score band.
278 if score_obj.overall >= 0.6:
279 return f"✅ adapter scored {score_obj.overall:.2f} — looks healthy", "bold green"
280 if score_obj.overall >= 0.3:
281 return f"⚠️ adapter scored {score_obj.overall:.2f} — partial fit", "bold yellow"
282 return f"❌ adapter scored {score_obj.overall:.2f} — noise band", "bold red"
283
284
def check_cmd(
    adapter: Annotated[Path, typer.Argument(help="Path to a PEFT adapter directory.")],
    base: Annotated[
        str | None,
        typer.Option(
            "--base",
            help=(
                "HuggingFace base model id or local path. Inferred from "
                "the adapter's ``adapter_config.json`` when omitted (D4)."
            ),
        ),
    ] = None,
    prompts: Annotated[
        Path | None,
        typer.Option(
            "--prompts",
            help="File with one prompt per line. Defaults to sway's built-in quick set.",
        ),
    ] = None,
) -> None:
    """<60s smoke test: "is this adapter doing anything at all?".

    Runs A1 DeltaKL + C2 CalibrationDrift on a small prompt set. No
    spec file required.

    **Banner semantics (F20 clarification).** The ``+N.NNσ above noise``
    header appears only when ``null_adapter`` actually calibrated this
    run — i.e., when the backend implements ``NullCalibratedBackend``.
    Without null calibration (non-HF backends like the HTTP API or MLX
    inference), the banner falls back to the composite score band
    ("healthy", "partial fit", "noise band") and the σ wording is
    suppressed to avoid a false-precision claim.
    """
    from dlm_sway.backends import build as build_backend
    from dlm_sway.core.model import ModelSpec
    from dlm_sway.suite import report
    from dlm_sway.suite.runner import run as run_suite
    from dlm_sway.suite.score import compute as compute_score
    from dlm_sway.suite.spec import SuiteDefaults, SuiteModels, SwaySpec

    # D4: try to infer base model from adapter_config.json before
    # erroring out on a missing --base.
    if base is None:
        inferred = _infer_base_from_adapter_config(adapter)
        if inferred is None:
            typer.secho(
                f"error: --base not given and adapter at {adapter} doesn't carry a "
                f"base_model_name_or_path in adapter_config.json. Pass --base "
                f"explicitly.",
                err=True,
                fg=typer.colors.RED,
            )
            raise typer.Exit(code=2)
        base = inferred
        typer.secho(f"(inferred base model: {base})", err=True, fg=typer.colors.CYAN)

    quick_prompts = _load_prompts(prompts) if prompts else _BUILTIN_QUICK_PROMPTS

    base_spec = ModelSpec(base=base, kind="hf")
    ft_spec = ModelSpec(base=base, kind="hf", adapter=adapter)
    spec = SwaySpec(
        version=1,
        models=SuiteModels(base=base_spec, ft=ft_spec),
        defaults=SuiteDefaults(seed=0),
        suite=[
            # S25: pre-run training-health check first. SKIPs cleanly
            # when the adapter wasn't produced by dlm (no
            # training_state.pt); FAILs loudly on severely-undertrained
            # adapters with a banner before the rest of the output.
            {
                "name": "quick_gradient_ghost",
                "kind": "gradient_ghost",
                "adapter_path": str(adapter),
            },
            # Calibrate first so delta_kl can publish a z-score the
            # banner reads off.
            {"name": "quick_null", "kind": "null_adapter", "runs": 3},
            {
                "name": "quick_delta_kl",
                "kind": "delta_kl",
                "prompts": list(quick_prompts),
                "assert_mean_gte": 0.01,
            },
            {
                "name": "quick_calibration",
                "kind": "calibration_drift",
                "items_limit": 10,
            },
        ],
    )

    # Build, run and score under one handler so a ``SwayError`` from any of
    # the three steps becomes "error: ..." + exit code 2. Previously only
    # the backend build was covered and a failing run surfaced as a raw
    # traceback, unlike the other commands in this module.
    try:
        backend = build_backend(ft_spec)
        try:
            result = run_suite(spec, backend, spec_path="<check>")
        finally:
            # Release the backend whether or not the run succeeded.
            _close_if_possible(backend)
        score_obj = compute_score(result)
    except SwayError as exc:
        typer.secho(f"error: {exc}", err=True, fg=typer.colors.RED)
        raise typer.Exit(code=2) from exc

    # D12: top-line banner before the full report so a user looking
    # only at the first line still gets the verdict.
    console = Console()

    # S25 — pre-flight gradient_ghost banner. Fires BEFORE the verdict
    # banner so the user sees "this adapter is undertrained" first;
    # the rest of the check output stays for context (the user might
    # still want to see how badly the other probes scored).
    _emit_gradient_ghost_banner(result, console)

    banner_text, banner_style = _check_banner(score_obj, result)
    console.print()
    console.print(banner_text, style=banner_style)
    console.print()
    report.to_terminal(result, score_obj, console=console)
402
403
404 def _emit_gradient_ghost_banner(result: object, console: Console) -> None:
405 """Print a yellow/red ⚠️ banner if gradient_ghost FAILed (S25 P6).
406
407 Reaches into ``result.probes`` for any probe with
408 ``kind=gradient_ghost`` and verdict FAIL. Informational — no
409 effect on exit code; the user might still want to inspect the
410 other probes' verdicts.
411 """
412 probes = getattr(result, "probes", ()) or ()
413 for p in probes:
414 if getattr(p, "kind", "") != "gradient_ghost":
415 continue
416 verdict_str = str(getattr(p, "verdict", "")).lower()
417 if verdict_str == "fail":
418 console.print()
419 console.print(
420 "⚠️ PRE-RUN ALERT — gradient_ghost flagged severe undertraining",
421 style="bold red",
422 )
423 msg = getattr(p, "message", "")
424 if msg:
425 console.print(f" {msg}", style="red")
426 console.print(
427 " The probe scores below may be unreliable. Consider retraining.",
428 style="dim red",
429 )
430 return
431 if verdict_str == "warn":
432 console.print()
433 console.print(
434 "⚠️ gradient_ghost: training may not have fully converged",
435 style="bold yellow",
436 )
437 msg = getattr(p, "message", "")
438 if msg:
439 console.print(f" {msg}", style="yellow")
440 return
441
442
def diff_cmd(
    spec: Annotated[Path, typer.Argument(help="Path to a sway.yaml spec.")],
    adapter_a: Annotated[Path, typer.Option("--a", help="First adapter path.")],
    adapter_b: Annotated[Path, typer.Option("--b", help="Second adapter path.")],
) -> None:
    """Run the same suite against two adapters and show per-probe deltas."""
    from dlm_sway.backends import build as build_backend
    from dlm_sway.suite.loader import load_spec
    from dlm_sway.suite.runner import run as run_suite
    from dlm_sway.suite.score import compute as compute_score

    console = Console()

    def _score_for(sway_spec: Any, adapter_path: Path) -> tuple[float, dict[str, float]]:
        """Run the suite with ``adapter_path`` swapped in; return (overall, per-probe)."""
        ft_spec = sway_spec.models.ft.model_copy(update={"adapter": adapter_path})
        backend = build_backend(ft_spec)
        try:
            result = run_suite(sway_spec, backend, spec_path=str(spec))
        finally:
            _close_if_possible(backend)
        scored = compute_score(result)
        # A probe without a score (``None``) is treated as 0.0.
        per_probe = {p.name: (p.score or 0.0) for p in result.probes}
        return scored.overall, per_probe

    # Spec loading now sits inside the handler as well, so a bad spec is
    # reported as "error: ..." with exit code 2 rather than a traceback.
    try:
        sway_spec = load_spec(spec)
        overall_a, per_a = _score_for(sway_spec, adapter_a)
        overall_b, per_b = _score_for(sway_spec, adapter_b)
    except SwayError as exc:
        typer.secho(f"error: {exc}", err=True, fg=typer.colors.RED)
        raise typer.Exit(code=2) from exc

    console.print(f"[bold]overall[/bold] A: {overall_a:.2f} B: {overall_b:.2f}")
    console.print()
    console.print("[bold]per-probe[/bold] (A → B, Δ):")
    regressed_small = 0  # score dropped by more than 0.10 from A to B
    regressed_large = 0  # score dropped by more than 0.20 from A to B
    for name in sorted(per_a.keys() | per_b.keys()):
        # A probe present on only one side counts as 0.0 on the other.
        a = per_a.get(name, 0.0)
        b = per_b.get(name, 0.0)
        delta = b - a
        # ``:+`` already emits the sign; the old manual "+" prefix doubled
        # it ("++0.10"). The arrow keeps the two scores from running
        # together ("0.500.60") and matches the "(A → B, Δ)" header.
        console.print(f" {name:<30} {a:.2f} → {b:.2f} ({delta:+.2f})")
        if delta < -0.10:
            regressed_small += 1
        if delta < -0.20:
            regressed_large += 1

    # D13: regression summary line. The audit's example phrasing was
    # "A→B: 3 probes regressed >0.10, 1 regressed >0.20, composite Δ=+0.02".
    # Color cue tracks the composite delta: green for any improvement,
    # red on regression, yellow on flat-with-regressions.
    composite_delta = overall_b - overall_a
    if composite_delta > 0.0:
        summary_style = "bold green"
    elif regressed_small or regressed_large:
        summary_style = "bold red" if composite_delta < 0.0 else "bold yellow"
    else:
        summary_style = "dim"

    console.print()
    console.print(
        f"A→B: {regressed_small} probe(s) regressed >0.10, "
        f"{regressed_large} regressed >0.20, "
        f"composite Δ={composite_delta:+.2f}",
        style=summary_style,
    )
510
511
def autogen_cmd(
    dlm_path: Annotated[Path, typer.Argument(help="Path to a .dlm file.")],
    out: Annotated[
        Path,
        typer.Option("--out", "-o", help="Where to write the generated sway.yaml."),
    ] = Path("sway.yaml"),
) -> None:
    """Generate a sway.yaml from a .dlm file (requires the ``dlm-sway[dlm]`` extra)."""
    from importlib import import_module

    # The integration module is resolved by name at call time, so a missing
    # optional extra becomes an install hint + exit 2, not an import-time crash.
    try:
        generator = import_module("dlm_sway.integrations.dlm.autogen")
    except ImportError as exc:
        typer.secho(
            "dlm integration not installed — run: pip install 'dlm-sway[dlm]'",
            err=True,
            fg=typer.colors.RED,
        )
        raise typer.Exit(code=2) from exc

    # Generation failures raised as ``SwayError`` also map to exit code 2.
    try:
        generator.write_sway_yaml(dlm_path, out)
    except SwayError as exc:
        typer.secho(f"error: {exc}", err=True, fg=typer.colors.RED)
        raise typer.Exit(code=2) from exc

    typer.echo(f"wrote {out}")
539
540
# Maps each extra name reported by ``sway doctor`` to the importable module
# names that ``_doctor_payload`` probes for it (one ``_module_version`` call
# per module). The keys become the row labels of the doctor output.
_DOCTOR_BACKENDS: dict[str, tuple[str, ...]] = {
    "hf": ("torch", "transformers", "peft"),
    "mlx": ("mlx", "mlx_lm"),
    # ``sklearn`` is S16's cluster_kl dep; shipped under [semsim] so it
    # rides the same 80 MB MiniLM load adapter_revert already pulls.
    "semsim": ("sentence_transformers", "sklearn"),
    "style": ("spacy", "textstat", "nlpaug"),
    "dlm": ("dlm",),
    # ``plotly`` is the load-bearing dep for ``sway report --format html``;
    # S12 docs listed it but doctor never probed it before F04.
    "viz": ("matplotlib", "plotly"),
    # S13 API backend.
    "api": ("httpx", "tenacity"),
    "pytest": ("pytest",),
}
556
557
def _doctor_payload() -> dict[str, Any]:
    """Assemble the doctor report as a plain, JSON-serialisable dict.

    Both the ``--json`` output and the rich terminal layout of
    ``doctor_cmd`` are rendered from this one structure. ``extras`` maps
    each key of ``_DOCTOR_BACKENDS`` to ``{module: version-or-None}``.
    """
    probed: dict[str, dict[str, str | None]] = {
        extra_name: {module_name: _module_version(module_name) for module_name in module_names}
        for extra_name, module_names in _DOCTOR_BACKENDS.items()
    }
    python_version = sys.version.split()[0]
    return {
        "sway_version": __version__,
        "python": python_version,
        "platform": sys.platform,
        "extras": probed,
    }
569
570
571 def _module_version(name: str) -> str | None:
572 """Return the installed module's ``__version__`` string, or ``None``."""
573 import importlib
574
575 try:
576 mod = importlib.import_module(name)
577 except ImportError:
578 return None
579 return str(getattr(mod, "__version__", "installed"))
580
581
def doctor_cmd(
    json_out: Annotated[
        bool,
        typer.Option(
            "--json",
            help=(
                "Emit a machine-readable JSON payload instead of the rich "
                "terminal layout (D7). CI-grep-friendly."
            ),
        ),
    ] = False,
) -> None:
    """Print backend availability and version info."""
    report_data = _doctor_payload()

    # --json: dump the payload as-is (sorted keys, 2-space indent) and stop.
    if json_out:
        typer.echo(json.dumps(report_data, indent=2, sort_keys=True))
        return

    console = Console()
    console.print(f"[bold]sway[/bold] {report_data['sway_version']}")
    console.print(f" python: {report_data['python']}")
    console.print(f" platform: {report_data['platform']}")
    console.print()
    console.print("[bold]backends[/bold]")

    # One row per extra: modules with no version (``None``) are shown in red
    # as missing, the rest in green with their version string.
    for extra, modules in report_data["extras"].items():
        parts = [
            f"[red]{mod}: missing[/red]" if ver is None else f"[green]{mod}: {ver}[/green]"
            for mod, ver in modules.items()
        ]
        console.print(f" {extra:<8} {' '.join(parts)}")
614
615
class ReportFormat(StrEnum):
    """Allowed values for ``sway report --format`` (D11).

    Typer enforces the enum at parse time, so unknown formats produce
    a clear ``Invalid value`` error instead of silently falling back
    to the terminal renderer.

    Each member's value is the string spelled out on the command line.
    """

    TERMINAL = "terminal"
    # ``md`` and ``markdown`` are routed to the same markdown renderer
    # by ``report_cmd``.
    MARKDOWN = "md"
    MARKDOWN_LONG = "markdown"  # alias kept for muscle memory
    JUNIT = "junit"
    JSON = "json"
    # ``report_cmd`` refuses this format unless ``--out`` is given.
    HTML = "html"
630
631
def report_cmd(
    result_json: Annotated[Path, typer.Argument(help="Path to a saved result JSON.")],
    format: Annotated[
        ReportFormat,
        typer.Option(
            "--format",
            help="Output format: terminal, md (alias: markdown), junit, json, or html.",
        ),
    ] = ReportFormat.TERMINAL,
    out: Annotated[
        Path | None,
        typer.Option(
            "--out",
            "-o",
            help=(
                "Write the rendered output to this path instead of stdout. "
                "Required for --format html (Plotly's inlined JS is ~3 MB)."
            ),
        ),
    ] = None,
) -> None:
    """Re-render a previously saved run (for history tracking / dashboards).

    The CLI deserializes the JSON back into the canonical
    ``(SuiteResult, SwayScore)`` pair via :func:`report.from_json`,
    then routes through the same renderers as a fresh ``sway run``.
    Single source for every format keeps terminal / md / junit /
    json / html output identical regardless of where they came from (B16).
    """
    from dlm_sway.suite import report

    # An unreadable or malformed file is a usage error (exit 2), matching
    # how ``sway compare`` treats its inputs. ``ValueError`` covers both
    # ``json.JSONDecodeError`` and ``UnicodeDecodeError``.
    try:
        raw: dict[str, Any] = json.loads(result_json.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        typer.echo(f"sway report: cannot read {result_json}: {exc}", err=True)
        raise typer.Exit(code=2) from exc

    # Every format renders from the same round-tripped pair.
    suite, score = report.from_json(raw)

    if format is ReportFormat.JSON:
        # Re-emit via to_json against the round-tripped pair (rather than
        # echoing the file) so any schema additions land consistently.
        _emit(report.to_json(suite, score), out)
        return
    if format in (ReportFormat.MARKDOWN, ReportFormat.MARKDOWN_LONG):
        _emit(report.to_markdown(suite, score), out)
        return
    if format is ReportFormat.JUNIT:
        _emit(report.to_junit(suite, score), out)
        return
    if format is ReportFormat.HTML:
        # Check --out first: there is no point rendering a multi-megabyte
        # document only to refuse to print it. Refuse to dump 3 MB of HTML
        # to stdout by default — the user almost always wants a file.
        if out is None:
            typer.echo(
                "sway report --format html requires --out PATH "
                "(Plotly JS bundle is ~3 MB; stdout is not an HTML viewer)",
                err=True,
            )
            raise typer.Exit(code=2)
        try:
            from dlm_sway.suite import report_html
        except ImportError as exc:  # pragma: no cover — graceful install hint
            typer.echo(f"sway report --format html: {exc}", err=True)
            raise typer.Exit(code=2) from exc
        try:
            html_text = report_html.to_html(suite, score)
        except RuntimeError as exc:
            typer.echo(f"sway report --format html: {exc}", err=True)
            raise typer.Exit(code=2) from exc
        out.write_text(html_text, encoding="utf-8")
        typer.echo(f"wrote HTML → {out}", err=True)
        return

    # ReportFormat.TERMINAL.
    if out is not None:
        typer.echo(
            "sway report --format terminal does not support --out; "
            "use --format md or --format html for file output.",
            err=True,
        )
        raise typer.Exit(code=2)
    report.to_terminal(suite, score, console=Console())
712
713
def _emit(text: str, out: Path | None) -> None:
    """Send ``text`` to ``out`` when a path is given, otherwise to stdout.

    A file write is followed by a ``wrote <path>`` note on stderr.
    """
    if out is not None:
        out.write_text(text, encoding="utf-8")
        typer.echo(f"wrote {out}", err=True)
        return
    typer.echo(text)
721
722
class CompareFormat(StrEnum):
    """Allowed values for ``sway compare --format``.

    Each member's value is the string spelled out on the command line.
    """

    TERMINAL = "terminal"
    # ``md`` and ``markdown`` select the same markdown renderer in
    # ``compare_cmd``.
    MARKDOWN = "md"
    MARKDOWN_LONG = "markdown"  # alias kept for muscle memory
    JSON = "json"
730
731
def compare_cmd(
    result_jsons: Annotated[
        list[Path],
        typer.Argument(help="Two or more saved result JSONs, in chronological order."),
    ],
    format: Annotated[
        CompareFormat,
        typer.Option(
            "--format",
            help="Output format: terminal, md (alias: markdown), or json.",
        ),
    ] = CompareFormat.TERMINAL,
    fail_on_regression: Annotated[
        float,
        typer.Option(
            "--fail-on-regression",
            help=(
                "Exit non-zero when any probe's score in the newest run dropped "
                "by ≥ this threshold vs the previous run. 0 disables the gate."
            ),
        ),
    ] = 0.0,
) -> None:
    """Compare N saved runs side-by-side (regression dashboard).

    Rehydrates each JSON via :func:`report.from_json`, folds the runs
    into a :class:`CompareMatrix`, and renders the score table + delta
    columns + composite timeline. Intended for CI: point at a history
    directory (``sway-history/*.json``) and pipe the output into the
    build's log, or set ``--fail-on-regression`` to make the build red
    on a real drop.
    """
    from dlm_sway.suite import compare, report

    # A comparison needs at least a "before" and an "after".
    if len(result_jsons) < 2:
        typer.echo("sway compare: need at least two result JSONs", err=True)
        raise typer.Exit(code=2)

    loaded_runs: list[tuple[SuiteResult, SwayScore]] = []
    run_labels: list[str] = []
    for json_path in result_jsons:
        try:
            parsed: dict[str, Any] = json.loads(json_path.read_text(encoding="utf-8"))
        except (OSError, json.JSONDecodeError) as exc:
            typer.echo(f"sway compare: cannot read {json_path}: {exc}", err=True)
            raise typer.Exit(code=2) from exc
        loaded_runs.append(report.from_json(parsed))
        # Each run is labelled with its file name minus the extension.
        run_labels.append(json_path.stem)

    matrix = compare.build_matrix(loaded_runs, labels=run_labels)

    wants_markdown = format in (CompareFormat.MARKDOWN, CompareFormat.MARKDOWN_LONG)
    if format is CompareFormat.JSON:
        typer.echo(compare.render_json(matrix, regression_threshold=fail_on_regression))
    elif wants_markdown:
        typer.echo(compare.render_markdown(matrix, regression_threshold=fail_on_regression))
    else:
        compare.render_terminal(
            matrix,
            console=Console(),
            regression_threshold=fail_on_regression,
        )

    # Exit-code gate: only consulted for a positive threshold; exit 1 when
    # the matrix reports any regression in the latest run.
    if fail_on_regression > 0.0 and matrix.latest_regressions(fail_on_regression):
        raise typer.Exit(code=1)
801
802
class TraceFormat(StrEnum):
    """Allowed values for ``sway trace --format``.

    Each member's value is the string spelled out on the command line.
    """

    TERMINAL = "terminal"
    # ``md`` and ``markdown`` select the same markdown renderer in
    # ``trace_cmd``.
    MARKDOWN = "md"
    MARKDOWN_LONG = "markdown"  # alias kept for muscle memory
    JSON = "json"
810
811
def trace_cmd(
    trace_file: Annotated[
        Path,
        typer.Argument(
            help=("Path to a forward-pass trace JSONL produced by `sway run --trace <path>`."),
        ),
    ],
    format: Annotated[
        TraceFormat,
        typer.Option(
            "--format",
            help="Output format: terminal, md (alias: markdown), or json.",
        ),
    ] = TraceFormat.TERMINAL,
    slowest: Annotated[
        int,
        typer.Option(
            "--slowest",
            help="How many slowest-events rows to show. 0 hides that table.",
        ),
    ] = 10,
) -> None:
    """Analyze a forward-pass trace JSONL.

    Reads the per-event JSONL `sway run --trace` writes, aggregates
    into per-probe + per-view summaries, and surfaces the top-N
    slowest events. Intended for suite-performance investigation:
    point at a captured trace and see which probe × view pair
    dominated wall time, whether the S07 cache helped, and which
    individual prompts took the longest.
    """
    from dlm_sway.suite import trace_analysis

    # Unreadable file → exit 2; readable but empty trace → exit 1.
    try:
        events = trace_analysis.load(trace_file)
    except OSError as exc:
        typer.echo(f"sway trace: cannot read {trace_file}: {exc}", err=True)
        raise typer.Exit(code=2) from exc
    if not events:
        typer.echo(f"sway trace: no events in {trace_file}", err=True)
        raise typer.Exit(code=1)

    # A negative --slowest is clamped to 0.
    slowest_rows = max(0, slowest)
    summary = trace_analysis.build_report(events, slowest_k=slowest_rows)

    if format is TraceFormat.JSON:
        typer.echo(trace_analysis.render_json(summary))
        return
    if format in (TraceFormat.MARKDOWN, TraceFormat.MARKDOWN_LONG):
        typer.echo(trace_analysis.render_markdown(summary))
        return
    trace_analysis.render_terminal(summary, console=Console())
862
863
class MineMode(StrEnum):
    """``sway mine`` operates in one of two modes.

    The member values are the strings accepted by ``--mode``; the value
    also appears in the default output file name
    (``sway-mined-<mode>.yaml``).
    """

    # Mine adversarial paraphrases for ``paraphrase_invariance`` cases.
    PARAPHRASE = "paraphrase"
    # Rank a prompt pool with the ``delta_kl`` probe.
    OUTLIERS = "outliers"
869
870
def mine_cmd(
    spec: Annotated[Path, typer.Argument(help="Path to a sway.yaml spec.")],
    mode: Annotated[
        MineMode,
        typer.Option(
            "--mode",
            help=(
                "``paraphrase``: sharpen every paraphrase_invariance case with mined "
                "adversarial paraphrases. ``outliers``: rank the spec's delta_kl prompts "
                "(or a corpus-derived pool) by per-prompt raw."
            ),
        ),
    ] = MineMode.PARAPHRASE,
    out: Annotated[
        Path | None,
        typer.Option(
            "--out",
            "-o",
            help=(
                "Where to write the mined YAML fragment. Defaults to "
                "``sway-mined-<mode>.yaml`` in the current directory."
            ),
        ),
    ] = None,
    top_k: Annotated[
        int,
        typer.Option(
            "--top-k", help="Keep the top-K candidates per case (paraphrase) or pool (outliers)."
        ),
    ] = 10,
    n_candidates: Annotated[
        int,
        typer.Option(
            "--n-candidates",
            help=(
                "Paraphrase mode only — generate this many raw candidates before the "
                "diversity filter. Higher = more coverage at more wall-time cost."
            ),
        ),
    ] = 50,
    from_corpus: Annotated[
        str | None,
        typer.Option(
            "--from-corpus",
            help=(
                "Outliers mode — draw the candidate pool from a packaged corpus "
                "(``public_domain_en``) instead of the spec's own prompts."
            ),
        ),
    ] = None,
    seed: Annotated[
        int,
        typer.Option(
            "--seed",
            help=(
                "Seed for the generator + probe RNGs. Keep fixed to reproduce a "
                "previous mining run — nlpaug's synonym and back-translation picks "
                "are deterministic under this seed."
            ),
        ),
    ] = 0,
) -> None:
    """Mine adversarial paraphrases or outlier prompts from a spec.

    **Paraphrase mode** (``--mode paraphrase``). For each
    ``paraphrase_invariance`` case in the spec, generate candidate
    paraphrases, diversity-filter them, and rank by the gap between
    verbatim and paraphrased lift. The emitted YAML fragment contains
    updated ``cases:`` that you can paste over the originals in your
    spec — a memorizing adapter that passed the hand-written list will
    typically fail the mined list.

    **Outliers mode** (``--mode outliers``). Rank the spec's
    ``delta_kl`` prompt pool (or a corpus-derived pool via
    ``--from-corpus``) by per-prompt raw divergence. Emitted fragment
    lists top-K and bottom-K prompts, split into two blocks.

    The mined output is paste-compatible with the spec loader — no
    schema bumps. Re-run ``sway gate`` after merging the mined list
    to confirm the gate's behavior changed as expected.
    """
    import yaml

    from dlm_sway.backends import build as build_backend
    from dlm_sway.mining.outlier_miner import corpus_prompts, mine_outliers
    from dlm_sway.mining.paraphrase_miner import mine_paraphrases
    from dlm_sway.suite.loader import load_spec

    out_path = out or Path(f"sway-mined-{mode.value}.yaml")

    # Everything that can raise ``SwayError`` (spec load, pool assembly,
    # backend build, mining) runs under one handler so failures become
    # "error: ..." + exit code 2, like the other commands in this module.
    try:
        loaded_spec = load_spec(spec)

        # Outliers mode: assemble and validate the candidate pool BEFORE
        # building the backend, so an empty pool fails fast without paying
        # for a backend build.
        candidate_pool = None
        if mode is not MineMode.PARAPHRASE:
            candidate_pool = (
                corpus_prompts(from_corpus)
                if from_corpus
                else _collect_delta_kl_prompts(loaded_spec)
            )
            if not candidate_pool:
                typer.secho(
                    "sway mine --outliers: no candidate prompts found. Either add delta_kl "
                    "prompts to the spec or pass --from-corpus.",
                    err=True,
                    fg=typer.colors.RED,
                )
                raise typer.Exit(code=2)

        # Materialize the backend from the spec's fine-tuned model entry,
        # the same factory call the other commands use.
        backend = build_backend(loaded_spec.models.ft)
        try:
            if mode is MineMode.PARAPHRASE:
                payload = _mine_paraphrase_payload(
                    loaded_spec,
                    backend,
                    mine_paraphrases,
                    top_k=top_k,
                    n_candidates=n_candidates,
                    seed=seed,
                )
            else:
                result = mine_outliers(
                    probe_kind="delta_kl",
                    candidate_prompts=candidate_pool,
                    backend=backend,
                    top_k=top_k,
                    seed=seed,
                )
                payload = _outlier_result_to_yaml(result)
        finally:
            # Previously the backend was never released on this path;
            # close it the same way ``sway check`` / ``sway diff`` do.
            _close_if_possible(backend)
    except SwayError as exc:
        typer.secho(f"error: {exc}", err=True, fg=typer.colors.RED)
        raise typer.Exit(code=2) from exc

    out_path.write_text(yaml.safe_dump(payload, sort_keys=False), encoding="utf-8")
    typer.echo(f"wrote {out_path}")
999
1000
1001 def _mine_paraphrase_payload(
1002 spec: Any,
1003 backend: Any,
1004 miner: Any,
1005 *,
1006 top_k: int,
1007 n_candidates: int,
1008 seed: int,
1009 ) -> dict[str, Any]:
1010 """Run paraphrase mining on every paraphrase_invariance entry; shape into YAML."""
1011 out_cases: list[dict[str, Any]] = []
1012 for entry in spec.suite:
1013 if entry.get("kind") != "paraphrase_invariance":
1014 continue
1015 for case in entry.get("cases", []):
1016 prompt = case.get("prompt")
1017 gold = case.get("gold")
1018 if not prompt or not gold:
1019 continue
1020 mined = miner(
1021 prompt=prompt,
1022 gold=gold,
1023 backend=backend,
1024 n_candidates=n_candidates,
1025 top_k=top_k,
1026 seed=seed,
1027 )
1028 out_cases.append(
1029 {
1030 "prompt": mined.seed_prompt,
1031 "gold": mined.gold,
1032 "paraphrases": [c.prompt for c in mined.candidates],
1033 "_mining_meta": {
1034 "top_gaps": [round(c.gap, 6) for c in mined.candidates],
1035 "verbatim_lift": (
1036 round(mined.candidates[0].verbatim_lift, 6)
1037 if mined.candidates
1038 else None
1039 ),
1040 },
1041 }
1042 )
1043 return {"mined_cases": out_cases}
1044
1045
1046 def _collect_delta_kl_prompts(spec: Any) -> list[str]:
1047 """Pull every ``delta_kl`` entry's prompt pool into one flat list."""
1048 seen: set[str] = set()
1049 out: list[str] = []
1050 for entry in spec.suite:
1051 if entry.get("kind") != "delta_kl":
1052 continue
1053 for p in entry.get("prompts", []):
1054 if p not in seen:
1055 seen.add(p)
1056 out.append(p)
1057 return out
1058
1059
1060 def _outlier_result_to_yaml(result: Any) -> dict[str, Any]:
1061 """Format an :class:`OutlierResult` as a YAML-friendly dict."""
1062 return {
1063 "mined_outliers": {
1064 "probe_kind": result.probe_kind,
1065 "top": [
1066 {"prompt": c.prompt, "raw": round(c.raw, 6), "index": c.index} for c in result.top
1067 ],
1068 "bottom": [
1069 {"prompt": c.prompt, "raw": round(c.raw, 6), "index": c.index}
1070 for c in result.bottom
1071 ],
1072 }
1073 }
1074
1075
1076 # -- helpers -----------------------------------------------------------
1077
1078
# Small, generic set of sentence-starter prompts shipped with the CLI.
# Kept as a tuple so the shared module-level value cannot be mutated.
# NOTE(review): presumably the fallback prompt pool used when the caller
# supplies no prompts file — confirm against the command that reads it
# (not visible in this section).
_BUILTIN_QUICK_PROMPTS: tuple[str, ...] = (
    "The quick brown fox",
    "Once upon a time",
    "The answer to the question is",
    "One important lesson is",
    "In my opinion,",
    "The first step is to",
    "Remember that",
    "A common mistake is",
)
1089
1090
1091 def _load_prompts(path: Path) -> tuple[str, ...]:
1092 return tuple(
1093 line.strip() for line in path.read_text(encoding="utf-8").splitlines() if line.strip()
1094 )
1095
1096
def _execute_spec(
    path: Path,
    *,
    weights_override: dict[str, float] | None = None,
    trace_path: Path | None = None,
) -> tuple[SuiteResult, SwayScore]:
    """Load the spec at ``path``, run its suite, and return the result and score.

    This is the shared engine behind ``run`` and ``gate``. The steps, in order:

    1. Load the spec and validate every probe entry.
    2. If the spec sets ``dlm_source``, try to resolve it into sections
       and document text. Failure to do so is a warning, not an error.
    3. Build the backend: a single one when ``spec.defaults.differential``
       is truthy, otherwise two separate ones.
    4. Run the suite, always closing the backend afterwards.
    5. Compute the score.

    ``weights_override`` wins over ``spec.defaults.score_weights`` whenever
    it is truthy; the CLI passes the parsed ``--weights k=v,k=v`` value here.
    """
    from dlm_sway.backends import build as build_backend
    from dlm_sway.backends import build_two_separate
    from dlm_sway.probes.base import validate_all_probes
    from dlm_sway.suite.loader import load_spec
    from dlm_sway.suite.runner import run as run_suite
    from dlm_sway.suite.score import compute as compute_score

    spec = load_spec(path)
    # B7: probe validation comes before backend construction so that a
    # typo in `kind:` is reported immediately instead of after a model
    # download that can take minutes.
    validate_all_probes(spec.suite)

    sections = doc_text = None
    if spec.dlm_source is not None:
        import importlib

        try:
            bridge = importlib.import_module("dlm_sway.integrations.dlm.resolver")
            resolved = bridge.resolve_dlm(Path(spec.dlm_source))
            # Both attributes are read before either name is bound, so a
            # failure here leaves sections and doc_text at None.
            sections, doc_text = resolved.sections, resolved.doc_text
        except ImportError:
            # D8: never swallow this silently. The user set ``dlm_source``
            # expecting sections to be populated; say loudly why the
            # downstream attribution probes will SKIP.
            typer.secho(
                f"warning: spec sets dlm_source={spec.dlm_source!r} but the "
                f"[dlm] extra is not installed — sections not provided "
                f"(pip install 'dlm-sway[dlm]')",
                err=True,
                fg=typer.colors.YELLOW,
            )
        except SwayError as exc:
            # The bridge imported but could not resolve (no adapter,
            # malformed .dlm, ...). Same policy: warn and keep going.
            typer.secho(
                f"warning: dlm_source={spec.dlm_source!r} did not resolve: {exc}",
                err=True,
                fg=typer.colors.YELLOW,
            )

    backend: Any = (
        build_backend(spec.models.ft)
        if spec.defaults.differential
        else build_two_separate(spec.models)
    )
    try:
        suite_result = run_suite(
            spec,
            backend,
            spec_path=str(path),
            sections=sections,
            doc_text=doc_text,
            trace_path=trace_path,
        )
    finally:
        # Release the backend whether the run succeeded or raised.
        _close_if_possible(backend)

    score = compute_score(
        suite_result,
        weights=weights_override or spec.defaults.score_weights,
    )
    return suite_result, score
1174
1175
1176 def _parse_weights_flag(raw: str | None) -> dict[str, float] | None:
1177 """Parse ``--weights k=v,k=v`` into a dict; pydantic validates on use.
1178
1179 Returns ``None`` when the flag is empty / unset. Pydantic's
1180 ``SuiteDefaults._validate_weights`` is re-invoked indirectly via
1181 ``SwayScore`` — so any unknown category or negative value surfaces
1182 the same error whether set in YAML or on the command line.
1183 """
1184 if not raw:
1185 return None
1186 out: dict[str, float] = {}
1187 for pair in raw.split(","):
1188 pair = pair.strip()
1189 if not pair:
1190 continue
1191 if "=" not in pair:
1192 raise typer.BadParameter(
1193 f"--weights: expected 'key=value' pairs, got {pair!r}. "
1194 "Example: --weights adherence=0.4,attribution=0.3"
1195 )
1196 key, _, value = pair.partition("=")
1197 key = key.strip()
1198 try:
1199 out[key] = float(value.strip())
1200 except ValueError as exc:
1201 raise typer.BadParameter(f"--weights: {value!r} for {key!r} is not a number") from exc
1202 return out or None
1203
1204
1205 def _close_if_possible(backend: object) -> None:
1206 close = getattr(backend, "close", None)
1207 if callable(close):
1208 close()
1209
1210
1211 # --- convert-adapter (S24, F01) ------------------------------------------
1212
1213
class ConvertTarget(StrEnum):
    """Adapter formats selectable through ``convert-adapter``'s ``--target`` option."""

    # The only member defined so far; its string value is what the user
    # types on the command line.
    MLX = "mlx"
1216
1217
def convert_adapter_cmd(
    src: Annotated[Path, typer.Argument(help="PEFT adapter directory to convert.")],
    dst: Annotated[Path, typer.Argument(help="Output directory for the converted adapter.")],
    target: Annotated[
        ConvertTarget,
        typer.Option("--target", help="Output adapter format. Currently only 'mlx'."),
    ] = ConvertTarget.MLX,
    overwrite: Annotated[
        bool,
        typer.Option("--overwrite", help="Replace any existing adapter at dst."),
    ] = False,
) -> None:
    """Convert a PEFT LoRA adapter to another backend's format.

    Today the only target is ``mlx`` — converts ``adapter_model.safetensors`` +
    ``adapter_config.json`` (PEFT) to ``adapters.safetensors`` +
    ``adapter_config.json`` (mlx-lm). Closes the F01 doc-vs-code gap so
    the MLX backend works on dlm-trained / any PEFT-trained adapters
    without manual conversion.
    """
    # NOTE: the docstring above is kept verbatim because Typer shows it as
    # the command's --help text.
    from dlm_sway.backends._mlx_convert import MlxConvertError, convert_peft_to_mlx

    # Defensive: the enum has one member today, but a future member added
    # without a converter must fail loudly rather than fall through to MLX.
    if target is not ConvertTarget.MLX:
        raise typer.BadParameter(f"unsupported target {target!r}")

    try:
        report = convert_peft_to_mlx(src, dst, overwrite=overwrite)
    except (MlxConvertError, SwayError) as exc:
        # Converter-specific and generic sway failures are reported the
        # same way: one red line on stderr, exit code 1.
        typer.secho(f"convert-adapter: {exc}", fg=typer.colors.RED, err=True)
        raise typer.Exit(code=1) from exc

    src_kb = report["src_bytes"] / 1024
    dst_kb = report["dst_bytes"] / 1024
    # The source and destination paths are separated by an arrow so they
    # no longer run together into one unreadable token.
    typer.echo(
        f"converted: {src} → {dst} rank={report['rank']} "
        f"scale={report['scale']:.3f} num_keys={report['num_keys']} "
        f"({src_kb:.1f} KB → {dst_kb:.1f} KB)"
    )

    skipped = report["modules_to_save_skipped"]
    if skipped:
        # Surface dropped full-weight overrides so the user is not
        # surprised by a behavior difference after conversion.
        typer.secho(
            f"warning: {len(skipped)} modules_to_save tensor(s) "
            f"skipped (mlx-lm's LoRA loader doesn't apply full-weight overrides). "
            f"Sample: {skipped[:1]!r}",
            fg=typer.colors.YELLOW,
            err=True,
        )
1266
1267
1268 # --- pack / unpack (S26, X3) ----------------------------------------------
1269
1270
def pack_cmd(
    spec_path: Annotated[Path, typer.Argument(help="Path to a sway.yaml to pack.")],
    out: Annotated[
        Path | None,
        typer.Option(
            "--out",
            "-o",
            help="Output tarball path. Defaults to <spec-stem>.swaypack.tar.gz next to spec.",
        ),
    ] = None,
    include_golden: Annotated[
        Path | None,
        typer.Option(
            "--include-golden",
            help=(
                "Path to a JSON sway-run report (from `sway run --json` or "
                "`sway report --format json`) to bundle for verification."
            ),
        ),
    ] = None,
    include_null_cache: Annotated[
        bool,
        typer.Option(
            "--include-null-cache/--no-include-null-cache",
            help=(
                "Bundle ~/.dlm-sway/null-stats/*.json into the pack so the "
                "consumer doesn't need to re-calibrate. Default: include."
            ),
        ),
    ] = True,
    max_size_mb: Annotated[
        int,
        typer.Option(
            "--max-size-mb",
            help="Refuse to write a pack larger than this (default 50 MB).",
        ),
    ] = 50,
) -> None:
    """Bundle a spec + its inputs + null-stats cache into a portable swaypack tarball.

    The result is a single ``.swaypack.tar.gz`` you can share with a
    coworker or check into a release repo. The receiver runs
    ``sway unpack <pack>`` and then ``sway run`` against the unpacked
    spec — identical verdict to the original run, no live dlm or
    network needed.
    """
    from dlm_sway.cli._pack import PackError, pack_spec

    # Default destination: the spec's own directory, with the spec's final
    # suffix replaced by ".swaypack.tar.gz".
    destination = (
        out
        if out is not None
        else spec_path.with_name(f"{spec_path.stem}.swaypack.tar.gz")
    )
    size_cap_bytes = max_size_mb * 1024 * 1024

    try:
        report = pack_spec(
            spec_path,
            out_path=destination,
            include_golden=include_golden,
            include_null_cache=include_null_cache,
            max_size_bytes=size_cap_bytes,
        )
    except (PackError, SwayError) as exc:
        # Either failure family becomes one red stderr line and exit code 1.
        typer.secho(f"pack: {exc}", fg=typer.colors.RED, err=True)
        raise typer.Exit(code=1) from exc

    size_kb = report.size_bytes / 1024
    typer.echo(
        f"wrote {report.out_path} ({size_kb:.1f} KB) "
        f"sections={report.section_bytes}b "
        f"null_stats={report.null_stats_count} "
        f"golden={'yes' if report.golden_included else 'no'}"
    )
1346
1347
def unpack_cmd(
    pack_path: Annotated[Path, typer.Argument(help="Path to a *.swaypack.tar.gz.")],
    out: Annotated[
        Path | None,
        typer.Option(
            "--out",
            "-o",
            help="Parent directory to extract into. Default: cwd.",
        ),
    ] = None,
) -> None:
    """Extract a swaypack into ``out``; print the next ``sway run`` invocation.

    The pack lands at ``<out>/swaypack/`` containing ``sway.yaml`` plus
    bundled artifacts. A ready-to-run command line is printed at the
    end including the ``SWAY_NULL_CACHE_DIR=...`` env var that
    redirects null-stats lookups at the bundled cache instead of the
    user's home directory.
    """
    # NOTE: the docstring above is kept verbatim because Typer shows it as
    # the command's --help text.
    import shlex

    from dlm_sway.cli._unpack import UnpackError, unpack_swaypack

    target_dir = out if out is not None else Path.cwd()
    try:
        report = unpack_swaypack(pack_path, target_dir=target_dir)
    except (UnpackError, SwayError) as exc:
        # Either failure family becomes one red stderr line and exit code 1.
        typer.secho(f"unpack: {exc}", fg=typer.colors.RED, err=True)
        raise typer.Exit(code=1) from exc

    typer.echo(f"extracted: {report.out_dir}")
    typer.echo(f" spec_path: {report.spec_path}")
    typer.echo(f" null_stats: {report.null_stats_dir or '(none in pack)'}")
    typer.echo(f" swaypack_version: {report.manifest.get('swaypack_version')}")
    typer.echo(f" packed_at: {report.manifest.get('packed_at')}")
    typer.echo("")
    typer.echo("To run the bundled spec:")

    # The lines below are meant to be pasted into a shell, so the paths
    # are shell-quoted: an extraction directory containing spaces or
    # metacharacters would otherwise produce a broken command.
    # shlex.quote leaves ordinary paths untouched.
    spec_arg = shlex.quote(str(report.spec_path))
    if report.null_stats_dir is not None:
        cache_arg = shlex.quote(str(report.null_stats_dir))
        typer.echo(f" SWAY_NULL_CACHE_DIR={cache_arg} sway run {spec_arg}")
    else:
        typer.echo(f" sway run {spec_arg}")
1390
1391
def serve_cmd(
    host: Annotated[
        str,
        typer.Option(
            "--host",
            help=(
                "Interface to bind. Default 127.0.0.1 (localhost only). "
                "Binding to 0.0.0.0 requires --api-key."
            ),
        ),
    ] = "127.0.0.1",
    port: Annotated[
        int,
        typer.Option("--port", help="TCP port to bind."),
    ] = 8787,
    max_loaded_models: Annotated[
        int,
        typer.Option(
            "--max-loaded-models",
            help=(
                "How many backends to keep warm in memory. Each loaded "
                "model holds its own VRAM/RAM; default 2 fits a 16 GB GPU "
                "with two ~1.5B fp16 adapters."
            ),
        ),
    ] = 2,
    api_key: Annotated[
        str | None,
        typer.Option(
            "--api-key",
            help=(
                "Bearer token required on every non-/health request. "
                "Required when --host is not loopback."
            ),
        ),
    ] = None,
    log_level: Annotated[
        str,
        typer.Option("--log-level", help="uvicorn log level."),
    ] = "info",
) -> None:
    """Run the warm-backend HTTP daemon (S36).

    First call loads the backend (~15s); subsequent calls reuse it
    (~2s). See ``sway run`` for the equivalent one-shot CLI.
    """
    # NOTE: the docstring above is kept verbatim because Typer shows it as
    # the command's --help text.
    try:
        import uvicorn  # noqa: F401 — presence check
    except ImportError as exc:
        typer.secho(
            "sway serve requires the [serve] extra: pip install 'dlm-sway[serve]'",
            fg=typer.colors.RED,
            err=True,
        )
        raise typer.Exit(code=2) from exc

    from dlm_sway.serve.app import create_app, parse_host_port
    from dlm_sway.serve.cache import BackendCache

    # Public-bind safety — refuse before any uvicorn startup work.
    # The key is tested for truthiness, not just ``is None``: an empty
    # ``--api-key ""`` must not satisfy the guard, because the status line
    # below already reports an empty key as ``auth=no``.
    loopback = host in ("127.0.0.1", "::1", "localhost")
    if not loopback and not api_key:
        typer.secho(
            f"refusing to bind {host}:{port} without --api-key. "
            "Either pass --api-key <key> or use --host 127.0.0.1.",
            fg=typer.colors.RED,
            err=True,
        )
        raise typer.Exit(code=2)

    # Called for validation only; the return value is not used here.
    parse_host_port(host, port)
    if max_loaded_models < 1:
        typer.secho("--max-loaded-models must be >= 1", fg=typer.colors.RED, err=True)
        raise typer.Exit(code=2)

    cache = BackendCache(max_size=max_loaded_models)
    app = create_app(cache=cache, api_key=api_key)

    typer.echo(f"sway serve {__version__} listening on http://{host}:{port}")
    typer.echo(f" max_loaded_models={max_loaded_models} auth={'yes' if api_key else 'no'}")
    if not loopback:
        typer.secho(
            " WARNING: bound to a non-loopback interface — anyone on "
            "this network with the API key can drive your GPU.",
            fg=typer.colors.YELLOW,
        )

    import uvicorn as _uvicorn

    # Blocks until the server shuts down.
    _uvicorn.run(app, host=host, port=port, log_level=log_level)