Add synth CLI staging flow

- SHA: dd38498b41bba7bdcaf0b85daafe5659ae312cef
- Parents: 2b41f63
- Tree: 38a764a

| Status | File | + | - |
|---|---|---|---|
| M | docs/cli/reference.md | 46 | 0 |
| M | src/dlm/cli/app.py | 11 | 0 |
| M | src/dlm/cli/commands.py | 373 | 2 |
| M | src/dlm/synth/__init__.py | 32 | 0 |
| A | src/dlm/synth/apply.py | 163 | 0 |
| A | src/dlm/synth/pending.py | 202 | 0 |
| M | tests/integration/cli/test_reference_doc_parity.py | 21 | 0 |
| A | tests/unit/cli/test_synth_cmd.py | 372 | 0 |
docs/cli/reference.md — modified

@@ -201,6 +201,52 @@ plan; `dlm preference apply` writes the staged plan into the `.dlm`,
| 201 | 201 | section, and `dlm preference list` shows both applied and staged |
| 202 | 202 | sections. |
| 203 | 203 | |
| 204 | +### `dlm synth` | |
| 205 | + | |
| 206 | +Synthesize instruction or preference training data (Sprint 43). | |
| 207 | + | |
| 208 | +``` | |
| 209 | +dlm synth instructions <path> [--teacher T] [--per-section N] | |
| 210 | + [--strategy {extraction,expansion,both}] | |
| 211 | + [--filter {sway,none,dedup-only}] | |
| 212 | + [--threshold F] [--max-pairs N] | |
| 213 | + [--max-new-tokens N] [--temp F] [--top-p F] | |
| 214 | + [--seed N] [--apply | --dry-run] | |
| 215 | +dlm synth preferences <path> [--samples N] [--judge J] [--threshold F] | |
| 216 | + [--max-pairs N] [--temp F] [--top-p F] | |
| 217 | + [--backend {auto,pytorch,mlx}] [--adapter NAME] | |
| 218 | + [--apply] | |
| 219 | +dlm synth revert <path> | |
| 220 | +dlm synth list <path> | |
| 221 | +``` | |
| 222 | + | |
| 223 | +| Option | Default | Notes | | |
| 224 | +|---|---|---| | |
| 225 | +| `--teacher T` | `self` | Teacher selector: `self`, `hf:<model>`, `openai:<model>`, `anthropic:<model>`, or `vllm-server:<url>`. | | |
| 226 | +| `--per-section N` | `3` | Instruction pairs to request per prose section, before filtering. | |
| 227 | +| `--strategy {extraction,expansion,both}` | `extraction` | `extraction` asks for questions answered directly by the prose, `expansion` extrapolates beyond it, and `both` splits the per-section budget across both prompts. | | |
| 228 | +| `--filter {sway,none,dedup-only}` | `sway` | Filter pipeline after generation. `sway` reuses Sprint 42's judge, `dedup-only` keeps near-duplicate suppression but skips judging, `none` accepts every deduped pair. | | |
| 229 | +| `--threshold F` | judge default | Minimum sway-judge margin. Only valid with `--filter sway`. | | |
| 230 | +| `--max-pairs N` | unlimited | Cap the number of accepted synth pairs from one invocation. | | |
| 231 | +| `--max-new-tokens N` | `512` | Teacher-side completion cap per prompt. | | |
| 232 | +| `--temp F` | `0.0` | Teacher sampling temperature. | | |
| 233 | +| `--top-p F` | None | Optional top-p cutoff for teacher sampling. | | |
| 234 | +| `--seed N` | None | Optional teacher sampling seed. | | |
| 235 | +| `--apply` | false | Write accepted auto-synth `::instruction::` sections directly to the `.dlm`. | | |
| 236 | +| `--dry-run` | false | Preview the synth plan without staging or writing anything. Default behavior stages the accepted plan under the store for inspection via `dlm synth list`. | | |
| 237 | + | |
| 238 | +`dlm synth instructions` prints the raw synth plan, then the filter | |
| 239 | +summary (`generated`, `dedup`, `judge passed`, `threshold/accepted`). | |
| 240 | +Without `--apply` or `--dry-run`, the accepted auto-synth | |
| 241 | +`::instruction::` sections are staged under the store root so `dlm | |
| 242 | +synth list` can show them before a later rerun. `dlm synth revert` | |
| 243 | +strips every `auto_synth: true` instruction section from the document. | |
| 244 | + | |
| 245 | +`dlm synth preferences` is an alias over `dlm preference mine` for the | |
| 246 | +same Sprint 42 preference-mining loop. Use it when you want the | |
| 247 | +umbrella synth surface but the output should be `::preference::` | |
| 248 | +sections instead of `::instruction::` sections. | |
| 249 | + | |
| 204 | 250 | ### `dlm templates` |
| 205 | 251 | |
| 206 | 252 | Browse the starter template gallery (Sprint 27). |
src/dlm/cli/app.py — modified

@@ -115,6 +115,17 @@ app.command("show")(commands.show_cmd)
| 115 | 115 | app.command("migrate")(commands.migrate_cmd) |
| 116 | 116 | app.command("harvest")(commands.harvest_cmd) |
| 117 | 117 | |
| 118 | +# `dlm synth instructions|preferences|revert|list` — synthetic data loop. | |
| 119 | +_synth_app = typer.Typer( | |
| 120 | + help="Synthesize instruction or preference training data.", | |
| 121 | + no_args_is_help=True, | |
| 122 | +) | |
| 123 | +_synth_app.command("instructions")(commands.synth_instructions_cmd) | |
| 124 | +_synth_app.command("preferences")(commands.preference_mine_cmd) | |
| 125 | +_synth_app.command("revert")(commands.synth_revert_cmd) | |
| 126 | +_synth_app.command("list")(commands.synth_list_cmd) | |
| 127 | +app.add_typer(_synth_app, name="synth") | |
| 128 | + | |
| 118 | 129 | # `dlm preference mine|apply|revert|list` — auto-mined preference loop. |
| 119 | 130 | _preference_app = typer.Typer( |
| 120 | 131 | help="Mine, stage, apply, and inspect auto-mined preference sections.", |
src/dlm/cli/commands.py — modified

@@ -11,8 +11,9 @@ which is why `src/dlm/cli/commands.py` has a ruff per-file-ignore for
| 11 | 11 | from __future__ import annotations |
| 12 | 12 | |
| 13 | 13 | import os |
| 14 | +from collections.abc import Sequence | |
| 14 | 15 | from pathlib import Path |
| 15 | -from typing import TYPE_CHECKING, Annotated, Any, Literal | |
| 16 | +from typing import TYPE_CHECKING, Annotated, Any, Literal, cast | |
| 16 | 17 | |
| 17 | 18 | import typer |
| 18 | 19 | |
@@ -1665,7 +1666,6 @@ def export_cmd( | ||
| 1665 | 1666 | ] = False, |
| 1666 | 1667 | ) -> None: |
| 1667 | 1668 | """Export the adapter to a runtime target.""" |
| 1668 | - from collections.abc import Sequence | |
| 1669 | 1669 | |
| 1670 | 1670 | from rich.console import Console |
| 1671 | 1671 | |
@@ -4135,6 +4135,377 @@ def _preference_prompt_summary(content: str, *, section_id: str) -> str: | ||
| 4135 | 4135 | return prompt or "<blank>" |
| 4136 | 4136 | |
| 4137 | 4137 | |
| 4138 | +# --- synth ----------------------------------------------------------------- | |
| 4139 | + | |
| 4140 | + | |
| 4141 | +def synth_instructions_cmd( | |
| 4142 | + path: Annotated[ | |
| 4143 | + Path, typer.Argument(help=".dlm file to synthesize instruction sections from.") | |
| 4144 | + ], | |
| 4145 | + teacher: Annotated[ | |
| 4146 | + str, | |
| 4147 | + typer.Option( | |
| 4148 | + "--teacher", | |
| 4149 | + help=( | |
| 4150 | + "Teacher selector: self, hf:<model>, openai:<model>, " | |
| 4151 | + "anthropic:<model>, or vllm-server:<url>." | |
| 4152 | + ), | |
| 4153 | + ), | |
| 4154 | + ] = "self", | |
| 4155 | + per_section: Annotated[ | |
| 4156 | + int, | |
| 4157 | + typer.Option( | |
| 4158 | + "--per-section", | |
| 4159 | + help="Instruction pairs to generate per prose section.", | |
| 4160 | + min=1, | |
| 4161 | + ), | |
| 4162 | + ] = 3, | |
| 4163 | + strategy: Annotated[ | |
| 4164 | + str, | |
| 4165 | + typer.Option( | |
| 4166 | + "--strategy", | |
| 4167 | + help="Synthesis strategy: extraction, expansion, or both.", | |
| 4168 | + ), | |
| 4169 | + ] = "extraction", | |
| 4170 | + filter_kind: Annotated[ | |
| 4171 | + str, | |
| 4172 | + typer.Option( | |
| 4173 | + "--filter", | |
| 4174 | + help="Filter pipeline: sway, none, or dedup-only.", | |
| 4175 | + ), | |
| 4176 | + ] = "sway", | |
| 4177 | + threshold: Annotated[ | |
| 4178 | + float | None, | |
| 4179 | + typer.Option( | |
| 4180 | + "--threshold", | |
| 4181 | + help="Optional minimum sway-judge margin when --filter=sway.", | |
| 4182 | + min=0.0, | |
| 4183 | + ), | |
| 4184 | + ] = None, | |
| 4185 | + max_pairs: Annotated[ | |
| 4186 | + int | None, | |
| 4187 | + typer.Option( | |
| 4188 | + "--max-pairs", | |
| 4189 | + help="Maximum accepted synth pairs to keep from this run.", | |
| 4190 | + min=1, | |
| 4191 | + ), | |
| 4192 | + ] = None, | |
| 4193 | + max_new_tokens: Annotated[ | |
| 4194 | + int, | |
| 4195 | + typer.Option( | |
| 4196 | + "--max-new-tokens", | |
| 4197 | + help="Maximum new tokens the teacher may emit per prompt.", | |
| 4198 | + min=1, | |
| 4199 | + ), | |
| 4200 | + ] = 512, | |
| 4201 | + temp: Annotated[ | |
| 4202 | + float, | |
| 4203 | + typer.Option("--temp", help="Teacher sampling temperature.", min=0.0), | |
| 4204 | + ] = 0.0, | |
| 4205 | + top_p: Annotated[ | |
| 4206 | + float | None, | |
| 4207 | + typer.Option( | |
| 4208 | + "--top-p", | |
| 4209 | + help="Optional top-p cutoff for teacher sampling.", | |
| 4210 | + min=0.0, | |
| 4211 | + max=1.0, | |
| 4212 | + ), | |
| 4213 | + ] = None, | |
| 4214 | + seed: Annotated[ | |
| 4215 | + int | None, | |
| 4216 | + typer.Option("--seed", help="Optional teacher sampling seed."), | |
| 4217 | + ] = None, | |
| 4218 | + apply: Annotated[ | |
| 4219 | + bool, | |
| 4220 | + typer.Option( | |
| 4221 | + "--apply", | |
| 4222 | + help="Write accepted auto-synth sections directly to the .dlm.", | |
| 4223 | + ), | |
| 4224 | + ] = False, | |
| 4225 | + dry_run: Annotated[ | |
| 4226 | + bool, | |
| 4227 | + typer.Option( | |
| 4228 | + "--dry-run", | |
| 4229 | + help="Preview the synth plan without staging or writing anything.", | |
| 4230 | + ), | |
| 4231 | + ] = False, | |
| 4232 | +) -> None: | |
| 4233 | + """Generate, stage, or apply auto-synth instruction sections.""" | |
| 4234 | + from rich.console import Console | |
| 4235 | + | |
| 4236 | + from dlm.doc.errors import DlmParseError | |
| 4237 | + from dlm.doc.parser import parse_file | |
| 4238 | + from dlm.preference import JudgeUnavailableError, build_judge | |
| 4239 | + from dlm.store.paths import for_dlm | |
| 4240 | + from dlm.synth import ( | |
| 4241 | + InvalidTeacherSpecError, | |
| 4242 | + TeacherInvocationError, | |
| 4243 | + TeacherUnavailableError, | |
| 4244 | + build_synth_plan, | |
| 4245 | + build_teacher, | |
| 4246 | + clear_pending_plan, | |
| 4247 | + filter_synth_plan, | |
| 4248 | + render_filter_report, | |
| 4249 | + render_synth_plan, | |
| 4250 | + save_pending_plan, | |
| 4251 | + ) | |
| 4252 | + from dlm.synth import ( | |
| 4253 | + apply_plan as apply_synth_plan, | |
| 4254 | + ) | |
| 4255 | + from dlm.synth import ( | |
| 4256 | + build_apply_plan as build_synth_apply_plan, | |
| 4257 | + ) | |
| 4258 | + from dlm.synth import ( | |
| 4259 | + render_apply_plan as render_synth_apply_plan, | |
| 4260 | + ) | |
| 4261 | + | |
| 4262 | + console = Console(stderr=True) | |
| 4263 | + out_console = Console() | |
| 4264 | + | |
| 4265 | + if strategy not in ("extraction", "expansion", "both"): | |
| 4266 | + console.print( | |
| 4267 | + "[red]synth:[/red] --strategy must be one of extraction|expansion|both " | |
| 4268 | + f"(got {strategy!r})." | |
| 4269 | + ) | |
| 4270 | + raise typer.Exit(code=2) | |
| 4271 | + if filter_kind not in ("sway", "none", "dedup-only"): | |
| 4272 | + console.print( | |
| 4273 | + f"[red]synth:[/red] --filter must be one of sway|none|dedup-only (got {filter_kind!r})." | |
| 4274 | + ) | |
| 4275 | + raise typer.Exit(code=2) | |
| 4276 | + if apply and dry_run: | |
| 4277 | + console.print("[red]synth:[/red] --apply and --dry-run are mutually exclusive.") | |
| 4278 | + raise typer.Exit(code=2) | |
| 4279 | + if threshold is not None and filter_kind != "sway": | |
| 4280 | + console.print("[red]synth:[/red] --threshold is only valid when --filter is `sway`.") | |
| 4281 | + raise typer.Exit(code=2) | |
| 4282 | + | |
| 4283 | + try: | |
| 4284 | + parsed = parse_file(path) | |
| 4285 | + except (DlmParseError, OSError) as exc: | |
| 4286 | + console.print(f"[red]synth:[/red] {exc}") | |
| 4287 | + raise typer.Exit(code=1) from exc | |
| 4288 | + | |
| 4289 | + store = for_dlm(parsed.frontmatter.dlm_id) | |
| 4290 | + | |
| 4291 | + try: | |
| 4292 | + strategy_value = cast(Literal["extraction", "expansion", "both"], strategy) | |
| 4293 | + teacher_obj = build_teacher(teacher, dlm_path=path) | |
| 4294 | + plan = build_synth_plan( | |
| 4295 | + parsed, | |
| 4296 | + teacher_obj, | |
| 4297 | + per_section=per_section, | |
| 4298 | + strategy=strategy_value, | |
| 4299 | + max_pairs=max_pairs, | |
| 4300 | + max_new_tokens=max_new_tokens, | |
| 4301 | + temperature=temp, | |
| 4302 | + top_p=top_p, | |
| 4303 | + seed=seed, | |
| 4304 | + ) | |
| 4305 | + except InvalidTeacherSpecError as exc: | |
| 4306 | + console.print(f"[red]synth:[/red] {exc}") | |
| 4307 | + raise typer.Exit(code=2) from exc | |
| 4308 | + except TeacherUnavailableError as exc: | |
| 4309 | + console.print(f"[red]synth:[/red] {exc}") | |
| 4310 | + raise typer.Exit(code=1) from exc | |
| 4311 | + except TeacherInvocationError as exc: | |
| 4312 | + console.print(f"[red]synth:[/red] {exc}") | |
| 4313 | + raise typer.Exit(code=1) from exc | |
| 4314 | + except ValueError as exc: | |
| 4315 | + console.print(f"[red]synth:[/red] {exc}") | |
| 4316 | + raise typer.Exit(code=2) from exc | |
| 4317 | + | |
| 4318 | + judge_obj = None | |
| 4319 | + if filter_kind == "sway": | |
| 4320 | + try: | |
| 4321 | + judge_obj = build_judge("sway", dlm_path=path) | |
| 4322 | + except JudgeUnavailableError as exc: | |
| 4323 | + console.print(f"[red]synth:[/red] {exc}") | |
| 4324 | + raise typer.Exit(code=1) from exc | |
| 4325 | + | |
| 4326 | + try: | |
| 4327 | + filter_value = cast(Literal["sway", "none", "dedup-only"], filter_kind) | |
| 4328 | + filtered = filter_synth_plan( | |
| 4329 | + plan, | |
| 4330 | + filter_kind=filter_value, | |
| 4331 | + judge=judge_obj, | |
| 4332 | + threshold=threshold, | |
| 4333 | + ) | |
| 4334 | + except ValueError as exc: | |
| 4335 | + console.print(f"[red]synth:[/red] {exc}") | |
| 4336 | + raise typer.Exit(code=2) from exc | |
| 4337 | + | |
| 4338 | + out_console.print(render_synth_plan(plan)) | |
| 4339 | + out_console.print("") | |
| 4340 | + out_console.print(render_filter_report(filtered)) | |
| 4341 | + | |
| 4342 | + if not filtered.additions: | |
| 4343 | + if not dry_run: | |
| 4344 | + clear_pending_plan(store) | |
| 4345 | + out_console.print( | |
| 4346 | + "\n[yellow]no synth additions accepted[/yellow] — either generation " | |
| 4347 | + "yielded no valid pairs, dedup removed them, or the filter rejected them." | |
| 4348 | + ) | |
| 4349 | + raise typer.Exit(code=2) | |
| 4350 | + | |
| 4351 | + sections = [addition.addition.section for addition in filtered.additions] | |
| 4352 | + | |
| 4353 | + if apply: | |
| 4354 | + apply_plan = build_synth_apply_plan(parsed, sections) | |
| 4355 | + out_console.print("") | |
| 4356 | + out_console.print(render_synth_apply_plan(apply_plan)) | |
| 4357 | + summary = apply_synth_plan(parsed, apply_plan, target=path) | |
| 4358 | + clear_pending_plan(store) | |
| 4359 | + out_console.print( | |
| 4360 | + f"\n[green]synth:[/green] wrote {summary.added} section(s) to {path} " | |
| 4361 | + f"({summary.skipped} skipped)" | |
| 4362 | + ) | |
| 4363 | + return | |
| 4364 | + | |
| 4365 | + if dry_run: | |
| 4366 | + out_console.print("\n[green]synth:[/green] dry-run only — nothing staged.") | |
| 4367 | + return | |
| 4368 | + | |
| 4369 | + pending = save_pending_plan(store, source_path=path.resolve(), sections=sections) | |
| 4370 | + out_console.print( | |
| 4371 | + f"\n[green]synth:[/green] staged {len(pending.sections)} auto-synth instruction " | |
| 4372 | + f"section(s). Run [bold]dlm synth list {path}[/bold] to inspect them." | |
| 4373 | + ) | |
| 4374 | + | |
| 4375 | + | |
| 4376 | +def synth_revert_cmd( | |
| 4377 | + path: Annotated[Path, typer.Argument(help=".dlm file to strip auto-synth instructions from.")], | |
| 4378 | +) -> None: | |
| 4379 | + """Remove every `auto_synth: true` instruction section from the `.dlm`.""" | |
| 4380 | + from rich.console import Console | |
| 4381 | + | |
| 4382 | + from dlm.doc.errors import DlmParseError | |
| 4383 | + from dlm.doc.parser import parse_file | |
| 4384 | + from dlm.synth import revert_all_auto_synth | |
| 4385 | + | |
| 4386 | + console = Console(stderr=True) | |
| 4387 | + out_console = Console() | |
| 4388 | + | |
| 4389 | + try: | |
| 4390 | + parsed = parse_file(path) | |
| 4391 | + except (DlmParseError, OSError) as exc: | |
| 4392 | + console.print(f"[red]synth:[/red] {exc}") | |
| 4393 | + raise typer.Exit(code=1) from exc | |
| 4394 | + | |
| 4395 | + summary = revert_all_auto_synth(parsed, target=path) | |
| 4396 | + out_console.print( | |
| 4397 | + f"[green]synth:[/green] stripped {len(summary.added_section_ids)} " | |
| 4398 | + f"auto-synth instruction section(s) from {path}" | |
| 4399 | + ) | |
| 4400 | + | |
| 4401 | + | |
| 4402 | +def synth_list_cmd( | |
| 4403 | + path: Annotated[Path, typer.Argument(help=".dlm file whose auto-synth instructions we list.")], | |
| 4404 | +) -> None: | |
| 4405 | + """List applied + staged auto-synth instruction sections.""" | |
| 4406 | + from rich.console import Console | |
| 4407 | + | |
| 4408 | + from dlm.doc.errors import DlmParseError | |
| 4409 | + from dlm.doc.parser import parse_file | |
| 4410 | + from dlm.doc.sections import SectionType | |
| 4411 | + from dlm.store.paths import for_dlm | |
| 4412 | + from dlm.synth import PendingSynthPlanError, load_pending_plan | |
| 4413 | + | |
| 4414 | + console = Console(stderr=True) | |
| 4415 | + out_console = Console() | |
| 4416 | + | |
| 4417 | + try: | |
| 4418 | + parsed = parse_file(path) | |
| 4419 | + except (DlmParseError, OSError) as exc: | |
| 4420 | + console.print(f"[red]synth:[/red] {exc}") | |
| 4421 | + raise typer.Exit(code=1) from exc | |
| 4422 | + | |
| 4423 | + store = for_dlm(parsed.frontmatter.dlm_id) | |
| 4424 | + try: | |
| 4425 | + pending = load_pending_plan(store) | |
| 4426 | + except PendingSynthPlanError as exc: | |
| 4427 | + console.print(f"[red]synth:[/red] {exc}") | |
| 4428 | + raise typer.Exit(code=1) from exc | |
| 4429 | + | |
| 4430 | + applied = [ | |
| 4431 | + section | |
| 4432 | + for section in parsed.sections | |
| 4433 | + if section.type is SectionType.INSTRUCTION and section.auto_synth | |
| 4434 | + ] | |
| 4435 | + | |
| 4436 | + out_console.print(f"[bold]{path}[/bold]") | |
| 4437 | + out_console.print(f" applied auto-synth: {len(applied)}") | |
| 4438 | + out_console.print(f" staged pending: {len(pending.sections) if pending else 0}") | |
| 4439 | + | |
| 4440 | + if not applied and pending is None: | |
| 4441 | + out_console.print(" [dim]no auto-synth instruction sections yet[/dim]") | |
| 4442 | + return | |
| 4443 | + | |
| 4444 | + if applied: | |
| 4445 | + _render_synth_listing(out_console, "Applied", applied) | |
| 4446 | + if pending is not None: | |
| 4447 | + _render_synth_listing(out_console, "Pending", pending.sections) | |
| 4448 | + | |
| 4449 | + | |
| 4450 | +def _render_synth_listing( | |
| 4451 | + out_console: object, | |
| 4452 | + heading: str, | |
| 4453 | + sections: Sequence[object], | |
| 4454 | +) -> None: | |
| 4455 | + from collections import Counter | |
| 4456 | + | |
| 4457 | + from rich.console import Console | |
| 4458 | + | |
| 4459 | + from dlm.doc.sections import Section | |
| 4460 | + | |
| 4461 | + assert isinstance(out_console, Console) | |
| 4462 | + typed_sections = [section for section in sections if isinstance(section, Section)] | |
| 4463 | + | |
| 4464 | + out_console.print(f"\n[bold]{heading}[/bold]") | |
| 4465 | + | |
| 4466 | + teacher_counts = Counter(section.synth_teacher or "unknown" for section in typed_sections) | |
| 4467 | + strategy_counts = Counter(section.synth_strategy or "unknown" for section in typed_sections) | |
| 4468 | + source_counts = Counter(section.source_section_id or "unknown" for section in typed_sections) | |
| 4469 | + | |
| 4470 | + out_console.print(" by teacher:") | |
| 4471 | + for teacher_name in sorted(teacher_counts): | |
| 4472 | + out_console.print(f" - {teacher_name}: {teacher_counts[teacher_name]}") | |
| 4473 | + | |
| 4474 | + out_console.print(" by strategy:") | |
| 4475 | + for strategy_name in sorted(strategy_counts): | |
| 4476 | + out_console.print(f" - {strategy_name}: {strategy_counts[strategy_name]}") | |
| 4477 | + | |
| 4478 | + out_console.print(" by source section:") | |
| 4479 | + for source_id in sorted(source_counts): | |
| 4480 | + out_console.print(f" - {source_id}: {source_counts[source_id]}") | |
| 4481 | + | |
| 4482 | + out_console.print(" sections:") | |
| 4483 | + for section in typed_sections: | |
| 4484 | + prompt = _synth_prompt_summary(section.content, section_id=section.section_id) | |
| 4485 | + out_console.print( | |
| 4486 | + " - " | |
| 4487 | + f"{section.section_id} teacher={section.synth_teacher or 'unknown'} " | |
| 4488 | + f"strategy={section.synth_strategy or 'unknown'} " | |
| 4489 | + f"source={section.source_section_id or 'unknown'} " | |
| 4490 | + f"prompt={prompt}" | |
| 4491 | + ) | |
| 4492 | + | |
| 4493 | + | |
| 4494 | +def _synth_prompt_summary(content: str, *, section_id: str) -> str: | |
| 4495 | + """Best-effort prompt summary for `synth list`.""" | |
| 4496 | + from dlm.data.errors import InstructionParseError | |
| 4497 | + from dlm.data.instruction_parser import parse_instruction_body | |
| 4498 | + | |
| 4499 | + try: | |
| 4500 | + pairs = parse_instruction_body(content, section_id=section_id) | |
| 4501 | + except InstructionParseError: | |
| 4502 | + return "<unparseable>" | |
| 4503 | + if not pairs: | |
| 4504 | + return "<empty>" | |
| 4505 | + prompt = pairs[0].question.splitlines()[0].strip() | |
| 4506 | + return prompt or "<blank>" | |
| 4507 | + | |
| 4508 | + | |
| 4138 | 4509 | # --- harvest -------------------------------------------------------------- |
| 4139 | 4510 | |
| 4140 | 4511 | |
src/dlm/synth/__init__.py — modified

@@ -1,5 +1,16 @@
| 1 | 1 | """Sprint 43 synthetic-instruction generation substrate.""" |
| 2 | 2 | |
| 3 | +from dlm.synth.apply import ( | |
| 4 | + PlannedSynthAddition, | |
| 5 | + SkippedSynthAddition, | |
| 6 | + SynthApplyPlan, | |
| 7 | + SynthApplySkipReason, | |
| 8 | + SynthApplySummary, | |
| 9 | + apply_plan, | |
| 10 | + build_apply_plan, | |
| 11 | + render_apply_plan, | |
| 12 | + revert_all_auto_synth, | |
| 13 | +) | |
| 3 | 14 | from dlm.synth.errors import ( |
| 4 | 15 | InvalidTeacherSpecError, |
| 5 | 16 | SynthError, |
@@ -16,6 +27,13 @@ from dlm.synth.filter import ( | ||
| 16 | 27 | filter_synth_plan, |
| 17 | 28 | render_filter_report, |
| 18 | 29 | ) |
| 30 | +from dlm.synth.pending import ( | |
| 31 | + PendingSynthPlan, | |
| 32 | + PendingSynthPlanError, | |
| 33 | + clear_pending_plan, | |
| 34 | + load_pending_plan, | |
| 35 | + save_pending_plan, | |
| 36 | +) | |
| 19 | 37 | from dlm.synth.prompts import ( |
| 20 | 38 | DEFAULT_PROMPT_TEMPLATES, |
| 21 | 39 | PromptParserKind, |
@@ -55,11 +73,18 @@ __all__ = [ | ||
| 55 | 73 | "HfTeacher", |
| 56 | 74 | "InvalidTeacherSpecError", |
| 57 | 75 | "OpenAiTeacher", |
| 76 | + "PendingSynthPlan", | |
| 77 | + "PendingSynthPlanError", | |
| 78 | + "PlannedSynthAddition", | |
| 58 | 79 | "PromptParserKind", |
| 59 | 80 | "PlannedSynthInstruction", |
| 60 | 81 | "SelfTeacher", |
| 82 | + "SkippedSynthAddition", | |
| 61 | 83 | "SkippedSynthSection", |
| 62 | 84 | "SynthError", |
| 85 | + "SynthApplyPlan", | |
| 86 | + "SynthApplySkipReason", | |
| 87 | + "SynthApplySummary", | |
| 63 | 88 | "SynthFilterKind", |
| 64 | 89 | "SynthFilterReport", |
| 65 | 90 | "SynthFilterSkipReason", |
@@ -75,11 +100,18 @@ __all__ = [ | ||
| 75 | 100 | "TeacherRef", |
| 76 | 101 | "TeacherUnavailableError", |
| 77 | 102 | "VllmServerTeacher", |
| 103 | + "apply_plan", | |
| 78 | 104 | "build_synth_plan", |
| 105 | + "build_apply_plan", | |
| 79 | 106 | "build_teacher", |
| 107 | + "clear_pending_plan", | |
| 80 | 108 | "filter_synth_plan", |
| 81 | 109 | "get_prompt_template", |
| 110 | + "load_pending_plan", | |
| 82 | 111 | "parse_teacher_ref", |
| 83 | 112 | "render_filter_report", |
| 113 | + "render_apply_plan", | |
| 84 | 114 | "render_synth_plan", |
| 115 | + "revert_all_auto_synth", | |
| 116 | + "save_pending_plan", | |
| 85 | 117 | ] |
src/dlm/synth/apply.py — added

@@ -0,0 +1,163 @@
| 1 | +"""Apply/revert staged auto-synth instruction sections.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +import dataclasses | |
| 6 | +from dataclasses import dataclass | |
| 7 | +from enum import StrEnum | |
| 8 | +from pathlib import Path | |
| 9 | + | |
| 10 | +from dlm.doc.parser import ParsedDlm | |
| 11 | +from dlm.doc.sections import Section, SectionType | |
| 12 | +from dlm.doc.serializer import serialize | |
| 13 | +from dlm.io.atomic import write_text as atomic_write_text | |
| 14 | + | |
| 15 | + | |
| 16 | +class SynthApplySkipReason(StrEnum): | |
| 17 | + """Why a staged synth section did not make it into the plan.""" | |
| 18 | + | |
| 19 | + ALREADY_PRESENT = "already_present" | |
| 20 | + NOT_INSTRUCTION = "not_instruction" | |
| 21 | + NOT_AUTO_SYNTH = "not_auto_synth" | |
| 22 | + | |
| 23 | + | |
| 24 | +@dataclass(frozen=True) | |
| 25 | +class PlannedSynthAddition: | |
| 26 | + """One section that survived plan-time validation and dedupe.""" | |
| 27 | + | |
| 28 | + section: Section | |
| 29 | + | |
| 30 | + | |
| 31 | +@dataclass(frozen=True) | |
| 32 | +class SkippedSynthAddition: | |
| 33 | + """One input section that did not make it into the plan.""" | |
| 34 | + | |
| 35 | + section: Section | |
| 36 | + reason: SynthApplySkipReason | |
| 37 | + detail: str = "" | |
| 38 | + | |
| 39 | + | |
| 40 | +@dataclass(frozen=True) | |
| 41 | +class SynthApplyPlan: | |
| 42 | + """What the applier would do if executed.""" | |
| 43 | + | |
| 44 | + additions: tuple[PlannedSynthAddition, ...] | |
| 45 | + skipped: tuple[SkippedSynthAddition, ...] | |
| 46 | + | |
| 47 | + | |
| 48 | +@dataclass(frozen=True) | |
| 49 | +class SynthApplySummary: | |
| 50 | + """Outcome of applying or reverting auto-synth instruction sections.""" | |
| 51 | + | |
| 52 | + target: Path | |
| 53 | + added: int | |
| 54 | + skipped: int | |
| 55 | + added_section_ids: tuple[str, ...] | |
| 56 | + | |
| 57 | + | |
| 58 | +def build_apply_plan(parsed: ParsedDlm, sections: list[Section]) -> SynthApplyPlan: | |
| 59 | + """Validate + dedupe staged synth sections against `parsed`.""" | |
| 60 | + existing = {section.section_id for section in parsed.sections} | |
| 61 | + additions: list[PlannedSynthAddition] = [] | |
| 62 | + skipped: list[SkippedSynthAddition] = [] | |
| 63 | + | |
| 64 | + for section in sections: | |
| 65 | + if section.type is not SectionType.INSTRUCTION: | |
| 66 | + skipped.append( | |
| 67 | + SkippedSynthAddition( | |
| 68 | + section=section, | |
| 69 | + reason=SynthApplySkipReason.NOT_INSTRUCTION, | |
| 70 | + detail="only instruction sections can be applied", | |
| 71 | + ) | |
| 72 | + ) | |
| 73 | + continue | |
| 74 | + if not section.auto_synth: | |
| 75 | + skipped.append( | |
| 76 | + SkippedSynthAddition( | |
| 77 | + section=section, | |
| 78 | + reason=SynthApplySkipReason.NOT_AUTO_SYNTH, | |
| 79 | + detail="section is not marked auto_synth=true", | |
| 80 | + ) | |
| 81 | + ) | |
| 82 | + continue | |
| 83 | + if section.section_id in existing: | |
| 84 | + skipped.append( | |
| 85 | + SkippedSynthAddition( | |
| 86 | + section=section, | |
| 87 | + reason=SynthApplySkipReason.ALREADY_PRESENT, | |
| 88 | + detail=f"section_id {section.section_id} already in document", | |
| 89 | + ) | |
| 90 | + ) | |
| 91 | + continue | |
| 92 | + additions.append(PlannedSynthAddition(section=section)) | |
| 93 | + existing.add(section.section_id) | |
| 94 | + | |
| 95 | + return SynthApplyPlan(additions=tuple(additions), skipped=tuple(skipped)) | |
| 96 | + | |
| 97 | + | |
| 98 | +def render_apply_plan(plan: SynthApplyPlan) -> str: | |
| 99 | + """Plain-text form for dry-run output and tests.""" | |
| 100 | + lines = [ | |
| 101 | + f"synth apply plan: {len(plan.additions)} add, {len(plan.skipped)} skip", | |
| 102 | + "", | |
| 103 | + ] | |
| 104 | + if plan.additions: | |
| 105 | + lines.append("=== additions ===") | |
| 106 | + for add in plan.additions: | |
| 107 | + lines.append("") | |
| 108 | + lines.append( | |
| 109 | + "+ ::instruction:: " | |
| 110 | + "[section_id=" | |
| 111 | + f"{add.section.section_id} teacher={add.section.synth_teacher} " | |
| 112 | + f"strategy={add.section.synth_strategy} source={add.section.source_section_id}]" | |
| 113 | + ) | |
| 114 | + if plan.skipped: | |
| 115 | + lines.append("") | |
| 116 | + lines.append("=== skipped ===") | |
| 117 | + for skip in plan.skipped: | |
| 118 | + lines.append(f"- {skip.section.section_id}: {skip.reason.value} ({skip.detail})") | |
| 119 | + return "\n".join(lines) | |
| 120 | + | |
| 121 | + | |
| 122 | +def apply_plan( | |
| 123 | + parsed: ParsedDlm, | |
| 124 | + plan: SynthApplyPlan, | |
| 125 | + *, | |
| 126 | + target: Path, | |
| 127 | +) -> SynthApplySummary: | |
| 128 | + """Append plan additions to `parsed.sections` and atomically write them.""" | |
| 129 | + new_sections = tuple(parsed.sections) + tuple(add.section for add in plan.additions) | |
| 130 | + updated = dataclasses.replace(parsed, sections=new_sections) | |
| 131 | + atomic_write_text(target, serialize(updated)) | |
| 132 | + return SynthApplySummary( | |
| 133 | + target=target, | |
| 134 | + added=len(plan.additions), | |
| 135 | + skipped=len(plan.skipped), | |
| 136 | + added_section_ids=tuple(add.section.section_id for add in plan.additions), | |
| 137 | + ) | |
| 138 | + | |
| 139 | + | |
| 140 | +def revert_all_auto_synth( | |
| 141 | + parsed: ParsedDlm, | |
| 142 | + *, | |
| 143 | + target: Path, | |
| 144 | +) -> SynthApplySummary: | |
| 145 | + """Strip every auto-synth instruction section and atomically rewrite `target`.""" | |
| 146 | + survivors = tuple( | |
| 147 | + section | |
| 148 | + for section in parsed.sections | |
| 149 | + if not (section.type is SectionType.INSTRUCTION and section.auto_synth) | |
| 150 | + ) | |
| 151 | + removed_ids = tuple( | |
| 152 | + section.section_id | |
| 153 | + for section in parsed.sections | |
| 154 | + if section.type is SectionType.INSTRUCTION and section.auto_synth | |
| 155 | + ) | |
| 156 | + updated = dataclasses.replace(parsed, sections=survivors) | |
| 157 | + atomic_write_text(target, serialize(updated)) | |
| 158 | + return SynthApplySummary( | |
| 159 | + target=target, | |
| 160 | + added=0, | |
| 161 | + skipped=0, | |
| 162 | + added_section_ids=removed_ids, | |
| 163 | + ) | |
src/dlm/synth/pending.py — added

@@ -0,0 +1,202 @@
| 1 | +"""Persist staged auto-synth instruction sections between CLI steps.""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +import json | |
| 6 | +from dataclasses import dataclass | |
| 7 | +from datetime import UTC, datetime | |
| 8 | +from pathlib import Path | |
| 9 | +from typing import TYPE_CHECKING, Any | |
| 10 | + | |
| 11 | +from dlm.doc.sections import Section, SectionType | |
| 12 | +from dlm.io.atomic import write_text as atomic_write_text | |
| 13 | +from dlm.synth.errors import SynthError | |
| 14 | + | |
| 15 | +if TYPE_CHECKING: | |
| 16 | + from collections.abc import Sequence | |
| 17 | + | |
| 18 | + from dlm.store.paths import StorePath | |
| 19 | + | |
| 20 | + | |
| 21 | +class PendingSynthPlanError(SynthError): | |
| 22 | + """Raised when the staged synth plan cannot be read or validated.""" | |
| 23 | + | |
| 24 | + | |
| 25 | +@dataclass(frozen=True) | |
| 26 | +class PendingSynthPlan: | |
| 27 | + """One staged synth plan for a store.""" | |
| 28 | + | |
| 29 | + source_path: Path | |
| 30 | + created_at: str | |
| 31 | + sections: tuple[Section, ...] | |
| 32 | + | |
| 33 | + | |
def pending_plan_path(store: StorePath) -> Path:
    """Location of the staged synth payload inside `store`'s layout."""
    return Path(store.root, "synth", "pending.json")
| 37 | + | |
| 38 | + | |
def save_pending_plan(
    store: StorePath,
    *,
    source_path: Path,
    sections: Sequence[Section],
) -> PendingSynthPlan:
    """Persist `sections` as the staged synth plan for `store`.

    Args:
        store: Store whose ``synth/pending.json`` receives the payload.
        source_path: The .dlm document the sections came from; resolved to an
            absolute path before persisting.
        sections: Sections to stage; order is preserved.

    Returns:
        The in-memory plan that was written, stamped with a fresh created_at.
    """
    plan = PendingSynthPlan(
        source_path=source_path.resolve(),
        created_at=_utcnow(),
        sections=tuple(sections),
    )
    path = pending_plan_path(store)
    # The synth/ directory may not exist yet on a fresh store layout.
    path.parent.mkdir(parents=True, exist_ok=True)
    payload = {
        "schema_version": 1,  # load_pending_plan rejects any other version
        "source_path": str(plan.source_path),
        "created_at": plan.created_at,
        "sections": [_section_to_payload(section) for section in plan.sections],
    }
    # Atomic write so a crash cannot leave a truncated JSON file behind.
    atomic_write_text(path, json.dumps(payload, indent=2, sort_keys=True) + "\n")
    return plan
| 61 | + | |
| 62 | + | |
def load_pending_plan(store: StorePath) -> PendingSynthPlan | None:
    """Return the staged synth plan for `store`, or None when absent.

    Raises:
        PendingSynthPlanError: The file exists but cannot be read, is not
            valid JSON, or fails schema/type validation.
    """
    path = pending_plan_path(store)
    if not path.exists():
        return None
    try:
        raw = json.loads(path.read_text(encoding="utf-8"))
    except OSError as exc:
        raise PendingSynthPlanError(f"could not read staged synth plan: {exc}") from exc
    except json.JSONDecodeError as exc:
        raise PendingSynthPlanError(f"staged synth plan is not valid JSON: {exc}") from exc

    if not isinstance(raw, dict):
        raise PendingSynthPlanError("staged synth plan must be a JSON object")
    # Only schema_version 1 (written by save_pending_plan) is understood.
    if raw.get("schema_version") != 1:
        raise PendingSynthPlanError(
            f"unsupported staged synth plan schema_version={raw.get('schema_version')!r}"
        )

    source_path = raw.get("source_path")
    created_at = raw.get("created_at")
    sections_raw = raw.get("sections")
    # Empty strings are treated the same as missing fields.
    if not isinstance(source_path, str) or not source_path:
        raise PendingSynthPlanError("staged synth plan is missing source_path")
    if not isinstance(created_at, str) or not created_at:
        raise PendingSynthPlanError("staged synth plan is missing created_at")
    if not isinstance(sections_raw, list):
        raise PendingSynthPlanError("staged synth plan is missing sections")

    sections: list[Section] = []
    for idx, entry in enumerate(sections_raw):
        try:
            sections.append(_section_from_payload(entry))
        except (TypeError, ValueError, KeyError) as exc:
            # Surface the failing index so a corrupt payload is easy to locate.
            raise PendingSynthPlanError(f"invalid section payload at index {idx}: {exc}") from exc

    return PendingSynthPlan(
        source_path=Path(source_path),
        created_at=created_at,
        sections=tuple(sections),
    )
| 104 | + | |
| 105 | + | |
def clear_pending_plan(store: StorePath) -> bool:
    """Delete the staged synth plan for `store`. Returns True iff it existed.

    Uses EAFP (unlink and catch FileNotFoundError) instead of an
    exists()/unlink() pair, so a concurrent deletion between the check and
    the unlink cannot raise.
    """
    try:
        pending_plan_path(store).unlink()
    except FileNotFoundError:
        return False
    return True
| 113 | + | |
| 114 | + | |
| 115 | +def _utcnow() -> str: | |
| 116 | + return datetime.now(UTC).replace(microsecond=0).isoformat().replace("+00:00", "Z") | |
| 117 | + | |
| 118 | + | |
def _section_to_payload(section: Section) -> dict[str, Any]:
    """Serialize `section` into a JSON-safe dict; inverse of _section_from_payload."""
    return {
        "type": section.type.value,
        "content": section.content,
        "start_line": section.start_line,
        "adapter": section.adapter,
        # Copy so the payload never aliases the section's own mapping.
        "tags": dict(section.tags),
        "auto_harvest": section.auto_harvest,
        "harvest_source": section.harvest_source,
        "auto_mined": section.auto_mined,
        "judge_name": section.judge_name,
        "judge_score_chosen": section.judge_score_chosen,
        "judge_score_rejected": section.judge_score_rejected,
        "mined_at": section.mined_at,
        "mined_run_id": section.mined_run_id,
        "auto_synth": section.auto_synth,
        "synth_teacher": section.synth_teacher,
        "synth_strategy": section.synth_strategy,
        "synth_at": section.synth_at,
        "source_section_id": section.source_section_id,
        "media_path": section.media_path,
        "media_alt": section.media_alt,
        "media_blob_sha": section.media_blob_sha,
        "media_transcript": section.media_transcript,
    }
| 144 | + | |
| 145 | + | |
def _section_from_payload(raw: object) -> Section:
    """Rebuild a Section from its JSON payload, validating field types.

    Raises TypeError/ValueError/KeyError on malformed input; load_pending_plan
    wraps these into PendingSynthPlanError with the failing index.
    """
    if not isinstance(raw, dict):
        raise TypeError(f"expected object, got {type(raw).__name__}")
    # SectionType(...) raises ValueError for unknown type strings.
    section_type = SectionType(str(raw["type"]))
    tags = raw.get("tags", {})
    if not isinstance(tags, dict):
        raise TypeError("tags must be an object")
    if not all(isinstance(k, str) and isinstance(v, str) for k, v in tags.items()):
        raise TypeError("tags keys and values must be strings")
    # "type" and "content" are required (KeyError if absent); the rest default.
    return Section(
        type=section_type,
        content=str(raw["content"]),
        start_line=int(raw.get("start_line", 0)),
        adapter=_optional_str(raw.get("adapter")),
        tags=dict(tags),
        auto_harvest=bool(raw.get("auto_harvest", False)),
        harvest_source=_optional_str(raw.get("harvest_source")),
        auto_mined=bool(raw.get("auto_mined", False)),
        judge_name=_optional_str(raw.get("judge_name")),
        judge_score_chosen=_optional_float(raw.get("judge_score_chosen")),
        judge_score_rejected=_optional_float(raw.get("judge_score_rejected")),
        mined_at=_optional_str(raw.get("mined_at")),
        mined_run_id=_optional_int(raw.get("mined_run_id")),
        auto_synth=bool(raw.get("auto_synth", False)),
        synth_teacher=_optional_str(raw.get("synth_teacher")),
        synth_strategy=_optional_str(raw.get("synth_strategy")),
        synth_at=_optional_str(raw.get("synth_at")),
        source_section_id=_optional_str(raw.get("source_section_id")),
        media_path=_optional_str(raw.get("media_path")),
        media_alt=_optional_str(raw.get("media_alt")),
        media_blob_sha=_optional_str(raw.get("media_blob_sha")),
        media_transcript=_optional_str(raw.get("media_transcript")),
    )
| 179 | + | |
| 180 | + | |
| 181 | +def _optional_str(value: object) -> str | None: | |
| 182 | + if value is None: | |
| 183 | + return None | |
| 184 | + if not isinstance(value, str): | |
| 185 | + raise TypeError(f"expected string or null, got {type(value).__name__}") | |
| 186 | + return value | |
| 187 | + | |
| 188 | + | |
| 189 | +def _optional_float(value: object) -> float | None: | |
| 190 | + if value is None: | |
| 191 | + return None | |
| 192 | + if isinstance(value, bool) or not isinstance(value, int | float): | |
| 193 | + raise TypeError(f"expected float or null, got {type(value).__name__}") | |
| 194 | + return float(value) | |
| 195 | + | |
| 196 | + | |
| 197 | +def _optional_int(value: object) -> int | None: | |
| 198 | + if value is None: | |
| 199 | + return None | |
| 200 | + if isinstance(value, bool) or not isinstance(value, int): | |
| 201 | + raise TypeError(f"expected int or null, got {type(value).__name__}") | |
| 202 | + return value | |
tests/integration/cli/test_reference_doc_parity.py (modified) @@ -86,3 +86,24 @@ def test_reference_doc_covers_preference_surface() -> None: | ||
| 86 | 86 | assert "dlm preference apply <path>" in section |
| 87 | 87 | assert "dlm preference revert <path>" in section |
| 88 | 88 | assert "dlm preference list <path>" in section |
| 89 | + | |
| 90 | + | |
def test_reference_doc_covers_synth_surface() -> None:
    """Every documented synth flag appears in both the reference doc and CLI help."""
    section = _section("synth")
    help_text = _normalized_help("synth", "instructions")

    expected_flags = [
        "--teacher",
        "--per-section",
        "--strategy",
        "--filter",
        "--threshold",
        "--apply",
        "--dry-run",
    ]
    for flag in expected_flags:
        assert flag in help_text
        assert flag in section

    for invocation in (
        "dlm synth preferences <path>",
        "dlm synth revert <path>",
        "dlm synth list <path>",
    ):
        assert invocation in section
tests/unit/cli/test_synth_cmd.py (added) @@ -0,0 +1,372 @@ | ||
| 1 | +"""CLI tests for `dlm synth` (Sprint 43).""" | |
| 2 | + | |
| 3 | +from __future__ import annotations | |
| 4 | + | |
| 5 | +import re | |
| 6 | +from collections import deque | |
| 7 | +from datetime import datetime | |
| 8 | +from pathlib import Path | |
| 9 | + | |
| 10 | +import pytest | |
| 11 | +from typer.testing import CliRunner | |
| 12 | + | |
| 13 | +from dlm.base_models import BaseModelSpec | |
| 14 | +from dlm.cli.app import app | |
| 15 | +from dlm.doc.parser import parse_file | |
| 16 | +from dlm.doc.sections import SectionType | |
| 17 | +from dlm.preference.judge import PairScore | |
| 18 | +from dlm.preference.pending import load_pending_plan as load_pending_preference_plan | |
| 19 | +from dlm.store.manifest import Manifest, TrainingRunSummary, save_manifest | |
| 20 | +from dlm.store.paths import for_dlm | |
| 21 | +from dlm.synth.pending import load_pending_plan | |
| 22 | + | |
# Strips ANSI escape sequences (CSI codes) so CLI output compares as plain text.
_ANSI_RE = re.compile(r"\x1b\[[0-9;?]*[ -/]*[@-~]")
# Fixed dlm_id shared by every fixture document in this module.
_DLM_ID = "01KPQ9X1000000000000000000"
# Fixed 40-hex base-model revision used by the manifest and model-spec fixtures.
_REV = "0123456789abcdef0123456789abcdef01234567"
| 26 | + | |
| 27 | + | |
def _normalized_output(result: object) -> str:
    """Concatenate a CLI result's stdout+stderr, strip ANSI codes, collapse whitespace."""
    combined = getattr(result, "output", "") + getattr(result, "stderr", "")
    plain = _ANSI_RE.sub("", combined)
    return " ".join(plain.split())
| 31 | + | |
| 32 | + | |
def _write_synth_doc(path: Path) -> None:
    """Create a minimal .dlm document with one prose section to synthesize from."""
    lines = [
        "---",
        f"dlm_id: {_DLM_ID}",
        "dlm_version: 15",
        "base_model: smollm2-135m",
        "---",
        "DGEMM multiplies two dense matrices and optionally accumulates the result.",
    ]
    path.write_text("\n".join(lines) + "\n", encoding="utf-8")
| 44 | + | |
def _write_preference_doc(path: Path) -> None:
    """Create a minimal .dlm document with one instruction section to mine from."""
    lines = [
        "---",
        f"dlm_id: {_DLM_ID}",
        "dlm_version: 15",
        "base_model: smollm2-135m",
        "---",
        "::instruction::",
        "### Q",
        "What is DGEMM?",
        "### A",
        "A matrix multiply.",
    ]
    path.write_text("\n".join(lines) + "\n", encoding="utf-8")
| 59 | + | |
| 60 | + | |
def _write_manifest(home: Path, doc: Path, *, run_id: int = 7) -> None:
    """Seed a store under `home` with a manifest holding one completed training run.

    The preference-mining path requires a trained adapter, so the manifest
    records a single run with adapter_version=1.
    """
    store = for_dlm(_DLM_ID, home=home)
    store.ensure_layout()
    save_manifest(
        store.manifest,
        Manifest(
            dlm_id=_DLM_ID,
            base_model="smollm2-135m",
            base_model_revision=_REV,
            source_path=doc.resolve(),
            training_runs=[
                TrainingRunSummary(
                    run_id=run_id,
                    started_at=datetime(2026, 4, 24, 12, 0, 0),
                    ended_at=datetime(2026, 4, 24, 12, 1, 0),
                    adapter_version=1,
                    seed=123,
                    steps=12,
                )
            ],
        ),
    )
| 83 | + | |
| 84 | + | |
def _spec() -> BaseModelSpec:
    """Build a fully-populated BaseModelSpec fixture for the smollm2-135m key."""
    return BaseModelSpec.model_validate(
        {
            "key": "smollm2-135m",
            "hf_id": "HuggingFaceTB/SmolLM2-135M-Instruct",
            "revision": _REV,
            "architecture": "LlamaForCausalLM",
            "params": 135_000_000,
            "target_modules": ["q_proj", "v_proj"],
            "template": "chatml",
            "gguf_arch": "llama",
            "tokenizer_pre": "default",
            "license_spdx": "Apache-2.0",
            "license_url": None,
            "requires_acceptance": False,
            "redistributable": True,
            "size_gb_fp16": 0.3,
            "context_length": 4096,
            "recommended_seq_len": 2048,
        }
    )
| 106 | + | |
| 107 | + | |
class _FakeTeacher:
    """Teacher stub that returns the same canned payload for every generate call."""

    def __init__(self, name: str, payload: str) -> None:
        self.name = name
        self._payload = payload

    def generate(self, *_args: object, **_kwargs: object) -> str:
        # All prompts and generation kwargs are ignored.
        return self._payload
| 115 | + | |
| 116 | + | |
class _FakeJudge:
    """Judge stub that always prefers candidate A (0.8 vs 0.2)."""

    name = "sway:preference_judge"
    suggested_threshold = 0.1

    def score_pair(self, prompt: str, candidate_a: str, candidate_b: str) -> PairScore:
        # Inputs are ignored; the fixed margin (0.6) clears suggested_threshold.
        _ = prompt, candidate_a, candidate_b
        return PairScore(score_a=0.8, score_b=0.2)
| 124 | + | |
| 125 | + | |
class _FakeBackend:
    """Inference-backend stub that replays canned responses per prompt, in order."""

    def __init__(self, responses: dict[str, list[str]]) -> None:
        # Each prompt maps to a FIFO queue; generate() pops from the front.
        self._responses = {prompt: deque(items) for prompt, items in responses.items()}

    def load(self, spec: object, store: object, *, adapter_name: str | None = None) -> None:
        # Nothing to load; accept and ignore the arguments.
        _ = spec, store, adapter_name

    def generate(self, prompt: str, **_kwargs: object) -> str:
        # Raises KeyError/IndexError if a test issues an unexpected or extra prompt.
        return self._responses[prompt].popleft()

    def unload(self) -> None:
        return None
| 138 | + | |
| 139 | + | |
def _patch_synth_runtime(monkeypatch: pytest.MonkeyPatch) -> None:
    """Stub the teacher factory and judge so synth runs offline and deterministically."""
    # One canned JSON instruction-pair payload per teacher selector under test.
    payloads = {
        "self": ('[{"question":"What does DGEMM do?","answer":"It multiplies dense matrices."}]'),
        "hf:stub/model": (
            '[{"question":"When would you call DGEMM?","answer":"When you need a BLAS matrix multiplication."}]'
        ),
    }

    def _build_teacher(raw: str, **_kwargs: object) -> _FakeTeacher:
        # Unknown selectors fall back to the "self" payload.
        payload = payloads.get(raw, payloads["self"])
        return _FakeTeacher(raw, payload)

    monkeypatch.setattr("dlm.synth.build_teacher", _build_teacher)
    monkeypatch.setattr("dlm.preference.build_judge", lambda *args, **kwargs: _FakeJudge())
| 154 | + | |
| 155 | + | |
def _patch_preference_alias_runtime(monkeypatch: pytest.MonkeyPatch) -> None:
    """Stub model resolution, hardware probing, backend, and judge for `synth preferences`."""
    monkeypatch.setattr("dlm.base_models.resolve", lambda *args, **kwargs: _spec())
    # Minimal doctor result: only the `.capabilities` attribute is consumed.
    monkeypatch.setattr(
        "dlm.hardware.doctor",
        lambda: type("R", (), {"capabilities": object()})(),
    )
    monkeypatch.setattr(
        "dlm.inference.backends.select_backend",
        lambda *args, **kwargs: "pytorch",
    )
    # Two samples per prompt: a "bad" then a "good" answer for the judge to rank.
    monkeypatch.setattr(
        "dlm.inference.backends.build_backend",
        lambda *args, **kwargs: _FakeBackend({"What is DGEMM?": ["bad answer", "good answer"]}),
    )
    monkeypatch.setattr("dlm.preference.build_judge", lambda *args, **kwargs: _FakeJudge())
| 171 | + | |
| 172 | + | |
class TestSynthCmd:
    """End-to-end CLI tests for the `dlm synth` command group."""

    def test_instructions_stage_pending_plan_by_default(
        self,
        tmp_path: Path,
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        """Without --apply, `synth instructions` stages a pending plan and edits nothing."""
        home = tmp_path / "home"
        doc = tmp_path / "doc.dlm"
        _write_synth_doc(doc)
        _patch_synth_runtime(monkeypatch)

        runner = CliRunner()
        result = runner.invoke(
            app,
            ["--home", str(home), "synth", "instructions", str(doc), "--per-section", "1"],
        )

        assert result.exit_code == 0, result.output
        normalized = _normalized_output(result)
        assert "synth plan: 1 add, 0 skip" in normalized
        assert "synth filter: generated 1, dedup 1, judge passed 1, threshold 1" in normalized
        assert "staged 1 auto-synth instruction section" in normalized

        # The staged plan carries the section with its synth provenance fields set.
        pending = load_pending_plan(for_dlm(_DLM_ID, home=home))
        assert pending is not None
        assert len(pending.sections) == 1
        assert pending.sections[0].auto_synth is True
        assert pending.sections[0].synth_teacher == "self"
        assert pending.sections[0].synth_strategy == "extraction"

    def test_apply_writes_auto_synth_sections_and_clears_pending(
        self,
        tmp_path: Path,
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        """--apply writes the sections into the .dlm file and removes the staged plan."""
        home = tmp_path / "home"
        doc = tmp_path / "doc.dlm"
        _write_synth_doc(doc)
        _patch_synth_runtime(monkeypatch)

        runner = CliRunner()
        result = runner.invoke(
            app,
            [
                "--home",
                str(home),
                "synth",
                "instructions",
                str(doc),
                "--per-section",
                "1",
                "--apply",
            ],
        )

        assert result.exit_code == 0, result.output
        normalized = _normalized_output(result)
        assert "synth apply plan: 1 add, 0 skip" in normalized
        assert "wrote 1 section(s)" in normalized
        assert load_pending_plan(for_dlm(_DLM_ID, home=home)) is None

        parsed = parse_file(doc)
        assert any(
            section.type is SectionType.INSTRUCTION and section.auto_synth
            for section in parsed.sections
        )

    def test_revert_strips_auto_synth_sections(
        self,
        tmp_path: Path,
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        """`synth revert` removes previously applied auto-synth sections from the document."""
        home = tmp_path / "home"
        doc = tmp_path / "doc.dlm"
        _write_synth_doc(doc)
        _patch_synth_runtime(monkeypatch)

        runner = CliRunner()
        apply_result = runner.invoke(
            app,
            [
                "--home",
                str(home),
                "synth",
                "instructions",
                str(doc),
                "--per-section",
                "1",
                "--apply",
            ],
        )
        assert apply_result.exit_code == 0, apply_result.output

        revert_result = runner.invoke(
            app,
            ["--home", str(home), "synth", "revert", str(doc)],
        )

        assert revert_result.exit_code == 0, revert_result.output
        assert "stripped 1 auto-synth instruction section" in _normalized_output(revert_result)

        parsed = parse_file(doc)
        assert not any(
            section.type is SectionType.INSTRUCTION and section.auto_synth
            for section in parsed.sections
        )

    def test_list_shows_counts_for_applied_and_pending_sections(
        self,
        tmp_path: Path,
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        """`synth list` breaks down applied vs staged sections by teacher, strategy, and source."""
        home = tmp_path / "home"
        doc = tmp_path / "doc.dlm"
        _write_synth_doc(doc)
        _patch_synth_runtime(monkeypatch)

        runner = CliRunner()
        # First run applies one extraction section from the "self" teacher.
        apply_result = runner.invoke(
            app,
            [
                "--home",
                str(home),
                "synth",
                "instructions",
                str(doc),
                "--per-section",
                "1",
                "--strategy",
                "extraction",
                "--apply",
            ],
        )
        assert apply_result.exit_code == 0, apply_result.output

        # Second run only stages one expansion section from the HF stub teacher.
        stage_result = runner.invoke(
            app,
            [
                "--home",
                str(home),
                "synth",
                "instructions",
                str(doc),
                "--teacher",
                "hf:stub/model",
                "--per-section",
                "1",
                "--strategy",
                "expansion",
            ],
        )
        assert stage_result.exit_code == 0, stage_result.output

        list_result = runner.invoke(
            app,
            ["--home", str(home), "synth", "list", str(doc)],
        )

        assert list_result.exit_code == 0, list_result.output
        normalized = _normalized_output(list_result)
        source_id = parse_file(doc).sections[0].section_id
        assert "applied auto-synth: 1" in normalized
        assert "staged pending: 1" in normalized
        assert "self: 1" in normalized
        assert "hf:stub/model: 1" in normalized
        assert "extraction: 1" in normalized
        assert "expansion: 1" in normalized
        assert f"{source_id}: 1" in normalized

    def test_preferences_alias_routes_through_preference_mine(
        self,
        tmp_path: Path,
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        """`synth preferences` delegates to the preference-mining flow and stages its plan."""
        home = tmp_path / "home"
        doc = tmp_path / "doc.dlm"
        _write_preference_doc(doc)
        _write_manifest(home, doc)
        _patch_preference_alias_runtime(monkeypatch)

        runner = CliRunner()
        result = runner.invoke(
            app,
            [
                "--home",
                str(home),
                "synth",
                "preferences",
                str(doc),
                "--samples",
                "2",
            ],
        )

        assert result.exit_code == 0, result.output
        assert "preference mine plan: 1 add, 0 skip" in _normalized_output(result)
        pending = load_pending_preference_plan(for_dlm(_DLM_ID, home=home))
        assert pending is not None
        assert len(pending.sections) == 1
        assert pending.sections[0].auto_mined is True