tenseleyflow/documentlanguagemodel / 1f790be

Browse files

Sweep sprint-N / audit-N references out of src/dlm

Audit 12 M12.3: 22+ historical sprint IDs in module docstrings and one user-visible CLI help string. Each rewritten to describe the code rather than the work-tracking artifact that birthed it.
Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
1f790beb7a1751b0d59c372df9acb84bbe0c2074
Parents
71ebaed
Tree
11179f4

23 changed files

StatusFile+-
M src/dlm/base_models/downloader.py 3 3
M src/dlm/base_models/registry.py 2 2
M src/dlm/base_models/schema.py 2 2
M src/dlm/cli/commands.py 1 2
M src/dlm/doc/migrations/v12.py 5 5
M src/dlm/doc/migrations/v13.py 4 4
M src/dlm/doc/migrations/v14.py 6 6
M src/dlm/doc/parser.py 3 3
M src/dlm/doc/schema.py 1 1
M src/dlm/doc/sections.py 3 3
M src/dlm/export/manifest.py 1 1
M src/dlm/export/sway_json.py 2 2
M src/dlm/export/targets/base.py 3 4
M src/dlm/export/targets/ollama.py 3 3
M src/dlm/export/vendoring.py 1 1
M src/dlm/modality/__init__.py 1 2
M src/dlm/preference/__init__.py 3 3
M src/dlm/preference/errors.py 1 1
M src/dlm/synth/__init__.py 1 1
M src/dlm/synth/errors.py 1 1
M src/dlm/synth/prompts.py 1 1
M src/dlm/synth/run.py 1 1
M src/dlm/synth/teachers.py 1 1
src/dlm/base_models/downloader.pymodified
@@ -46,9 +46,9 @@ def download_spec(
4646
     """Fetch (or locate) the snapshot for `spec` and return a pinned reference.
4747
 
4848
     `cache_dir` overrides `HF_HOME`. `local_dir` copies the snapshot
49
-    into a specific path (non-symlinked) — used by sprint 04's store
50
-    when we want a per-`.dlm` cache. `local_files_only=True` refuses to
51
-    hit the network (mirrors `HF_HUB_OFFLINE`).
49
+    into a specific path (non-symlinked) — used by the per-`.dlm`
50
+    store cache. `local_files_only=True` refuses to hit the network
51
+    (mirrors `HF_HUB_OFFLINE`).
5252
     """
5353
     from huggingface_hub import snapshot_download
5454
     from huggingface_hub.errors import (
src/dlm/base_models/registry.pymodified
@@ -440,8 +440,8 @@ _ENTRIES: tuple[BaseModelSpec, ...] = (
440440
     # Mistral Small 3.1 24B Instruct — Apache-2.0 multimodal base with
441441
     # native vision support and 128k context.
442442
     #
443
-    # The Sprint 40 draft treated this as text-only; the live HF
444
-    # config is `Mistral3ForConditionalGeneration` with both text and
443
+    # An earlier draft treated this as text-only; the live HF config
444
+    # is `Mistral3ForConditionalGeneration` with both text and
445445
     # vision towers, so we register it as vision-language. The current
446446
     # processor config pins `[IMG]` as the image placeholder and a
447447
     # longest edge of 1540 px. DLM's current `VlPreprocessorPlan`
src/dlm/base_models/schema.pymodified
@@ -149,8 +149,8 @@ class BaseModelSpec(BaseModel):
149149
     provenance_url: str | None = None
150150
     provenance_match_text: str | None = None
151151
 
152
-    # Modality + multi-modal preprocessing (schema v10 + v11, plus
153
-    # Sprint 40's additive `text-moe` discriminator).
152
+    # Modality + multi-modal preprocessing (schema v10 + v11, plus the
153
+    # additive `text-moe` discriminator).
154154
     # Text-family bases leave `modality in {"text", "text-moe"}`
155155
     # with both plans None;
156156
     # `modality="vision-language"` requires a `vl_preprocessor_plan`
src/dlm/cli/commands.pymodified
@@ -1675,8 +1675,7 @@ def export_cmd(
16751675
             help=(
16761676
                 "After the export, also write a ready-to-run sway.yaml "
16771677
                 "(via dlm-sway autogen) into the export dir. Requires the "
1678
-                "[sway] extra: pip install 'dlm[sway]'. Closes the "
1679
-                "training-then-evaluating gap from sway Sprint 26 X1."
1678
+                "[sway] extra: pip install 'dlm[sway]'."
16801679
             ),
16811680
         ),
16821681
     ] = False,
src/dlm/doc/migrations/v12.pymodified
@@ -1,10 +1,10 @@
11
 """v12 → v13 migrator: identity bump for the 2026 registry refresh.
22
 
3
-Sprint 40 adds additive base-model registry metadata
4
-(`reasoning_tuned`, `context_length_effective`, `text-moe`) without
5
-changing `.dlm` frontmatter shape. The doc schema still advances to
6
-v13 so migration-aware tooling can distinguish post-refresh docs from
7
-older ones. Existing v12 documents therefore migrate as pure identity.
3
+The base-model registry gained additive metadata (`reasoning_tuned`,
4
+`context_length_effective`, `text-moe`) without changing `.dlm`
5
+frontmatter shape. The doc schema still advances to v13 so
6
+migration-aware tooling can distinguish post-refresh docs from older
7
+ones. Existing v12 documents therefore migrate as pure identity.
88
 """
99
 
1010
 from __future__ import annotations
src/dlm/doc/migrations/v13.pymodified
@@ -1,9 +1,9 @@
11
 """v13 → v14 migrator: additive auto-mined preference metadata markers.
22
 
3
-Sprint 42 adds additive metadata on ``::preference::`` sections:
4
-``auto_mined`` plus judge provenance / scores / timestamps. These live
5
-in body-side magic-comment markers rather than frontmatter, so existing
6
-v13 documents migrate as pure identity. The schema still advances so
3
+Adds additive metadata on ``::preference::`` sections — ``auto_mined``
4
+plus judge provenance / scores / timestamps. These live in body-side
5
+magic-comment markers rather than frontmatter, so existing v13
6
+documents migrate as pure identity. The schema still advances so
77
 migration-aware tooling can distinguish docs written before the mining
88
 loop existed.
99
 """
src/dlm/doc/migrations/v14.pymodified
@@ -1,11 +1,11 @@
11
 """v14 → v15 migrator: identity bump for auto-synth instruction metadata.
22
 
3
-Sprint 43 adds additive metadata on ``::instruction::`` sections:
4
-``auto_synth`` plus teacher / strategy / timestamp / source provenance.
5
-These live in body-side magic-comment markers rather than frontmatter,
6
-so existing v14 documents migrate as pure identity. The schema still
7
-advances so migration-aware tooling can distinguish docs written before
8
-the synth loop existed.
3
+Adds additive metadata on ``::instruction::`` sections — ``auto_synth``
4
+plus teacher / strategy / timestamp / source provenance. These live in
5
+body-side magic-comment markers rather than frontmatter, so existing
6
+v14 documents migrate as pure identity. The schema still advances so
7
+migration-aware tooling can distinguish docs written before the synth
8
+loop existed.
99
 """
1010
 
1111
 from __future__ import annotations
src/dlm/doc/parser.pymodified
@@ -6,7 +6,7 @@ Flow:
66
               → split frontmatter and body on the two `---` delimiters
77
               → YAML-parse the frontmatter
88
               → Pydantic validate → DlmFrontmatter
9
-              → check dlm_version (sprint 12b owns migration)
9
+              → check dlm_version (delegates to the migration registry)
1010
               → tokenize body into Section list (code-fence aware)
1111
               → return ParsedDlm(frozen)
1212
 
@@ -509,7 +509,7 @@ def _resolve_fence_type(name: str, line: int, path: Path | None) -> tuple[Sectio
509509
 def _parse_auto_mined_marker(
510510
     attr_blob: str, *, path: Path | None, line: int
511511
 ) -> tuple[str, float, float, str, int]:
512
-    """Parse the Sprint 42 auto-mined metadata marker on preference sections."""
512
+    """Parse the auto-mined metadata marker on preference sections."""
513513
     if not _MARKER_ATTR_BLOB_RE.fullmatch(attr_blob):
514514
         raise FenceError(
515515
             "invalid dlm-auto-mined marker syntax",
@@ -606,7 +606,7 @@ def _parse_auto_mined_marker(
606606
 def _parse_auto_synth_marker(
607607
     attr_blob: str, *, path: Path | None, line: int
608608
 ) -> tuple[str, str, str, str]:
609
-    """Parse the Sprint 43 auto-synth metadata marker on instruction sections."""
609
+    """Parse the auto-synth metadata marker on instruction sections."""
610610
     if not _MARKER_ATTR_BLOB_RE.fullmatch(attr_blob):
611611
         raise FenceError(
612612
             "invalid dlm-auto-synth marker syntax",
src/dlm/doc/schema.pymodified
@@ -50,7 +50,7 @@ training signal). v12 adds the additive `training.audio` block
5050
 (currently one field, `auto_resample: bool`) — opt-in automatic
5151
 resampling when audio files don't match the base's pinned rate.
5252
 Default False preserves the "refuse on SR mismatch" contract. v13 is
53
-an identity bump paired with Sprint 40's base-model registry refresh:
53
+an identity bump paired with the 2026 base-model registry refresh:
5454
 the document frontmatter shape is unchanged, but the migration chain
5555
 still advances so tooling can distinguish post-refresh docs from older
5656
 ones. v14 adds additive auto-mined preference metadata on
src/dlm/doc/sections.pymodified
@@ -93,15 +93,15 @@ class Section:
9393
     neither field participates in `section_id`.
9494
 
9595
     `auto_mined` marks a `::preference::` section as synthesized by
96
-    Sprint 42's preference-mining loop rather than hand-authored. The
96
+    the preference-mining loop rather than hand-authored. The
9797
     accompanying judge metadata (`judge_name`, `judge_score_chosen`,
9898
     `judge_score_rejected`, `mined_at`, `mined_run_id`) captures
9999
     provenance for review, metrics, and revert flows. Like harvest
100100
     metadata, these fields do not participate in `section_id`.
101101
 
102102
     `auto_synth` marks an `::instruction::` section as synthesized by
103
-    Sprint 43's instruction-generation loop rather than hand-authored.
104
-    The accompanying metadata (`synth_teacher`, `synth_strategy`,
103
+    the instruction-generation loop rather than hand-authored. The
104
+    accompanying metadata (`synth_teacher`, `synth_strategy`,
105105
     `synth_at`, `source_section_id`) captures provenance for review,
106106
     metrics, and revert flows. Like the other provenance flags, these
107107
     fields do not participate in `section_id`.
src/dlm/export/manifest.pymodified
@@ -2,7 +2,7 @@
22
 
33
 One file per `exports/<quant>/` directory. Records:
44
 
5
-- export target (`ollama` today; more runtimes in Sprint 41)
5
+- export target (`ollama`, `vllm`, `llama-server`, `mlx-serve`)
66
 - quant level
77
 - checksums of emitted GGUF artifacts
88
 - pinned llama.cpp tag (so a future upstream bump can detect drift)
src/dlm/export/sway_json.pymodified
@@ -1,7 +1,7 @@
11
 """Cross-repo bridge: emit a ready-to-run ``sway.yaml`` next to a dlm export.
22
 
3
-Sprint 26 X1. Closes Audit 03's "users who train via dlm then evaluate
4
-via sway have to run two separate commands" gap. With ``dlm export
3
+Closes the gap where users who trained via dlm and then evaluated via
4
+sway had to run two separate commands. With ``dlm export
55
 --emit-sway-json``, the user runs::
66
 
77
     dlm export myadapter.dlm --target ollama --emit-sway-json
src/dlm/export/targets/base.pymodified
@@ -1,9 +1,8 @@
11
 """Export-target protocol shared by runtime-specific export surfaces.
22
 
3
-Sprint 41 starts by making "target" a first-class concept even though
4
-the only registered runtime today is Ollama. Later targets (vLLM,
5
-llama-server, MLX-serve) plug into the same shape instead of growing
6
-ad-hoc CLI branches.
3
+"Target" is a first-class concept so each registered runtime (Ollama,
4
+vLLM, llama-server, MLX-serve) plugs into the same shape instead of
5
+growing ad-hoc CLI branches.
76
 """
87
 
98
 from __future__ import annotations
src/dlm/export/targets/ollama.pymodified
@@ -1,9 +1,9 @@
11
 """Ollama target wrapper.
22
 
33
 The text GGUF export path already owns Modelfile emission, registration,
4
-and smoke testing. Sprint 41's first substrate slice wraps that
5
-behavior in an `ExportTarget` implementation so later runtimes can slot
6
-into a shared registry without rewriting the existing Ollama flow.
4
+and smoke testing. This module wraps that behavior in an
5
+`ExportTarget` implementation so other runtimes can slot into a shared
6
+registry without rewriting the existing Ollama flow.
77
 """
88
 
99
 from __future__ import annotations
src/dlm/export/vendoring.pymodified
@@ -12,7 +12,7 @@ Three primary artifacts:
1212
 - `llama-quantize` — compiled binary (built by cmake). Converts an
1313
   fp16 GGUF into one of the quant levels.
1414
 - `llama-server` — compiled binary for the OpenAI-compatible HTTP
15
-  server target added in Sprint 41.
15
+  server target.
1616
 
1717
 Lookup order for the llama.cpp source tree (convert scripts):
1818
 
src/dlm/modality/__init__.pymodified
@@ -13,8 +13,7 @@ Callers that previously wrote ``if spec.modality == "vision-language"``
1313
 now read ``modality_for(spec).accepts_images`` (or one of the other
1414
 predicate flags) or call a dispatch method directly. A pregate
1515
 grep-gate refuses new scatter — see ``scripts/pregate.sh``.
16
-Text-family tags (`"text"` and Sprint 40's `"text-moe"`) share
17
-the same dispatcher.
16
+Text-family tags (`"text"` and `"text-moe"`) share the same dispatcher.
1817
 """
1918
 
2019
 from __future__ import annotations
src/dlm/preference/__init__.pymodified
@@ -1,8 +1,8 @@
11
 """Preference-mining substrate types.
22
 
3
-Sprint 42 builds the mining/apply/train loop on top of these typed
4
-contracts. This module only exposes the pure-value surface; the
5
-side-effecting mine/apply runtime lands in follow-up slices.
3
+Pure-value surface only — typed contracts the mining/apply/train loop
4
+builds on. The side-effecting mine/apply runtime lives in sibling
5
+modules.
66
 """
77
 
88
 from dlm.preference.apply import (
src/dlm/preference/errors.pymodified
@@ -1,4 +1,4 @@
1
-"""Errors raised by Sprint 42 preference-mining infrastructure."""
1
+"""Errors raised by the preference-mining infrastructure."""
22
 
33
 from __future__ import annotations
44
 
src/dlm/synth/__init__.pymodified
@@ -1,4 +1,4 @@
1
-"""Sprint 43 synthetic-instruction generation substrate."""
1
+"""Synthetic-instruction generation substrate."""
22
 
33
 from dlm.synth.apply import (
44
     PlannedSynthAddition,
src/dlm/synth/errors.pymodified
@@ -1,4 +1,4 @@
1
-"""Typed errors for Sprint 43 synthetic-instruction generation."""
1
+"""Typed errors for synthetic-instruction generation."""
22
 
33
 from __future__ import annotations
44
 
src/dlm/synth/prompts.pymodified
@@ -1,4 +1,4 @@
1
-"""Prompt templates for Sprint 43 synthetic instruction generation."""
1
+"""Prompt templates for synthetic instruction generation."""
22
 
33
 from __future__ import annotations
44
 
src/dlm/synth/run.pymodified
@@ -1,4 +1,4 @@
1
-"""Pure dry-run orchestration for Sprint 43 synthetic instruction generation."""
1
+"""Pure dry-run orchestration for synthetic instruction generation."""
22
 
33
 from __future__ import annotations
44
 
src/dlm/synth/teachers.pymodified
@@ -1,4 +1,4 @@
1
-"""Teacher selector parsing and runtime wrappers for Sprint 43."""
1
+"""Teacher selector parsing and runtime wrappers for synthetic-data generation."""
22
 
33
 import importlib
44
 import json