tenseleyflow/documentlanguagemodel / 1f790be

Browse files

Sweep sprint-N / audit-N references out of src/dlm

Audit 12 M12.3: 22+ historical sprint IDs in module docstrings and one user-visible CLI help string. Each rewritten to describe the code rather than the work-tracking artifact that birthed it.
Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
1f790beb7a1751b0d59c372df9acb84bbe0c2074
Parents
71ebaed
Tree
11179f4

23 changed files

StatusFile+-
M src/dlm/base_models/downloader.py 3 3
M src/dlm/base_models/registry.py 2 2
M src/dlm/base_models/schema.py 2 2
M src/dlm/cli/commands.py 1 2
M src/dlm/doc/migrations/v12.py 5 5
M src/dlm/doc/migrations/v13.py 4 4
M src/dlm/doc/migrations/v14.py 6 6
M src/dlm/doc/parser.py 3 3
M src/dlm/doc/schema.py 1 1
M src/dlm/doc/sections.py 3 3
M src/dlm/export/manifest.py 1 1
M src/dlm/export/sway_json.py 2 2
M src/dlm/export/targets/base.py 3 4
M src/dlm/export/targets/ollama.py 3 3
M src/dlm/export/vendoring.py 1 1
M src/dlm/modality/__init__.py 1 2
M src/dlm/preference/__init__.py 3 3
M src/dlm/preference/errors.py 1 1
M src/dlm/synth/__init__.py 1 1
M src/dlm/synth/errors.py 1 1
M src/dlm/synth/prompts.py 1 1
M src/dlm/synth/run.py 1 1
M src/dlm/synth/teachers.py 1 1
src/dlm/base_models/downloader.pymodified
@@ -46,9 +46,9 @@ def download_spec(
4646
     """Fetch (or locate) the snapshot for `spec` and return a pinned reference.
4747
 
4848
     `cache_dir` overrides `HF_HOME`. `local_dir` copies the snapshot
49
-    into a specific path (non-symlinked) — used by sprint 04's store
50
-    when we want a per-`.dlm` cache. `local_files_only=True` refuses to
51
-    hit the network (mirrors `HF_HUB_OFFLINE`).
49
+    into a specific path (non-symlinked) — used by the per-`.dlm`
50
+    store cache. `local_files_only=True` refuses to hit the network
51
+    (mirrors `HF_HUB_OFFLINE`).
5252
     """
5353
     from huggingface_hub import snapshot_download
5454
     from huggingface_hub.errors import (
src/dlm/base_models/registry.pymodified
@@ -440,8 +440,8 @@ _ENTRIES: tuple[BaseModelSpec, ...] = (
440440
     # Mistral Small 3.1 24B Instruct — Apache-2.0 multimodal base with
441441
     # native vision support and 128k context.
442442
     #
443
-    # The Sprint 40 draft treated this as text-only; the live HF
444
-    # config is `Mistral3ForConditionalGeneration` with both text and
443
+    # An earlier draft treated this as text-only; the live HF config
444
+    # is `Mistral3ForConditionalGeneration` with both text and
445445
     # vision towers, so we register it as vision-language. The current
446446
     # processor config pins `[IMG]` as the image placeholder and a
447447
     # longest edge of 1540 px. DLM's current `VlPreprocessorPlan`
src/dlm/base_models/schema.pymodified
@@ -149,8 +149,8 @@ class BaseModelSpec(BaseModel):
149149
     provenance_url: str | None = None
150150
     provenance_match_text: str | None = None
151151
 
152
-    # Modality + multi-modal preprocessing (schema v10 + v11, plus
153
-    # Sprint 40's additive `text-moe` discriminator).
152
+    # Modality + multi-modal preprocessing (schema v10 + v11, plus the
153
+    # additive `text-moe` discriminator).
154154
     # Text-family bases leave `modality in {"text", "text-moe"}`
155155
     # with both plans None;
156156
     # `modality="vision-language"` requires a `vl_preprocessor_plan`
src/dlm/cli/commands.pymodified
@@ -1675,8 +1675,7 @@ def export_cmd(
16751675
             help=(
16761676
                 "After the export, also write a ready-to-run sway.yaml "
16771677
                 "(via dlm-sway autogen) into the export dir. Requires the "
1678
-                "[sway] extra: pip install 'dlm[sway]'. Closes the "
1679
-                "training-then-evaluating gap from sway Sprint 26 X1."
1678
+                "[sway] extra: pip install 'dlm[sway]'."
16801679
             ),
16811680
         ),
16821681
     ] = False,
src/dlm/doc/migrations/v12.pymodified
@@ -1,10 +1,10 @@
11
 """v12 → v13 migrator: identity bump for the 2026 registry refresh.
22
 
3
-Sprint 40 adds additive base-model registry metadata
4
-(`reasoning_tuned`, `context_length_effective`, `text-moe`) without
5
-changing `.dlm` frontmatter shape. The doc schema still advances to
6
-v13 so migration-aware tooling can distinguish post-refresh docs from
7
-older ones. Existing v12 documents therefore migrate as pure identity.
3
+The base-model registry gained additive metadata (`reasoning_tuned`,
4
+`context_length_effective`, `text-moe`) without changing `.dlm`
5
+frontmatter shape. The doc schema still advances to v13 so
6
+migration-aware tooling can distinguish post-refresh docs from older
7
+ones. Existing v12 documents therefore migrate as pure identity.
88
 """
99
 
1010
 from __future__ import annotations
src/dlm/doc/migrations/v13.pymodified
@@ -1,9 +1,9 @@
11
 """v13 → v14 migrator: additive auto-mined preference metadata markers.
22
 
3
-Sprint 42 adds additive metadata on ``::preference::`` sections:
4
-``auto_mined`` plus judge provenance / scores / timestamps. These live
5
-in body-side magic-comment markers rather than frontmatter, so existing
6
-v13 documents migrate as pure identity. The schema still advances so
3
+Adds additive metadata on ``::preference::`` sections — ``auto_mined``
4
+plus judge provenance / scores / timestamps. These live in body-side
5
+magic-comment markers rather than frontmatter, so existing v13
6
+documents migrate as pure identity. The schema still advances so
77
 migration-aware tooling can distinguish docs written before the mining
88
 loop existed.
99
 """
src/dlm/doc/migrations/v14.pymodified
@@ -1,11 +1,11 @@
11
 """v14 → v15 migrator: identity bump for auto-synth instruction metadata.
22
 
3
-Sprint 43 adds additive metadata on ``::instruction::`` sections:
4
-``auto_synth`` plus teacher / strategy / timestamp / source provenance.
5
-These live in body-side magic-comment markers rather than frontmatter,
6
-so existing v14 documents migrate as pure identity. The schema still
7
-advances so migration-aware tooling can distinguish docs written before
8
-the synth loop existed.
3
+Adds additive metadata on ``::instruction::`` sections — ``auto_synth``
4
+plus teacher / strategy / timestamp / source provenance. These live in
5
+body-side magic-comment markers rather than frontmatter, so existing
6
+v14 documents migrate as pure identity. The schema still advances so
7
+migration-aware tooling can distinguish docs written before the synth
8
+loop existed.
99
 """
1010
 
1111
 from __future__ import annotations
src/dlm/doc/parser.pymodified
@@ -6,7 +6,7 @@ Flow:
66
               → split frontmatter and body on the two `---` delimiters
77
               → YAML-parse the frontmatter
88
               → Pydantic validate → DlmFrontmatter
9
-              → check dlm_version (sprint 12b owns migration)
9
+              → check dlm_version (delegates to the migration registry)
1010
               → tokenize body into Section list (code-fence aware)
1111
               → return ParsedDlm(frozen)
1212
 
@@ -509,7 +509,7 @@ def _resolve_fence_type(name: str, line: int, path: Path | None) -> tuple[Sectio
509509
 def _parse_auto_mined_marker(
510510
     attr_blob: str, *, path: Path | None, line: int
511511
 ) -> tuple[str, float, float, str, int]:
512
-    """Parse the Sprint 42 auto-mined metadata marker on preference sections."""
512
+    """Parse the auto-mined metadata marker on preference sections."""
513513
     if not _MARKER_ATTR_BLOB_RE.fullmatch(attr_blob):
514514
         raise FenceError(
515515
             "invalid dlm-auto-mined marker syntax",
@@ -606,7 +606,7 @@ def _parse_auto_mined_marker(
606606
 def _parse_auto_synth_marker(
607607
     attr_blob: str, *, path: Path | None, line: int
608608
 ) -> tuple[str, str, str, str]:
609
-    """Parse the Sprint 43 auto-synth metadata marker on instruction sections."""
609
+    """Parse the auto-synth metadata marker on instruction sections."""
610610
     if not _MARKER_ATTR_BLOB_RE.fullmatch(attr_blob):
611611
         raise FenceError(
612612
             "invalid dlm-auto-synth marker syntax",
src/dlm/doc/schema.pymodified
@@ -50,7 +50,7 @@ training signal). v12 adds the additive `training.audio` block
5050
 (currently one field, `auto_resample: bool`) — opt-in automatic
5151
 resampling when audio files don't match the base's pinned rate.
5252
 Default False preserves the "refuse on SR mismatch" contract. v13 is
53
-an identity bump paired with Sprint 40's base-model registry refresh:
53
+an identity bump paired with the 2026 base-model registry refresh:
5454
 the document frontmatter shape is unchanged, but the migration chain
5555
 still advances so tooling can distinguish post-refresh docs from older
5656
 ones. v14 adds additive auto-mined preference metadata on
src/dlm/doc/sections.pymodified
@@ -93,15 +93,15 @@ class Section:
9393
     neither field participates in `section_id`.
9494
 
9595
     `auto_mined` marks a `::preference::` section as synthesized by
96
-    Sprint 42's preference-mining loop rather than hand-authored. The
96
+    the preference-mining loop rather than hand-authored. The
9797
     accompanying judge metadata (`judge_name`, `judge_score_chosen`,
9898
     `judge_score_rejected`, `mined_at`, `mined_run_id`) captures
9999
     provenance for review, metrics, and revert flows. Like harvest
100100
     metadata, these fields do not participate in `section_id`.
101101
 
102102
     `auto_synth` marks an `::instruction::` section as synthesized by
103
-    Sprint 43's instruction-generation loop rather than hand-authored.
104
-    The accompanying metadata (`synth_teacher`, `synth_strategy`,
103
+    the instruction-generation loop rather than hand-authored. The
104
+    accompanying metadata (`synth_teacher`, `synth_strategy`,
105105
     `synth_at`, `source_section_id`) captures provenance for review,
106106
     metrics, and revert flows. Like the other provenance flags, these
107107
     fields do not participate in `section_id`.
src/dlm/export/manifest.pymodified
@@ -2,7 +2,7 @@
22
 
33
 One file per `exports/<quant>/` directory. Records:
44
 
5
-- export target (`ollama` today; more runtimes in Sprint 41)
5
+- export target (`ollama`, `vllm`, `llama-server`, `mlx-serve`)
66
 - quant level
77
 - checksums of emitted GGUF artifacts
88
 - pinned llama.cpp tag (so a future upstream bump can detect drift)
src/dlm/export/sway_json.pymodified
@@ -1,7 +1,7 @@
11
 """Cross-repo bridge: emit a ready-to-run ``sway.yaml`` next to a dlm export.
22
 
3
-Sprint 26 X1. Closes Audit 03's "users who train via dlm then evaluate
4
-via sway have to run two separate commands" gap. With ``dlm export
3
+Closes the gap where users who trained via dlm and then evaluated via
4
+sway had to run two separate commands. With ``dlm export
55
 --emit-sway-json``, the user runs::
66
 
77
     dlm export myadapter.dlm --target ollama --emit-sway-json
src/dlm/export/targets/base.pymodified
@@ -1,9 +1,8 @@
11
 """Export-target protocol shared by runtime-specific export surfaces.
22
 
3
-Sprint 41 starts by making "target" a first-class concept even though
4
-the only registered runtime today is Ollama. Later targets (vLLM,
5
-llama-server, MLX-serve) plug into the same shape instead of growing
6
-ad-hoc CLI branches.
3
+"Target" is a first-class concept so each registered runtime (Ollama,
4
+vLLM, llama-server, MLX-serve) plugs into the same shape instead of
5
+growing ad-hoc CLI branches.
76
 """
87
 
98
 from __future__ import annotations
src/dlm/export/targets/ollama.pymodified
@@ -1,9 +1,9 @@
11
 """Ollama target wrapper.
22
 
33
 The text GGUF export path already owns Modelfile emission, registration,
4
-and smoke testing. Sprint 41's first substrate slice wraps that
5
-behavior in an `ExportTarget` implementation so later runtimes can slot
6
-into a shared registry without rewriting the existing Ollama flow.
4
+and smoke testing. This module wraps that behavior in an
5
+`ExportTarget` implementation so other runtimes can slot into a shared
6
+registry without rewriting the existing Ollama flow.
77
 """
88
 
99
 from __future__ import annotations
src/dlm/export/vendoring.pymodified
@@ -12,7 +12,7 @@ Three primary artifacts:
1212
 - `llama-quantize` — compiled binary (built by cmake). Converts an
1313
   fp16 GGUF into one of the quant levels.
1414
 - `llama-server` — compiled binary for the OpenAI-compatible HTTP
15
-  server target added in Sprint 41.
15
+  server target.
1616
 
1717
 Lookup order for the llama.cpp source tree (convert scripts):
1818
 
src/dlm/modality/__init__.pymodified
@@ -13,8 +13,7 @@ Callers that previously wrote ``if spec.modality == "vision-language"``
1313
 now read ``modality_for(spec).accepts_images`` (or one of the other
1414
 predicate flags) or call a dispatch method directly. A pregate
1515
 grep-gate refuses new scatter — see ``scripts/pregate.sh``.
16
-Text-family tags (`"text"` and Sprint 40's `"text-moe"`) share
17
-the same dispatcher.
16
+Text-family tags (`"text"` and `"text-moe"`) share the same dispatcher.
1817
 """
1918
 
2019
 from __future__ import annotations
src/dlm/preference/__init__.pymodified
@@ -1,8 +1,8 @@
11
 """Preference-mining substrate types.
22
 
3
-Sprint 42 builds the mining/apply/train loop on top of these typed
4
-contracts. This module only exposes the pure-value surface; the
5
-side-effecting mine/apply runtime lands in follow-up slices.
3
+Pure-value surface only — typed contracts the mining/apply/train loop
4
+builds on. The side-effecting mine/apply runtime lives in sibling
5
+modules.
66
 """
77
 
88
 from dlm.preference.apply import (
src/dlm/preference/errors.pymodified
@@ -1,4 +1,4 @@
1
-"""Errors raised by Sprint 42 preference-mining infrastructure."""
1
+"""Errors raised by the preference-mining infrastructure."""
22
 
33
 from __future__ import annotations
44
 
src/dlm/synth/__init__.pymodified
@@ -1,4 +1,4 @@
1
-"""Sprint 43 synthetic-instruction generation substrate."""
1
+"""Synthetic-instruction generation substrate."""
22
 
33
 from dlm.synth.apply import (
44
     PlannedSynthAddition,
src/dlm/synth/errors.pymodified
@@ -1,4 +1,4 @@
1
-"""Typed errors for Sprint 43 synthetic-instruction generation."""
1
+"""Typed errors for synthetic-instruction generation."""
22
 
33
 from __future__ import annotations
44
 
src/dlm/synth/prompts.pymodified
@@ -1,4 +1,4 @@
1
-"""Prompt templates for Sprint 43 synthetic instruction generation."""
1
+"""Prompt templates for synthetic instruction generation."""
22
 
33
 from __future__ import annotations
44
 
src/dlm/synth/run.pymodified
@@ -1,4 +1,4 @@
1
-"""Pure dry-run orchestration for Sprint 43 synthetic instruction generation."""
1
+"""Pure dry-run orchestration for synthetic instruction generation."""
22
 
33
 from __future__ import annotations
44
 
src/dlm/synth/teachers.pymodified
@@ -1,4 +1,4 @@
1
-"""Teacher selector parsing and runtime wrappers for Sprint 43."""
1
+"""Teacher selector parsing and runtime wrappers for synthetic-data generation."""
22
 
33
 import importlib
44
 import json