tenseleyflow/documentlanguagemodel / 1f790be

Browse files

Sweep sprint-N / audit-N references out of src/dlm

Audit 12 M12.3: 22+ historical sprint IDs in module docstrings and one user-visible CLI help string. Each rewritten to describe the code rather than the work-tracking artifact that birthed it.
Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
1f790beb7a1751b0d59c372df9acb84bbe0c2074
Parents
71ebaed
Tree
11179f4

23 changed files

StatusFile+-
M src/dlm/base_models/downloader.py 3 3
M src/dlm/base_models/registry.py 2 2
M src/dlm/base_models/schema.py 2 2
M src/dlm/cli/commands.py 1 2
M src/dlm/doc/migrations/v12.py 5 5
M src/dlm/doc/migrations/v13.py 4 4
M src/dlm/doc/migrations/v14.py 6 6
M src/dlm/doc/parser.py 3 3
M src/dlm/doc/schema.py 1 1
M src/dlm/doc/sections.py 3 3
M src/dlm/export/manifest.py 1 1
M src/dlm/export/sway_json.py 2 2
M src/dlm/export/targets/base.py 3 4
M src/dlm/export/targets/ollama.py 3 3
M src/dlm/export/vendoring.py 1 1
M src/dlm/modality/__init__.py 1 2
M src/dlm/preference/__init__.py 3 3
M src/dlm/preference/errors.py 1 1
M src/dlm/synth/__init__.py 1 1
M src/dlm/synth/errors.py 1 1
M src/dlm/synth/prompts.py 1 1
M src/dlm/synth/run.py 1 1
M src/dlm/synth/teachers.py 1 1
src/dlm/base_models/downloader.pymodified
@@ -46,9 +46,9 @@ def download_spec(
46
     """Fetch (or locate) the snapshot for `spec` and return a pinned reference.
46
     """Fetch (or locate) the snapshot for `spec` and return a pinned reference.
47
 
47
 
48
     `cache_dir` overrides `HF_HOME`. `local_dir` copies the snapshot
48
     `cache_dir` overrides `HF_HOME`. `local_dir` copies the snapshot
49
-    into a specific path (non-symlinked) — used by sprint 04's store
49
+    into a specific path (non-symlinked) — used by the per-`.dlm`
50
-    when we want a per-`.dlm` cache. `local_files_only=True` refuses to
50
+    store cache. `local_files_only=True` refuses to hit the network
51
-    hit the network (mirrors `HF_HUB_OFFLINE`).
51
+    (mirrors `HF_HUB_OFFLINE`).
52
     """
52
     """
53
     from huggingface_hub import snapshot_download
53
     from huggingface_hub import snapshot_download
54
     from huggingface_hub.errors import (
54
     from huggingface_hub.errors import (
src/dlm/base_models/registry.pymodified
@@ -440,8 +440,8 @@ _ENTRIES: tuple[BaseModelSpec, ...] = (
440
     # Mistral Small 3.1 24B Instruct — Apache-2.0 multimodal base with
440
     # Mistral Small 3.1 24B Instruct — Apache-2.0 multimodal base with
441
     # native vision support and 128k context.
441
     # native vision support and 128k context.
442
     #
442
     #
443
-    # The Sprint 40 draft treated this as text-only; the live HF
443
+    # An earlier draft treated this as text-only; the live HF config
444
-    # config is `Mistral3ForConditionalGeneration` with both text and
444
+    # is `Mistral3ForConditionalGeneration` with both text and
445
     # vision towers, so we register it as vision-language. The current
445
     # vision towers, so we register it as vision-language. The current
446
     # processor config pins `[IMG]` as the image placeholder and a
446
     # processor config pins `[IMG]` as the image placeholder and a
447
     # longest edge of 1540 px. DLM's current `VlPreprocessorPlan`
447
     # longest edge of 1540 px. DLM's current `VlPreprocessorPlan`
src/dlm/base_models/schema.pymodified
@@ -149,8 +149,8 @@ class BaseModelSpec(BaseModel):
149
     provenance_url: str | None = None
149
     provenance_url: str | None = None
150
     provenance_match_text: str | None = None
150
     provenance_match_text: str | None = None
151
 
151
 
152
-    # Modality + multi-modal preprocessing (schema v10 + v11, plus
152
+    # Modality + multi-modal preprocessing (schema v10 + v11, plus the
153
-    # Sprint 40's additive `text-moe` discriminator).
153
+    # additive `text-moe` discriminator).
154
     # Text-family bases leave `modality in {"text", "text-moe"}`
154
     # Text-family bases leave `modality in {"text", "text-moe"}`
155
     # with both plans None;
155
     # with both plans None;
156
     # `modality="vision-language"` requires a `vl_preprocessor_plan`
156
     # `modality="vision-language"` requires a `vl_preprocessor_plan`
src/dlm/cli/commands.pymodified
@@ -1675,8 +1675,7 @@ def export_cmd(
1675
             help=(
1675
             help=(
1676
                 "After the export, also write a ready-to-run sway.yaml "
1676
                 "After the export, also write a ready-to-run sway.yaml "
1677
                 "(via dlm-sway autogen) into the export dir. Requires the "
1677
                 "(via dlm-sway autogen) into the export dir. Requires the "
1678
-                "[sway] extra: pip install 'dlm[sway]'. Closes the "
1678
+                "[sway] extra: pip install 'dlm[sway]'."
1679
-                "training-then-evaluating gap from sway Sprint 26 X1."
1680
             ),
1679
             ),
1681
         ),
1680
         ),
1682
     ] = False,
1681
     ] = False,
src/dlm/doc/migrations/v12.pymodified
@@ -1,10 +1,10 @@
1
 """v12 → v13 migrator: identity bump for the 2026 registry refresh.
1
 """v12 → v13 migrator: identity bump for the 2026 registry refresh.
2
 
2
 
3
-Sprint 40 adds additive base-model registry metadata
3
+The base-model registry gained additive metadata (`reasoning_tuned`,
4
-(`reasoning_tuned`, `context_length_effective`, `text-moe`) without
4
+`context_length_effective`, `text-moe`) without changing `.dlm`
5
-changing `.dlm` frontmatter shape. The doc schema still advances to
5
+frontmatter shape. The doc schema still advances to v13 so
6
-v13 so migration-aware tooling can distinguish post-refresh docs from
6
+migration-aware tooling can distinguish post-refresh docs from older
7
-older ones. Existing v12 documents therefore migrate as pure identity.
7
+ones. Existing v12 documents therefore migrate as pure identity.
8
 """
8
 """
9
 
9
 
10
 from __future__ import annotations
10
 from __future__ import annotations
src/dlm/doc/migrations/v13.pymodified
@@ -1,9 +1,9 @@
1
 """v13 → v14 migrator: additive auto-mined preference metadata markers.
1
 """v13 → v14 migrator: additive auto-mined preference metadata markers.
2
 
2
 
3
-Sprint 42 adds additive metadata on ``::preference::`` sections:
3
+Adds additive metadata on ``::preference::`` sections — ``auto_mined``
4
-``auto_mined`` plus judge provenance / scores / timestamps. These live
4
+plus judge provenance / scores / timestamps. These live in body-side
5
-in body-side magic-comment markers rather than frontmatter, so existing
5
+magic-comment markers rather than frontmatter, so existing v13
6
-v13 documents migrate as pure identity. The schema still advances so
6
+documents migrate as pure identity. The schema still advances so
7
 migration-aware tooling can distinguish docs written before the mining
7
 migration-aware tooling can distinguish docs written before the mining
8
 loop existed.
8
 loop existed.
9
 """
9
 """
src/dlm/doc/migrations/v14.pymodified
@@ -1,11 +1,11 @@
1
 """v14 → v15 migrator: identity bump for auto-synth instruction metadata.
1
 """v14 → v15 migrator: identity bump for auto-synth instruction metadata.
2
 
2
 
3
-Sprint 43 adds additive metadata on ``::instruction::`` sections:
3
+Adds additive metadata on ``::instruction::`` sections — ``auto_synth``
4
-``auto_synth`` plus teacher / strategy / timestamp / source provenance.
4
+plus teacher / strategy / timestamp / source provenance. These live in
5
-These live in body-side magic-comment markers rather than frontmatter,
5
+body-side magic-comment markers rather than frontmatter, so existing
6
-so existing v14 documents migrate as pure identity. The schema still
6
+v14 documents migrate as pure identity. The schema still advances so
7
-advances so migration-aware tooling can distinguish docs written before
7
+migration-aware tooling can distinguish docs written before the synth
8
-the synth loop existed.
8
+loop existed.
9
 """
9
 """
10
 
10
 
11
 from __future__ import annotations
11
 from __future__ import annotations
src/dlm/doc/parser.pymodified
@@ -6,7 +6,7 @@ Flow:
6
               → split frontmatter and body on the two `---` delimiters
6
               → split frontmatter and body on the two `---` delimiters
7
               → YAML-parse the frontmatter
7
               → YAML-parse the frontmatter
8
               → Pydantic validate → DlmFrontmatter
8
               → Pydantic validate → DlmFrontmatter
9
-              → check dlm_version (sprint 12b owns migration)
9
+              → check dlm_version (delegates to the migration registry)
10
               → tokenize body into Section list (code-fence aware)
10
               → tokenize body into Section list (code-fence aware)
11
               → return ParsedDlm(frozen)
11
               → return ParsedDlm(frozen)
12
 
12
 
@@ -509,7 +509,7 @@ def _resolve_fence_type(name: str, line: int, path: Path | None) -> tuple[Sectio
509
 def _parse_auto_mined_marker(
509
 def _parse_auto_mined_marker(
510
     attr_blob: str, *, path: Path | None, line: int
510
     attr_blob: str, *, path: Path | None, line: int
511
 ) -> tuple[str, float, float, str, int]:
511
 ) -> tuple[str, float, float, str, int]:
512
-    """Parse the Sprint 42 auto-mined metadata marker on preference sections."""
512
+    """Parse the auto-mined metadata marker on preference sections."""
513
     if not _MARKER_ATTR_BLOB_RE.fullmatch(attr_blob):
513
     if not _MARKER_ATTR_BLOB_RE.fullmatch(attr_blob):
514
         raise FenceError(
514
         raise FenceError(
515
             "invalid dlm-auto-mined marker syntax",
515
             "invalid dlm-auto-mined marker syntax",
@@ -606,7 +606,7 @@ def _parse_auto_mined_marker(
606
 def _parse_auto_synth_marker(
606
 def _parse_auto_synth_marker(
607
     attr_blob: str, *, path: Path | None, line: int
607
     attr_blob: str, *, path: Path | None, line: int
608
 ) -> tuple[str, str, str, str]:
608
 ) -> tuple[str, str, str, str]:
609
-    """Parse the Sprint 43 auto-synth metadata marker on instruction sections."""
609
+    """Parse the auto-synth metadata marker on instruction sections."""
610
     if not _MARKER_ATTR_BLOB_RE.fullmatch(attr_blob):
610
     if not _MARKER_ATTR_BLOB_RE.fullmatch(attr_blob):
611
         raise FenceError(
611
         raise FenceError(
612
             "invalid dlm-auto-synth marker syntax",
612
             "invalid dlm-auto-synth marker syntax",
src/dlm/doc/schema.pymodified
@@ -50,7 +50,7 @@ training signal). v12 adds the additive `training.audio` block
50
 (currently one field, `auto_resample: bool`) — opt-in automatic
50
 (currently one field, `auto_resample: bool`) — opt-in automatic
51
 resampling when audio files don't match the base's pinned rate.
51
 resampling when audio files don't match the base's pinned rate.
52
 Default False preserves the "refuse on SR mismatch" contract. v13 is
52
 Default False preserves the "refuse on SR mismatch" contract. v13 is
53
-an identity bump paired with Sprint 40's base-model registry refresh:
53
+an identity bump paired with the 2026 base-model registry refresh:
54
 the document frontmatter shape is unchanged, but the migration chain
54
 the document frontmatter shape is unchanged, but the migration chain
55
 still advances so tooling can distinguish post-refresh docs from older
55
 still advances so tooling can distinguish post-refresh docs from older
56
 ones. v14 adds additive auto-mined preference metadata on
56
 ones. v14 adds additive auto-mined preference metadata on
src/dlm/doc/sections.pymodified
@@ -93,15 +93,15 @@ class Section:
93
     neither field participates in `section_id`.
93
     neither field participates in `section_id`.
94
 
94
 
95
     `auto_mined` marks a `::preference::` section as synthesized by
95
     `auto_mined` marks a `::preference::` section as synthesized by
96
-    Sprint 42's preference-mining loop rather than hand-authored. The
96
+    the preference-mining loop rather than hand-authored. The
97
     accompanying judge metadata (`judge_name`, `judge_score_chosen`,
97
     accompanying judge metadata (`judge_name`, `judge_score_chosen`,
98
     `judge_score_rejected`, `mined_at`, `mined_run_id`) captures
98
     `judge_score_rejected`, `mined_at`, `mined_run_id`) captures
99
     provenance for review, metrics, and revert flows. Like harvest
99
     provenance for review, metrics, and revert flows. Like harvest
100
     metadata, these fields do not participate in `section_id`.
100
     metadata, these fields do not participate in `section_id`.
101
 
101
 
102
     `auto_synth` marks an `::instruction::` section as synthesized by
102
     `auto_synth` marks an `::instruction::` section as synthesized by
103
-    Sprint 43's instruction-generation loop rather than hand-authored.
103
+    the instruction-generation loop rather than hand-authored. The
104
-    The accompanying metadata (`synth_teacher`, `synth_strategy`,
104
+    accompanying metadata (`synth_teacher`, `synth_strategy`,
105
     `synth_at`, `source_section_id`) captures provenance for review,
105
     `synth_at`, `source_section_id`) captures provenance for review,
106
     metrics, and revert flows. Like the other provenance flags, these
106
     metrics, and revert flows. Like the other provenance flags, these
107
     fields do not participate in `section_id`.
107
     fields do not participate in `section_id`.
src/dlm/export/manifest.pymodified
@@ -2,7 +2,7 @@
2
 
2
 
3
 One file per `exports/<quant>/` directory. Records:
3
 One file per `exports/<quant>/` directory. Records:
4
 
4
 
5
-- export target (`ollama` today; more runtimes in Sprint 41)
5
+- export target (`ollama`, `vllm`, `llama-server`, `mlx-serve`)
6
 - quant level
6
 - quant level
7
 - checksums of emitted GGUF artifacts
7
 - checksums of emitted GGUF artifacts
8
 - pinned llama.cpp tag (so a future upstream bump can detect drift)
8
 - pinned llama.cpp tag (so a future upstream bump can detect drift)
src/dlm/export/sway_json.pymodified
@@ -1,7 +1,7 @@
1
 """Cross-repo bridge: emit a ready-to-run ``sway.yaml`` next to a dlm export.
1
 """Cross-repo bridge: emit a ready-to-run ``sway.yaml`` next to a dlm export.
2
 
2
 
3
-Sprint 26 X1. Closes Audit 03's "users who train via dlm then evaluate
3
+Closes the gap where users who train via dlm then evaluate via sway
4
-via sway have to run two separate commands" gap. With ``dlm export
4
+had to run two separate commands. With ``dlm export
5
 --emit-sway-json``, the user runs::
5
 --emit-sway-json``, the user runs::
6
 
6
 
7
     dlm export myadapter.dlm --target ollama --emit-sway-json
7
     dlm export myadapter.dlm --target ollama --emit-sway-json
src/dlm/export/targets/base.pymodified
@@ -1,9 +1,8 @@
1
 """Export-target protocol shared by runtime-specific export surfaces.
1
 """Export-target protocol shared by runtime-specific export surfaces.
2
 
2
 
3
-Sprint 41 starts by making "target" a first-class concept even though
3
+A "target" is a first-class concept so each registered runtime (Ollama,
4
-the only registered runtime today is Ollama. Later targets (vLLM,
4
+vLLM, llama-server, MLX-serve) plugs into the same shape instead of
5
-llama-server, MLX-serve) plug into the same shape instead of growing
5
+growing ad-hoc CLI branches.
6
-ad-hoc CLI branches.
7
 """
6
 """
8
 
7
 
9
 from __future__ import annotations
8
 from __future__ import annotations
src/dlm/export/targets/ollama.pymodified
@@ -1,9 +1,9 @@
1
 """Ollama target wrapper.
1
 """Ollama target wrapper.
2
 
2
 
3
 The text GGUF export path already owns Modelfile emission, registration,
3
 The text GGUF export path already owns Modelfile emission, registration,
4
-and smoke testing. Sprint 41's first substrate slice wraps that
4
+and smoke testing. This module wraps that behavior in an
5
-behavior in an `ExportTarget` implementation so later runtimes can slot
5
+`ExportTarget` implementation so other runtimes can slot into a shared
6
-into a shared registry without rewriting the existing Ollama flow.
6
+registry without rewriting the existing Ollama flow.
7
 """
7
 """
8
 
8
 
9
 from __future__ import annotations
9
 from __future__ import annotations
src/dlm/export/vendoring.pymodified
@@ -12,7 +12,7 @@ Three primary artifacts:
12
 - `llama-quantize` — compiled binary (built by cmake). Converts an
12
 - `llama-quantize` — compiled binary (built by cmake). Converts an
13
   fp16 GGUF into one of the quant levels.
13
   fp16 GGUF into one of the quant levels.
14
 - `llama-server` — compiled binary for the OpenAI-compatible HTTP
14
 - `llama-server` — compiled binary for the OpenAI-compatible HTTP
15
-  server target added in Sprint 41.
15
+  server target.
16
 
16
 
17
 Lookup order for the llama.cpp source tree (convert scripts):
17
 Lookup order for the llama.cpp source tree (convert scripts):
18
 
18
 
src/dlm/modality/__init__.pymodified
@@ -13,8 +13,7 @@ Callers that previously wrote ``if spec.modality == "vision-language"``
13
 now read ``modality_for(spec).accepts_images`` (or one of the other
13
 now read ``modality_for(spec).accepts_images`` (or one of the other
14
 predicate flags) or call a dispatch method directly. A pregate
14
 predicate flags) or call a dispatch method directly. A pregate
15
 grep-gate refuses new scatter — see ``scripts/pregate.sh``.
15
 grep-gate refuses new scatter — see ``scripts/pregate.sh``.
16
-Text-family tags (`"text"` and Sprint 40's `"text-moe"`) share
16
+Text-family tags (`"text"` and `"text-moe"`) share the same dispatcher.
17
-the same dispatcher.
18
 """
17
 """
19
 
18
 
20
 from __future__ import annotations
19
 from __future__ import annotations
src/dlm/preference/__init__.pymodified
@@ -1,8 +1,8 @@
1
 """Preference-mining substrate types.
1
 """Preference-mining substrate types.
2
 
2
 
3
-Sprint 42 builds the mining/apply/train loop on top of these typed
3
+Pure-value surface only — typed contracts the mining/apply/train loop
4
-contracts. This module only exposes the pure-value surface; the
4
+builds on. The side-effecting mine/apply runtime lives in sibling
5
-side-effecting mine/apply runtime lands in follow-up slices.
5
+modules.
6
 """
6
 """
7
 
7
 
8
 from dlm.preference.apply import (
8
 from dlm.preference.apply import (
src/dlm/preference/errors.pymodified
@@ -1,4 +1,4 @@
1
-"""Errors raised by Sprint 42 preference-mining infrastructure."""
1
+"""Errors raised by the preference-mining infrastructure."""
2
 
2
 
3
 from __future__ import annotations
3
 from __future__ import annotations
4
 
4
 
src/dlm/synth/__init__.pymodified
@@ -1,4 +1,4 @@
1
-"""Sprint 43 synthetic-instruction generation substrate."""
1
+"""Synthetic-instruction generation substrate."""
2
 
2
 
3
 from dlm.synth.apply import (
3
 from dlm.synth.apply import (
4
     PlannedSynthAddition,
4
     PlannedSynthAddition,
src/dlm/synth/errors.pymodified
@@ -1,4 +1,4 @@
1
-"""Typed errors for Sprint 43 synthetic-instruction generation."""
1
+"""Typed errors for synthetic-instruction generation."""
2
 
2
 
3
 from __future__ import annotations
3
 from __future__ import annotations
4
 
4
 
src/dlm/synth/prompts.pymodified
@@ -1,4 +1,4 @@
1
-"""Prompt templates for Sprint 43 synthetic instruction generation."""
1
+"""Prompt templates for synthetic instruction generation."""
2
 
2
 
3
 from __future__ import annotations
3
 from __future__ import annotations
4
 
4
 
src/dlm/synth/run.pymodified
@@ -1,4 +1,4 @@
1
-"""Pure dry-run orchestration for Sprint 43 synthetic instruction generation."""
1
+"""Pure dry-run orchestration for synthetic instruction generation."""
2
 
2
 
3
 from __future__ import annotations
3
 from __future__ import annotations
4
 
4
 
src/dlm/synth/teachers.pymodified
@@ -1,4 +1,4 @@
1
-"""Teacher selector parsing and runtime wrappers for Sprint 43."""
1
+"""Teacher selector parsing and runtime wrappers for synthetic-instruction generation."""
2
 
2
 
3
 import importlib
3
 import importlib
4
 import json
4
 import json