tenseleyflow/documentlanguagemodel / 63979ff

test: precision override behavior + v4→v5 migrator coverage

Authored by espadonne
SHA: 63979fff407dae04d260045628e7b5185ea4b7d6
Parents: c2bcba4
Tree: aa12303

3 changed files

Status  File                                         +   -
A       tests/unit/doc/test_migration_v4_to_v5.py   57   0
M       tests/unit/doc/test_schema.py               15   1
M       tests/unit/hardware/test_plan.py            42   0
tests/unit/doc/test_migration_v4_to_v5.py (added)
@@ -0,0 +1,57 @@
+"""v4 → v5 migrator: additive `training.precision` override (identity)."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from dlm.doc.migrations.v4 import migrate
+from dlm.doc.schema import DlmFrontmatter
+
+VALID_ULID = "01HZ4X7TGZM3J1A2B3C4D5E6F7"
+
+
+class TestIdentity:
+    def test_empty_passthrough(self) -> None:
+        raw: dict[str, Any] = {}
+        out = migrate(raw)
+        assert out == raw
+        assert out is not raw
+
+    def test_v4_training_block_preserved(self) -> None:
+        raw: dict[str, Any] = {
+            "training": {
+                "lora_r": 16,
+                "adapters": {
+                    "knowledge": {"adapter": "lora", "lora_r": 8},
+                },
+            },
+        }
+        out = migrate(raw)
+        assert out == raw
+
+
+class TestValidatesAsV5:
+    def test_migrated_doc_validates_without_precision(self) -> None:
+        # A v4 doc with no precision override parses as v5 unchanged;
+        # precision defaults to None (let the planner pick).
+        raw: dict[str, Any] = {
+            "dlm_id": VALID_ULID,
+            "base_model": "smollm2-135m",
+            "dlm_version": 4,
+        }
+        out = migrate(raw)
+        out["dlm_version"] = 5
+        fm = DlmFrontmatter.model_validate(out)
+        assert fm.training.precision is None
+
+    def test_migrated_doc_accepts_precision_override(self) -> None:
+        raw: dict[str, Any] = {
+            "dlm_id": VALID_ULID,
+            "base_model": "smollm2-135m",
+            "dlm_version": 4,
+            "training": {"precision": "fp16"},
+        }
+        out = migrate(raw)
+        out["dlm_version"] = 5
+        fm = DlmFrontmatter.model_validate(out)
+        assert fm.training.precision == "fp16"
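
For orientation, the identity tests above pin down what `migrate` must do for v4 documents: return a dict that compares equal to the input but is not the same object, and add or rewrite nothing, since the new `training.precision` key is optional and defaulted at the schema layer. The real `dlm.doc.migrations.v4.migrate` is not shown in this diff, so the following is only a minimal sketch consistent with those assertions, not the actual implementation.

# Hypothetical sketch of the v4 -> v5 migrator exercised above; the real
# dlm.doc.migrations.v4 implementation is not part of this commit.
from __future__ import annotations

import copy
from typing import Any


def migrate(raw: dict[str, Any]) -> dict[str, Any]:
    # The precision override is purely additive, so migration is an identity
    # step: return an equal but distinct mapping (out == raw, out is not raw)
    # that callers may mutate, e.g. to bump dlm_version to 5.
    return copy.deepcopy(raw)
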
tests/unit/doc/test_schema.py (modified)
@@ -34,6 +34,7 @@ class TestTrainingConfigDefaults:
         assert t.optimizer == "adamw_torch"
         assert t.lr_scheduler == "cosine"
         assert t.warmup_ratio == pytest.approx(0.1)
+        assert t.precision is None
         assert t.seed == 42
 
     def test_frozen_model_rejects_mutation(self) -> None:
@@ -72,6 +73,19 @@ class TestTrainingConfigConstraints:
         with pytest.raises(ValidationError):
             TrainingConfig(warmup_ratio=bad)
 
+    @pytest.mark.parametrize("value", ["bf16", "fp16", "fp32"])
+    def test_precision_accepts_valid_values(self, value: str) -> None:
+        t = TrainingConfig(precision=value)  # type: ignore[arg-type]
+        assert t.precision == value
+
+    def test_precision_defaults_to_none(self) -> None:
+        assert TrainingConfig().precision is None
+
+    @pytest.mark.parametrize("bad", ["float32", "FP16", "int8", ""])
+    def test_precision_rejects_invalid(self, bad: str) -> None:
+        with pytest.raises(ValidationError):
+            TrainingConfig(precision=bad)  # type: ignore[arg-type]
+
     def test_adapter_literal_rejects_unknown(self) -> None:
         with pytest.raises(ValidationError):
             TrainingConfig(adapter="full")  # type: ignore[arg-type]
@@ -387,7 +401,7 @@ class TestDlmFrontmatterForwardVersion:
 class TestDlmFrontmatter:
     def test_minimal_valid(self) -> None:
         fm = DlmFrontmatter(dlm_id=VALID_ULID, base_model="smollm2-135m")
-        assert fm.dlm_version == 4
+        assert fm.dlm_version == 5
         assert fm.training == TrainingConfig()
         assert fm.export == ExportConfig()
         assert fm.system_prompt is None
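
The schema tests imply that `TrainingConfig` gained an optional `precision` field limited to bf16, fp16, and fp32, defaulting to None so the hardware planner chooses when no override is given. Assuming the project's frozen pydantic v2 models (suggested by `model_validate` and `ValidationError` in the suite), the field might be declared roughly as below; everything here besides `precision` is a placeholder, and the real `TrainingConfig` in dlm.doc.schema is not shown in this commit.

# Hypothetical declaration consistent with the new tests, not the real schema.
from __future__ import annotations

from typing import Literal

from pydantic import BaseModel, ConfigDict

Precision = Literal["bf16", "fp16", "fp32"]


class TrainingConfig(BaseModel):
    # Frozen, as test_frozen_model_rejects_mutation expects.
    model_config = ConfigDict(frozen=True)

    # None means "no override": the planner picks. Anything outside the
    # Literal ("float32", "FP16", "int8", "") fails validation.
    precision: Precision | None = None
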
tests/unit/hardware/test_plan.py (modified)
@@ -36,6 +36,48 @@ class TestPrecisionPicker:
         plan = resolve(_cfg(), caps, base_params=1_500_000_000, seq_len=2048)
         assert plan.precision == "fp32"
 
+    def test_mps_fp16_override_honored(self, caplog: object) -> None:
+        # Frontmatter `training.precision: fp16` on MPS wins over the
+        # default fp32 pin (see .docs/bugs/01-nan-adapter-on-mps.md —
+        # user is opting in to the known NaN risk for memory headroom).
+        import logging
+
+        with force_mps():
+            caps = probe()
+        with caplog.at_level(logging.WARNING, logger="dlm.hardware.plan"):  # type: ignore[attr-defined]
+            plan = resolve(
+                _cfg(precision="fp16"), caps, base_params=8_000_000_000, seq_len=2048
+            )
+        assert plan.precision == "fp16"
+        # The caller must see the risk explicitly — silent fp16 on MPS
+        # is what caused the original bug.
+        messages = " ".join(r.message for r in caplog.records)  # type: ignore[attr-defined]
+        assert "fp16 on MPS" in messages
+        assert "NaN" in messages
+
+    def test_mps_bf16_override_silent(self, caplog: object) -> None:
+        # bf16 on MPS is user-asserted; no warning, since bf16 doesn't
+        # have the fp16 attention-kernel issue.
+        import logging
+
+        with force_mps():
+            caps = probe()
+        with caplog.at_level(logging.WARNING, logger="dlm.hardware.plan"):  # type: ignore[attr-defined]
+            plan = resolve(
+                _cfg(precision="bf16"), caps, base_params=1_500_000_000, seq_len=2048
+            )
+        assert plan.precision == "bf16"
+        assert caplog.records == []  # type: ignore[attr-defined]
+
+    def test_cuda_override_fp32_honored(self) -> None:
+        # CUDA default is bf16 (Ampere+) — override to fp32 honored.
+        with force_cuda(sm=(8, 0)):
+            caps = probe()
+        plan = resolve(
+            _cfg(precision="fp32"), caps, base_params=1_500_000_000, seq_len=2048
+        )
+        assert plan.precision == "fp32"
+
 
 class TestAttentionPicker:
     def test_cuda_without_flash_falls_back_to_sdpa(self) -> None:
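
Taken together, the new planner tests fix the override contract for `resolve`: an explicit frontmatter precision always wins over the hardware default, fp16 on MPS is honored but must emit a warning on the `dlm.hardware.plan` logger mentioning both "fp16 on MPS" and "NaN", while bf16 on MPS and fp32 on CUDA pass through silently. A rough sketch of that branch follows; the helper name, its signature, and the `backend` string are assumptions, and only the asserted behavior comes from the tests.

# Hypothetical override branch for dlm.hardware.plan.resolve; names and
# signature are assumed, only the tested behavior is reproduced.
from __future__ import annotations

import logging

logger = logging.getLogger("dlm.hardware.plan")


def _apply_precision_override(requested: str | None, default: str, backend: str) -> str:
    if requested is None:
        return default  # no frontmatter override: keep the planner's pick
    if backend == "mps" and requested == "fp16":
        # Surface the opt-in risk; the tests require the warning text to
        # contain "fp16 on MPS" and "NaN".
        logger.warning(
            "fp16 on MPS overrides the fp32 default; adapters may train to "
            "NaN (see .docs/bugs/01-nan-adapter-on-mps.md)."
        )
    return requested
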