tenseleyflow/documentlanguagemodel / 4630a24

Browse files

test(doc): CptConfig + v2→v3 migrator coverage

Authored by espadonne
SHA
4630a2483cc7d73acf4ab27003366bca320e7d6e
Parents
3f2d0f5
Tree
863cadf

2 changed files

Status File + -
A tests/unit/doc/test_migration_v2_to_v3.py 63 0
M tests/unit/doc/test_schema.py 51 1
tests/unit/doc/test_migration_v2_to_v3.py added
@@ -0,0 +1,63 @@
1
+"""v2 → v3 migrator: additive `training.cpt` block.
2
+
3
+v3 is additive with defaults, so the migrator is pure identity. These
4
+tests lock that shape in: a v2 doc round-trips unchanged, and the
5
+combined output validates under the v3 `DlmFrontmatter` with the
6
+default `CptConfig`.
7
+"""
8
+
9
+from __future__ import annotations
10
+
11
+from typing import Any
12
+
13
+from dlm.doc.migrations.v2 import migrate
14
+from dlm.doc.schema import CptConfig, DlmFrontmatter
15
+
16
+VALID_ULID = "01HZ4X7TGZM3J1A2B3C4D5E6F7"
17
+
18
+
19
+class TestIdentityShape:
20
+    def test_empty_dict_passthrough(self) -> None:
21
+        raw: dict[str, Any] = {}
22
+        out = migrate(raw)
23
+        assert out == raw
24
+        assert out is not raw  # copy, not alias
25
+
26
+    def test_v2_preference_block_preserved(self) -> None:
27
+        raw: dict[str, Any] = {
28
+            "dlm_id": VALID_ULID,
29
+            "base_model": "smollm2-135m",
30
+            "training": {
31
+                "preference": {
32
+                    "method": "orpo",
33
+                    "hyperparams": {"alpha": 0.15},
34
+                },
35
+            },
36
+        }
37
+        out = migrate(raw)
38
+        assert out == raw
39
+
40
+    def test_full_training_block_preserved(self) -> None:
41
+        raw: dict[str, Any] = {
42
+            "training": {
43
+                "adapter": "lora",
44
+                "lora_r": 16,
45
+                "learning_rate": 1e-4,
46
+            },
47
+        }
48
+        out = migrate(raw)
49
+        assert out == raw
50
+
51
+
52
+class TestValidatesAsV3:
53
+    def test_migrated_doc_validates_with_default_cpt(self) -> None:
54
+        raw: dict[str, Any] = {
55
+            "dlm_id": VALID_ULID,
56
+            "base_model": "smollm2-135m",
57
+            "dlm_version": 2,
58
+        }
59
+        out = migrate(raw)
60
+        # Dispatcher stamps dlm_version post-migrate; simulate that.
61
+        out["dlm_version"] = 3
62
+        fm = DlmFrontmatter.model_validate(out)
63
+        assert fm.training.cpt == CptConfig()
tests/unit/doc/test_schema.py modified
@@ -6,6 +6,7 @@ import pytest
66
 from pydantic import ValidationError
77
 
88
 from dlm.doc.schema import (
9
+    CptConfig,
910
     DlmFrontmatter,
1011
     ExportConfig,
1112
     PreferenceConfig,
@@ -180,6 +181,55 @@ class TestTrainingConfigPreferenceSubfield:
180181
             )
181182
 
182183
 
184
+class TestCptConfig:
185
+    def test_default_instance(self) -> None:
186
+        c = CptConfig()
187
+        assert c.schedule == "auto"
188
+        assert c.embed_warmup_steps == 0
189
+
190
+    def test_frozen_model_rejects_mutation(self) -> None:
191
+        c = CptConfig()
192
+        with pytest.raises(ValidationError):
193
+            c.embed_warmup_steps = 10  # type: ignore[misc]
194
+
195
+    @pytest.mark.parametrize("value", ["auto", "dapt", "sft"])
196
+    def test_schedule_accepts_known_values(self, value: str) -> None:
197
+        CptConfig(schedule=value)  # type: ignore[arg-type]
198
+
199
+    def test_schedule_rejects_unknown(self) -> None:
200
+        with pytest.raises(ValidationError):
201
+            CptConfig(schedule="warmup")  # type: ignore[arg-type]
202
+
203
+    def test_embed_warmup_steps_must_be_non_negative(self) -> None:
204
+        with pytest.raises(ValidationError):
205
+            CptConfig(embed_warmup_steps=-1)
206
+
207
+    def test_extra_fields_forbidden(self) -> None:
208
+        with pytest.raises(ValidationError):
209
+            CptConfig.model_validate({"schedule": "auto", "rubbish": 1})
210
+
211
+
212
+class TestTrainingConfigCptSubfield:
213
+    def test_default_training_has_auto_cpt(self) -> None:
214
+        t = TrainingConfig()
215
+        assert isinstance(t.cpt, CptConfig)
216
+        assert t.cpt.schedule == "auto"
217
+        assert t.cpt.embed_warmup_steps == 0
218
+
219
+    def test_accepts_nested_dict_for_cpt(self) -> None:
220
+        t = TrainingConfig.model_validate(
221
+            {"cpt": {"schedule": "dapt", "embed_warmup_steps": 200}}
222
+        )
223
+        assert t.cpt.schedule == "dapt"
224
+        assert t.cpt.embed_warmup_steps == 200
225
+
226
+    def test_rejects_unknown_field_inside_cpt(self) -> None:
227
+        with pytest.raises(ValidationError):
228
+            TrainingConfig.model_validate(
229
+                {"cpt": {"schedule": "dapt", "rubbish": 1}}
230
+            )
231
+
232
+
183233
 class TestExportConfig:
184234
     def test_default_quant(self) -> None:
185235
         assert ExportConfig().default_quant == "Q4_K_M"
@@ -200,7 +250,7 @@ class TestExportConfig:
200250
 class TestDlmFrontmatter:
201251
     def test_minimal_valid(self) -> None:
202252
         fm = DlmFrontmatter(dlm_id=VALID_ULID, base_model="smollm2-135m")
203
-        assert fm.dlm_version == 2
253
+        assert fm.dlm_version == 3
204254
         assert fm.training == TrainingConfig()
205255
         assert fm.export == ExportConfig()
206256
         assert fm.system_prompt is None