tenseleyflow/documentlanguagemodel / 1bf4b01

Browse files

Use warmup_ratio instead of warmup_steps in DAPT overrides

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
1bf4b0171794897b609de92c7d16f465470769a8
Parents
ec26ce6
Tree
a9bb2c1

2 changed files

Status File + -
M src/dlm/train/cpt/runtime.py 1 5
M tests/unit/train/cpt/test_runtime.py 3 3
src/dlm/train/cpt/runtime.py modified
@@ -90,13 +90,9 @@ def dapt_sft_config_overrides(
9090
     at `learning_rate * min_lr_rate` instead of zero — matching our
9191
     DAPT curve. The warmup-ratio bump to 20% is literature-default for
9292
     domain-adaptive pretraining.
93
-
94
-    We thread that ratio through `warmup_steps` because current
95
-    `TrainingArguments` interprets values `< 1` as ratios and the
96
-    dedicated `warmup_ratio` knob is deprecated.
9793
     """
9894
     return {
9995
         "lr_scheduler_type": "cosine_with_min_lr",
100
-        "warmup_steps": warmup_ratio,
96
+        "warmup_ratio": warmup_ratio,
10197
         "lr_scheduler_kwargs": {"min_lr_rate": floor_ratio},
10298
     }
tests/unit/train/cpt/test_runtime.py modified
@@ -84,9 +84,9 @@ class TestDaptOverrides:
8484
         ov = dapt_sft_config_overrides()
8585
         assert ov["lr_scheduler_type"] == "cosine_with_min_lr"
8686
 
87
-    def test_warmup_steps_default_20_percent_ratio(self) -> None:
87
+    def test_warmup_ratio_default_20_percent(self) -> None:
8888
         ov = dapt_sft_config_overrides()
89
-        assert ov["warmup_steps"] == pytest.approx(0.2)
89
+        assert ov["warmup_ratio"] == pytest.approx(0.2)
9090
 
9191
     def test_floor_ratio_default(self) -> None:
9292
         ov = dapt_sft_config_overrides()
@@ -98,4 +98,4 @@ class TestDaptOverrides:
9898
 
9999
     def test_custom_warmup_respected(self) -> None:
100100
         ov = dapt_sft_config_overrides(warmup_ratio=0.3)
101
-        assert ov["warmup_steps"] == pytest.approx(0.3)
101
+        assert ov["warmup_ratio"] == pytest.approx(0.3)