tenseleyflow/documentlanguagemodel / 71ebaed

Browse files

Resolve dlm-registry aliases in --teacher hf:<key>

Audit 12 M12.6: 'hf:smollm2-135m' was passed literally to HF Hub (401), inconsistent with 'dlm init --base smollm2-135m' which resolves the alias. Normalize at parse_teacher_ref's hf branch — canonical HF id wins, literal org/repo passes through unchanged.
Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
71ebaedd02823e3e71ae76f7bf8ce364b658b8f8
Parents
a19a302
Tree
7a57e98

2 changed files

Status File + -
M src/dlm/synth/teachers.py 9 0
M tests/unit/synth/test_teachers.py 11 0
src/dlm/synth/teachers.py (modified)
@@ -400,6 +400,15 @@ def parse_teacher_ref(raw: str) -> TeacherRef:
400400
         target = spec.removeprefix("hf:").strip()
401401
         if not target:
402402
             raise InvalidTeacherSpecError("hf teacher selector must include a model id")
403
+        # Resolve dlm-registry aliases (e.g. `hf:smollm2-135m`) to their
404
+        # canonical HuggingFace id. Keeps `--teacher hf:<key>` consistent
405
+        # with `dlm init --base <key>`. A literal HF id (`org/repo`)
406
+        # passes through unchanged.
407
+        from dlm.base_models.registry import BASE_MODELS
408
+
409
+        spec_entry = BASE_MODELS.get(target)
410
+        if spec_entry is not None:
411
+            target = spec_entry.hf_id
403412
         return TeacherRef(raw=spec, kind="hf", target=target)
404413
     if spec.startswith("openai:"):
405414
         target = spec.removeprefix("openai:").strip()
tests/unit/synth/test_teachers.py (modified)
@@ -72,6 +72,17 @@ class TestTeacherSelectorParsing:
7272
         with pytest.raises(InvalidTeacherSpecError, match=message):
7373
             parse_teacher_ref(raw)
7474
 
75
+    def test_hf_teacher_resolves_dlm_registry_alias(self) -> None:
76
+        # Audit 12 M12.6: `hf:smollm2-135m` should resolve via the registry,
77
+        # mirroring `dlm init --base smollm2-135m`. A literal HF id stays put.
78
+        ref = parse_teacher_ref("hf:smollm2-135m")
79
+        assert ref.kind == "hf"
80
+        assert ref.target == "HuggingFaceTB/SmolLM2-135M-Instruct"
81
+
82
+    def test_hf_teacher_passes_through_literal_hf_id(self) -> None:
83
+        ref = parse_teacher_ref("hf:Qwen/Qwen2.5-1.5B-Instruct")
84
+        assert ref.target == "Qwen/Qwen2.5-1.5B-Instruct"
85
+
7586
 
7687
 class TestBuildTeacher:
7788
     def test_self_requires_dlm_path(self) -> None: