tenseleyflow/sway / 2e074c6

Browse files

sway(tests): bridge unit tests with mocked dlm imports

Authored by espadonne
SHA
2e074c6e10d1f61d0a7fea6e62fd4be0474e3328
Parents
c9714b8
Tree
c13ae9e

1 changed file

Status | File | + | -
A tests/unit/test_dlm_bridge.py 215 0
tests/unit/test_dlm_bridge.py (added)
@@ -0,0 +1,215 @@
1
+"""Tests for :mod:`dlm_sway.integrations.dlm`.
2
+
3
+The bridge imports ``dlm.*`` modules lazily. We mock those via
4
+``sys.modules`` injection so the tests run without the ``dlm-sway[dlm]``
5
+extra installed. A full end-to-end integration test against a real
6
+``.dlm`` lives under ``tests/integration/``.
7
+"""
8
+
9
+from __future__ import annotations
10
+
11
+import sys
12
+import types
13
+from dataclasses import dataclass
14
+from pathlib import Path
15
+
16
+import pytest
17
+import yaml
18
+
19
+
20
+@pytest.fixture
21
+def fake_dlm(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> Path:
22
+    """Install a fake ``dlm`` package so the resolver can import."""
23
+
24
+    # Build synthetic parsed .dlm structure.
25
+    @dataclass
26
+    class _Frontmatter:
27
+        dlm_id: str = "01TESTULID"
28
+        base_model: str = "HuggingFaceTB/SmolLM2-135M-Instruct"
29
+
30
+    @dataclass
31
+    class _InstrProbe:
32
+        prompt: str
33
+        gold: str
34
+
35
+    @dataclass
36
+    class _PrefTriple:
37
+        prompt: str
38
+        chosen: str
39
+        rejected: str
40
+
41
+    @dataclass
42
+    class _Section:
43
+        section_id: str
44
+        kind: str
45
+        content: str
46
+        probes: tuple[object, ...] = ()
47
+        preferences: tuple[object, ...] = ()
48
+        tag: str | None = None
49
+
50
+    @dataclass
51
+    class _Parsed:
52
+        frontmatter: _Frontmatter
53
+        sections: tuple[_Section, ...]
54
+
55
+    def _parse_file(_path: Path):  # type: ignore[no-untyped-def]
56
+        return _Parsed(
57
+            frontmatter=_Frontmatter(),
58
+            sections=(
59
+                _Section(
60
+                    section_id="prose-1",
61
+                    kind="PROSE",
62
+                    content="This is a prose section with some information. Further detail follows.",
63
+                ),
64
+                _Section(
65
+                    section_id="instr-1",
66
+                    kind="INSTRUCTION",
67
+                    content="Q-A pairs",
68
+                    probes=(_InstrProbe("What is X?", "X is a concept"),),
69
+                ),
70
+                _Section(
71
+                    section_id="pref-1",
72
+                    kind="PREFERENCE",
73
+                    content="Prefs",
74
+                    preferences=(_PrefTriple("Which?", "good answer", "bad answer"),),
75
+                ),
76
+            ),
77
+        )
78
+
79
+    # Fake ``dlm.doc.parser`` module.
80
+    dlm_pkg = types.ModuleType("dlm")
81
+    dlm_doc = types.ModuleType("dlm.doc")
82
+    dlm_doc_parser = types.ModuleType("dlm.doc.parser")
83
+    dlm_doc_parser.parse_file = _parse_file  # type: ignore[attr-defined]
84
+
85
+    # Fake ``dlm.store.paths`` that returns a resolvable path.
86
+    dlm_store = types.ModuleType("dlm.store")
87
+    dlm_store_paths = types.ModuleType("dlm.store.paths")
88
+
89
+    adapter_dir = tmp_path / "adapter_v1"
90
+    adapter_dir.mkdir()
91
+    (adapter_dir / "adapter_config.json").write_text("{}", encoding="utf-8")
92
+
93
+    class _StorePath:
94
+        def __init__(self, path: Path) -> None:
95
+            self._p = path
96
+
97
+        @classmethod
98
+        def for_dlm(cls, _dlm_id: str) -> _StorePath:
99
+            return cls(adapter_dir)
100
+
101
+        def resolve_current_adapter(self) -> Path:
102
+            return self._p
103
+
104
+    dlm_store_paths.StorePath = _StorePath  # type: ignore[attr-defined]
105
+
106
+    monkeypatch.setitem(sys.modules, "dlm", dlm_pkg)
107
+    monkeypatch.setitem(sys.modules, "dlm.doc", dlm_doc)
108
+    monkeypatch.setitem(sys.modules, "dlm.doc.parser", dlm_doc_parser)
109
+    monkeypatch.setitem(sys.modules, "dlm.store", dlm_store)
110
+    monkeypatch.setitem(sys.modules, "dlm.store.paths", dlm_store_paths)
111
+
112
+    # Return a path to a fake .dlm file (the parser won't actually read it).
113
+    dlm_file = tmp_path / "doc.dlm"
114
+    dlm_file.write_text("---\ndlm_id: 01TEST\n---\n\nbody\n", encoding="utf-8")
115
+    return dlm_file
116
+
117
+
118
def test_resolve_dlm_maps_sections(fake_dlm: Path) -> None:
    """``resolve_dlm`` carries frontmatter, adapter path and sections onto the handle."""
    from dlm_sway.integrations.dlm.resolver import resolve_dlm

    resolved = resolve_dlm(fake_dlm)

    # Frontmatter values come through unchanged.
    assert resolved.dlm_id == "01TESTULID"
    assert resolved.base_model == "HuggingFaceTB/SmolLM2-135M-Instruct"

    # The adapter location is populated and points at something on disk.
    assert resolved.adapter_path is not None
    assert resolved.adapter_path.exists()

    # All three fake sections arrive; the fake parser emits upper-case kinds
    # and the handle is expected to expose them lower-cased.
    assert len(resolved.sections) == 3
    seen_kinds = set()
    for section in resolved.sections:
        seen_kinds.add(section.kind)
    assert seen_kinds == {"prose", "instruction", "preference"}

    # The instruction section keeps its question/answer probe.
    instruction = next(filter(lambda sec: sec.kind == "instruction", resolved.sections))
    assert instruction.probes
    first_probe = instruction.probes[0]
    assert first_probe.prompt == "What is X?"

    # The preference section keeps its chosen/rejected triple.
    preference = next(filter(lambda sec: sec.kind == "preference", resolved.sections))
    assert preference.preferences
    first_triple = preference.preferences[0]
    assert first_triple.chosen == "good answer"
137
+
138
+
139
def test_resolve_without_dlm_installed(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
    """resolve_dlm surfaces a SwayError when the dlm package is missing.

    The missing package is simulated in two steps: cached ``dlm`` modules are
    dropped from ``sys.modules``, then ``builtins.__import__`` is replaced by a
    hook that raises ``ImportError`` for any absolute import of ``dlm`` or one
    of its submodules. Both changes go through ``monkeypatch``.
    """
    # Wipe any cached dlm modules so the lazy import fails.
    for mod in list(sys.modules):
        if mod == "dlm" or mod.startswith("dlm."):
            monkeypatch.delitem(sys.modules, mod, raising=False)

    import builtins

    real_import = builtins.__import__

    def fake_import(name: str, *args, **kwargs):  # type: ignore[no-untyped-def]
        # ``__import__(name, globals, locals, fromlist, level)``: import
        # statements pass ``level`` positionally as the fourth extra argument.
        level = args[3] if len(args) > 3 else kwargs.get("level", 0)
        # Block the external package only. Relative imports (level > 0), e.g.
        # ``from .dlm.resolver import x`` inside ``dlm_sway.integrations``,
        # also carry a name starting with "dlm" and must pass through.
        # The bare ``dlm`` name is matched too, mirroring the wipe above.
        if level == 0 and (name == "dlm" or name.startswith("dlm.")):
            raise ImportError("missing extra")
        return real_import(name, *args, **kwargs)

    monkeypatch.setattr(builtins, "__import__", fake_import)

    from dlm_sway.core.errors import SwayError
    from dlm_sway.integrations.dlm.resolver import resolve_dlm

    with pytest.raises(SwayError, match="dlm package not installed"):
        resolve_dlm(tmp_path / "doc.dlm")
162
+
163
+
164
def test_autogen_writes_complete_suite(fake_dlm: Path, tmp_path: Path) -> None:
    """``write_sway_yaml`` writes a spec whose suite covers every expected probe kind."""
    from dlm_sway.integrations.dlm.autogen import write_sway_yaml

    spec_file = tmp_path / "sway.yaml"
    write_sway_yaml(fake_dlm, spec_file)
    raw_yaml = spec_file.read_text(encoding="utf-8")
    data = yaml.safe_load(raw_yaml)

    # Top-level metadata of the generated spec.
    assert data["version"] == 1
    assert data["models"]["base"]["base"] == "HuggingFaceTB/SmolLM2-135M-Instruct"
    assert data["models"]["ft"]["adapter"] is not None
    assert data["dlm_source"] == str(fake_dlm.resolve())

    kinds = set()
    for entry in data["suite"]:
        kinds.add(entry["kind"])

    # The fixture document has one section of every type, so none of the
    # 11 primitives below should have been left out for lack of data.
    expected = {
        "null_adapter",
        "delta_kl",
        "adapter_revert",
        "prompt_collapse",
        "section_internalization",
        "paraphrase_invariance",
        "preference_flip",
        "style_fingerprint",
        "calibration_drift",
        "leakage",
        "adapter_ablation",
    }
    assert kinds.issuperset(expected), f"missing: {expected - kinds}"
194
+
195
+
196
def test_build_spec_dict_skips_preference_when_absent() -> None:
    """A prose-only handle yields a suite without ``preference_flip``."""
    from dlm_sway.core.sections import Section
    from dlm_sway.integrations.dlm.autogen import build_spec_dict
    from dlm_sway.integrations.dlm.resolver import DlmHandle

    # Two prose sections and nothing else: no preference data at all.
    first_prose = Section(id="a", kind="prose", content="A prose section. Second sentence.")
    second_prose = Section(id="b", kind="prose", content="Another prose section.")
    prose_only = DlmHandle(
        dlm_id="x",
        base_model="base",
        adapter_path=Path("/tmp/adapter"),
        sections=(first_prose, second_prose),
        doc_text="whole document",
    )

    suite = build_spec_dict(prose_only)["suite"]
    suite_kinds = {item["kind"] for item in suite}

    assert "preference_flip" not in suite_kinds
    assert "section_internalization" in suite_kinds