tenseleyflow/sway / 3bd59ce

Browse files

tests: add D3/D4/D6/D7/D10/D11/D12 coverage — formatters, extras rollup, CLI surfaces

Authored by espadonne
SHA
3bd59cee000be33e1ade4c2331705bdfd12ca377
Parents
1b0d36a
Tree
8db4ac1

3 changed files

StatusFile+-
M tests/unit/test_cli.py 167 0
A tests/unit/test_report_extras_rollup.py 121 0
A tests/unit/test_report_formatters.py 63 0
tests/unit/test_cli.pymodified
@@ -90,3 +90,170 @@ def test_autogen_without_dlm_extra_exits_nonzero(tmp_path: Path, monkeypatch) ->
9090
     monkeypatch.setattr(builtins, "__import__", fake_import)
9191
     result = CliRunner().invoke(app, ["autogen", "any.dlm"])
9292
     assert result.exit_code != 0
93
+
94
+
95
+# -- Sprint 06 additions ----------------------------------------------
96
+
97
+
98
class TestDoctorJson:
    """D7: ``sway doctor --json`` must produce machine-readable JSON."""

    def test_json_is_parseable(self) -> None:
        """Exit cleanly, parse as JSON, and expose the expected keys."""
        outcome = CliRunner().invoke(app, ["doctor", "--json"])
        assert outcome.exit_code == 0

        # json.loads raises on malformed output, which fails the test.
        doc = json.loads(outcome.stdout)

        for top_level_key in ("sway_version", "python", "platform", "extras"):
            assert top_level_key in doc

        # Every known extras bucket must be reported; additional buckets
        # are tolerated (superset check).
        expected_buckets = {"hf", "mlx", "semsim", "style", "dlm", "viz"}
        assert expected_buckets <= set(doc["extras"])
111
+
112
+
113
class TestListProbes:
    """D6: ``sway list-probes`` must mention each registered probe kind."""

    def test_emits_every_shipped_kind(self) -> None:
        """Each shipped kind name appears somewhere in the command's stdout."""
        shipped_kinds = (
            "delta_kl",
            "adapter_revert",
            "prompt_collapse",
            "section_internalization",
            "paraphrase_invariance",
            "preference_flip",
            "style_fingerprint",
            "calibration_drift",
            "leakage",
            "adapter_ablation",
            "null_adapter",
        )

        outcome = CliRunner().invoke(app, ["list-probes"])
        assert outcome.exit_code == 0

        listing = outcome.stdout
        # Collect the absentees so a failure shows every missing kind at once.
        missing = [kind for kind in shipped_kinds if kind not in listing]
        assert missing == []
133
+
134
+
135
class TestReportFormatEnum:
    """D11: an unknown ``--format`` value must fail loudly.

    The command has to exit non-zero and say what went wrong instead of
    silently falling back to the terminal renderer.
    """

    def test_unknown_format_rejected(self, tmp_path: Path) -> None:
        """``--format csv`` exits non-zero and names the bad value."""
        # A small, syntactically valid result file for the command to point at.
        minimal_result = {
            "sway_version": "0",
            "base_model_id": "b",
            "adapter_id": "a",
            "score": {"overall": 0.0, "band": "noise", "components": {}, "findings": []},
            "probes": [],
        }
        result_path = tmp_path / "r.json"
        result_path.write_text(json.dumps(minimal_result), encoding="utf-8")

        result = CliRunner().invoke(app, ["report", str(result_path), "--format", "csv"])
        assert result.exit_code != 0

        # ``result.output`` already contains the captured stdout text, so
        # concatenating it with ``result.stdout`` only duplicated the haystack.
        # Lower-case once and reuse.
        combined = (result.output or "").lower()
        assert "csv" in combined or "invalid" in combined
156
+
157
+
158
class TestCheckBaseInference:
    """D4: ``sway check`` infers the base model from ``adapter_config.json``."""

    @staticmethod
    def _write_adapter_config(root: Path, raw_text: str) -> Path:
        """Create ``root/adapter`` containing ``adapter_config.json`` with *raw_text*.

        Returns the adapter directory so tests can hand it to the helper
        under test.
        """
        adapter_dir = root / "adapter"
        adapter_dir.mkdir()
        (adapter_dir / "adapter_config.json").write_text(raw_text, encoding="utf-8")
        return adapter_dir

    def test_reads_base_from_adapter_config(self, tmp_path: Path) -> None:
        """The ``base_model_name_or_path`` value is returned verbatim."""
        from dlm_sway.cli.commands import _infer_base_from_adapter_config

        config_text = json.dumps({"base_model_name_or_path": "HuggingFaceTB/SmolLM2-135M-Instruct"})
        adapter_dir = self._write_adapter_config(tmp_path, config_text)

        inferred = _infer_base_from_adapter_config(adapter_dir)
        assert inferred == "HuggingFaceTB/SmolLM2-135M-Instruct"

    def test_returns_none_when_config_missing(self, tmp_path: Path) -> None:
        """A directory without ``adapter_config.json`` yields ``None``."""
        from dlm_sway.cli.commands import _infer_base_from_adapter_config

        inferred = _infer_base_from_adapter_config(tmp_path)
        assert inferred is None

    def test_returns_none_when_field_missing(self, tmp_path: Path) -> None:
        """A config lacking ``base_model_name_or_path`` yields ``None``."""
        from dlm_sway.cli.commands import _infer_base_from_adapter_config

        adapter_dir = self._write_adapter_config(tmp_path, json.dumps({"rank": 8}))

        inferred = _infer_base_from_adapter_config(adapter_dir)
        assert inferred is None

    def test_returns_none_when_config_malformed(self, tmp_path: Path) -> None:
        """Unparseable JSON in the config yields ``None``."""
        from dlm_sway.cli.commands import _infer_base_from_adapter_config

        adapter_dir = self._write_adapter_config(tmp_path, "{ not json")

        inferred = _infer_base_from_adapter_config(adapter_dir)
        assert inferred is None
192
+
193
+
194
class TestCheckBanner:
    """D12: ``_check_banner`` picks its verdict tier from the probe z-score."""

    def _suite_with_z(self, z_value: float | None) -> tuple:
        """Build a one-probe suite plus a fixed score for the banner tests.

        The single ``delta_kl`` probe carries *z_value* as its z-score and is
        marked PASS only when that value is present and at least 3; in every
        other case it is marked FAIL. Returns ``(suite, score)``.
        """
        from datetime import UTC, datetime

        from dlm_sway.core.result import ProbeResult, SuiteResult, SwayScore, Verdict

        clears_bar = z_value is not None and z_value >= 3
        lone_probe = ProbeResult(
            name="dk",
            kind="delta_kl",
            verdict=Verdict.PASS if clears_bar else Verdict.FAIL,
            score=0.5,
            z_score=z_value,
        )

        # One timestamp is shared by start and finish.
        stamp = datetime.now(UTC)
        suite = SuiteResult(
            spec_path="<t>",
            started_at=stamp,
            finished_at=stamp,
            base_model_id="b",
            adapter_id="a",
            sway_version="0.0.0",
            probes=(lone_probe,),
        )
        score = SwayScore(overall=0.5, components={"adherence": 0.5}, band="partial")
        return suite, score

    def test_high_z_is_green(self) -> None:
        """z = 4.5 gives the check-mark, "above noise" wording and a green style."""
        from dlm_sway.cli.commands import _check_banner

        suite, score = self._suite_with_z(4.5)
        banner_text, banner_style = _check_banner(score, suite)

        for fragment in ("✅", "above noise"):
            assert fragment in banner_text
        assert "green" in banner_style

    def test_marginal_z_is_yellow(self) -> None:
        """z = 1.5 gives the warning sign and a yellow style."""
        from dlm_sway.cli.commands import _check_banner

        suite, score = self._suite_with_z(1.5)
        banner_text, banner_style = _check_banner(score, suite)

        assert "⚠️" in banner_text
        assert "yellow" in banner_style

    def test_low_z_is_red(self) -> None:
        """z = 0.3 gives the cross mark and a red style."""
        from dlm_sway.cli.commands import _check_banner

        suite, score = self._suite_with_z(0.3)
        banner_text, banner_style = _check_banner(score, suite)

        assert "❌" in banner_text
        assert "red" in banner_style

    def test_missing_z_falls_back_to_composite(self) -> None:
        """Without a z-score the banner must not use sigma wording."""
        from dlm_sway.cli.commands import _check_banner

        suite, score = self._suite_with_z(None)
        banner_text, _unused_style = _check_banner(score, suite)

        # "σ above noise" phrasing only makes sense when a z-score exists.
        assert "σ" not in banner_text
tests/unit/test_report_extras_rollup.pyadded
@@ -0,0 +1,121 @@
1
+"""Tests for the D3 extras-rollup surface.
2
+
3
+Covers ``report.collect_missing_extras`` (pure extraction) and the
4
+terminal/markdown renderers' handling of the resulting footer.
5
+"""
6
+
7
+from __future__ import annotations
8
+
9
+from datetime import UTC, datetime
10
+
11
+from dlm_sway.core.result import ProbeResult, SuiteResult, SwayScore, Verdict
12
+from dlm_sway.suite import report
13
+
14
+
15
+def _suite_with_messages(messages: list[str]) -> SuiteResult:
16
+    now = datetime.now(UTC)
17
+    probes = tuple(
18
+        ProbeResult(
19
+            name=f"p{i}",
20
+            kind="delta_kl",
21
+            verdict=Verdict.SKIP,
22
+            score=None,
23
+            message=msg,
24
+        )
25
+        for i, msg in enumerate(messages)
26
+    )
27
+    return SuiteResult(
28
+        spec_path="<test>",
29
+        started_at=now,
30
+        finished_at=now,
31
+        base_model_id="b",
32
+        adapter_id="a",
33
+        sway_version="0.0.0",
34
+        probes=probes,
35
+    )
36
+
37
+
38
+class TestCollectMissingExtras:
39
+    def test_single_extra_single_probe(self) -> None:
40
+        suite = _suite_with_messages(
41
+            ["adapter_revert: install the [semsim] extra for sentence embeddings"]
42
+        )
43
+        assert report.collect_missing_extras(suite) == ["semsim"]
44
+
45
+    def test_multiple_probes_deduplicated(self) -> None:
46
+        suite = _suite_with_messages(
47
+            [
48
+                "install the [semsim] extra",
49
+                "install the [semsim] extra",
50
+                "install the [style] extra",
51
+            ]
52
+        )
53
+        assert report.collect_missing_extras(suite) == ["semsim", "style"]
54
+
55
+    def test_non_skip_messages_ignored(self) -> None:
56
+        now = datetime.now(UTC)
57
+        probes = (
58
+            ProbeResult(
59
+                name="p1",
60
+                kind="delta_kl",
61
+                verdict=Verdict.PASS,
62
+                score=1.0,
63
+                message="install the [semsim] extra",
64
+            ),
65
+        )
66
+        suite = SuiteResult(
67
+            spec_path="<test>",
68
+            started_at=now,
69
+            finished_at=now,
70
+            base_model_id="b",
71
+            adapter_id="a",
72
+            sway_version="0.0.0",
73
+            probes=probes,
74
+        )
75
+        # A PASS probe mentioning install hints in passing must not
76
+        # pollute the rollup.
77
+        assert report.collect_missing_extras(suite) == []
78
+
79
+    def test_empty_suite_no_extras(self) -> None:
80
+        now = datetime.now(UTC)
81
+        suite = SuiteResult(
82
+            spec_path="<test>",
83
+            started_at=now,
84
+            finished_at=now,
85
+            base_model_id="b",
86
+            adapter_id="a",
87
+            sway_version="0.0.0",
88
+        )
89
+        assert report.collect_missing_extras(suite) == []
90
+
91
+
92
+class TestExtrasFooterInMarkdown:
93
+    def test_footer_includes_pip_command(self) -> None:
94
+        suite = _suite_with_messages(
95
+            [
96
+                "adapter_revert: install the [semsim] extra",
97
+                "style_fingerprint: install the [style] extra",
98
+            ]
99
+        )
100
+        score = SwayScore(overall=0.0, components={}, band="noise")
101
+        md = report.to_markdown(suite, score)
102
+        assert "pip install 'dlm-sway[semsim,style]'" in md
103
+        assert "Skipped probes" in md
104
+
105
+    def test_no_footer_when_no_skips(self) -> None:
106
+        now = datetime.now(UTC)
107
+        probes = (
108
+            ProbeResult(name="p1", kind="delta_kl", verdict=Verdict.PASS, score=0.9, message="ok"),
109
+        )
110
+        suite = SuiteResult(
111
+            spec_path="<test>",
112
+            started_at=now,
113
+            finished_at=now,
114
+            base_model_id="b",
115
+            adapter_id="a",
116
+            sway_version="0.0.0",
117
+            probes=probes,
118
+        )
119
+        score = SwayScore(overall=0.9, components={}, band="healthy")
120
+        md = report.to_markdown(suite, score)
121
+        assert "Skipped probes" not in md
tests/unit/test_report_formatters.pyadded
@@ -0,0 +1,63 @@
1
+"""Tests for the unified number formatters in :mod:`dlm_sway.suite.report` (D10)."""
2
+
3
+from __future__ import annotations
4
+
5
+import math
6
+
7
+from dlm_sway.suite import report
8
+
9
+
10
class TestFormatScore:
    """Expected renderings from ``report.format_score``."""

    def test_two_decimals(self) -> None:
        """A float is expected to render rounded to two decimals."""
        rendered = report.format_score(0.8765)
        assert rendered == "0.88"

    def test_none_is_em_dash(self) -> None:
        """``None`` is expected to render as an em dash."""
        rendered = report.format_score(None)
        assert rendered == "—"

    def test_nan_is_em_dash(self) -> None:
        """NaN is expected to render as an em dash."""
        rendered = report.format_score(math.nan)
        assert rendered == "—"

    def test_inf_is_em_dash(self) -> None:
        """Positive infinity is expected to render as an em dash."""
        rendered = report.format_score(math.inf)
        assert rendered == "—"

    def test_int_accepted(self) -> None:
        """An ``int`` input is expected to be formatted like a float."""
        rendered = report.format_score(1)
        assert rendered == "1.00"
25
+
26
+
27
class TestFormatRaw:
    """Expected renderings from ``report.format_raw``."""

    def test_three_decimals(self) -> None:
        """A float is expected to render with three decimals."""
        rendered = report.format_raw(0.123456)
        assert rendered == "0.123"

    def test_thousands_separator(self) -> None:
        """Values of a thousand or more are expected to get comma grouping."""
        rendered = report.format_raw(1234.5678)
        assert rendered == "1,234.568"

    def test_none_is_em_dash(self) -> None:
        """``None`` is expected to render as an em dash."""
        rendered = report.format_raw(None)
        assert rendered == "—"
36
+
37
+
38
class TestFormatZ:
    """Expected renderings from ``report.format_z``."""

    def test_signed_with_sigma(self) -> None:
        """A positive z-score is expected to carry a plus sign and a sigma suffix."""
        rendered = report.format_z(3.14)
        assert rendered == "+3.14σ"

    def test_negative(self) -> None:
        """A negative z-score is expected to keep its minus sign and two decimals."""
        rendered = report.format_z(-1.5)
        assert rendered == "-1.50σ"

    def test_large_thousands_separator(self) -> None:
        """Large magnitudes are expected to get comma grouping."""
        rendered = report.format_z(1234.56)
        assert rendered == "+1,234.56σ"

    def test_none_is_em_dash(self) -> None:
        """``None`` is expected to render as an em dash."""
        rendered = report.format_z(None)
        assert rendered == "—"
50
+
51
+
52
class TestFormatDuration:
    """Expected renderings from ``report.format_duration_s``."""

    def test_sub_ten_seconds(self) -> None:
        """A duration under ten seconds is expected to show two decimals."""
        rendered = report.format_duration_s(1.234)
        assert rendered == "1.23s"

    def test_between_ten_and_hundred(self) -> None:
        """A duration in the tens of seconds is expected to show one decimal."""
        rendered = report.format_duration_s(42.678)
        assert rendered == "42.7s"

    def test_large_seconds_with_thousands(self) -> None:
        """A large duration is expected as whole seconds with comma grouping."""
        rendered = report.format_duration_s(12345.6)
        assert rendered == "12,346s"

    def test_none_is_em_dash(self) -> None:
        """``None`` is expected to render as an em dash."""
        rendered = report.format_duration_s(None)
        assert rendered == "—"