Python · 12198 bytes Raw Blame History
1 """Smoke tests for the sway CLI.
2
3 We avoid exercising backends (they need real models) and instead test
4 arg parsing, error paths, and the read-only commands (``doctor``,
5 ``report``, and the help surface).
6 """
7
8 from __future__ import annotations
9
10 import json
11 from pathlib import Path
12
13 from typer.testing import CliRunner
14
15 from dlm_sway.cli.app import app
16
17
def test_version_exits_zero() -> None:
    """``--version`` exits with status 0 and mentions the product name."""
    runner = CliRunner()
    outcome = runner.invoke(app, ["--version"])
    assert outcome.exit_code == 0
    assert "sway" in outcome.stdout
22
23
def test_help_lists_all_commands() -> None:
    """Top-level ``--help`` output names every expected subcommand."""
    expected_commands = ("run", "gate", "check", "diff", "autogen", "doctor", "report")
    outcome = CliRunner().invoke(app, ["--help"])
    assert outcome.exit_code == 0
    help_text = outcome.stdout
    # Collect everything that is absent so a failure reports all gaps at once.
    missing = [command for command in expected_commands if command not in help_text]
    assert not missing
29
30
def test_doctor_runs(capsys) -> None:  # type: ignore[no-untyped-def]
    """``doctor`` exits cleanly and its report mentions sway and backends."""
    outcome = CliRunner().invoke(app, ["doctor"])
    assert outcome.exit_code == 0
    report_text = outcome.stdout
    # Output may carry Rich colour codes, so only look for bare substrings.
    for expected_word in ("sway", "backends"):
        assert expected_word in report_text
37
38
def test_run_without_file_errors(tmp_path: Path) -> None:
    """``run`` pointed at a spec file that does not exist must fail."""
    absent_spec = tmp_path / "nope.yaml"
    outcome = CliRunner().invoke(app, ["run", str(absent_spec)])
    # The precise code depends on which layer rejects the path, so any
    # non-zero exit status is accepted.
    assert outcome.exit_code != 0
44
45
def test_report_from_json(tmp_path: Path) -> None:
    """``report`` renders a saved result as terminal, markdown, and JUnit output."""
    probe_entry = {
        "name": "p1",
        "kind": "delta_kl",
        "verdict": "pass",
        "score": 0.7,
        "message": "ok",
    }
    saved_result = {
        "schema_version": 1,
        "sway_version": "0.1.0.dev0",
        "base_model_id": "base",
        "adapter_id": "adp",
        "score": {"overall": 0.7, "band": "healthy", "components": {}, "findings": []},
        "probes": [probe_entry],
    }
    result_file = tmp_path / "result.json"
    result_file.write_text(json.dumps(saved_result), encoding="utf-8")

    # (extra CLI arguments, substring that must show up in the rendering)
    renderings = (
        ([], "p1"),
        (["--format", "md"], "sway report"),
        (["--format", "junit"], "<testsuite"),
    )
    for extra_args, marker in renderings:
        outcome = CliRunner().invoke(app, ["report", str(result_file), *extra_args])
        assert outcome.exit_code == 0
        assert marker in outcome.stdout
77
78
def test_autogen_without_dlm_extra_exits_nonzero(tmp_path: Path, monkeypatch) -> None:  # type: ignore[no-untyped-def]
    """``autogen`` exits non-zero when the dlm integration import is made to fail."""
    import builtins

    original_import = builtins.__import__
    blocked_prefix = "dlm_sway.integrations.dlm"

    def guarded_import(name: str, *args: object, **kwargs: object):  # type: ignore[no-untyped-def]
        # Everything outside the blocked package goes to the real importer;
        # the blocked package behaves as if the extra were not installed.
        if not name.startswith(blocked_prefix):
            return original_import(name, *args, **kwargs)  # type: ignore[no-untyped-call]
        raise ImportError("simulated missing extra")

    monkeypatch.setattr(builtins, "__import__", guarded_import)
    outcome = CliRunner().invoke(app, ["autogen", "any.dlm"])
    assert outcome.exit_code != 0
93
94
95 # -- Sprint 06 additions ----------------------------------------------
96
97
class TestDoctorJson:
    """D7: ``sway doctor --json`` must emit a parseable payload."""

    def test_json_is_parseable(self) -> None:
        """The JSON output parses and carries the expected keys and extras."""
        outcome = CliRunner().invoke(app, ["doctor", "--json"])
        assert outcome.exit_code == 0
        payload = json.loads(outcome.stdout)
        for top_level_key in ("sway_version", "python", "platform", "extras"):
            assert top_level_key in payload

        extras = payload["extras"]
        # Every extra bucket is a mapping of module → version-or-null.
        required_buckets = {"hf", "mlx", "semsim", "style", "dlm", "viz", "api", "pytest"}
        assert required_buckets.issubset(set(extras))

        # F04 regression: load-bearing deps appear under the right extras.
        load_bearing = (
            ("viz", "plotly"),
            ("semsim", "sklearn"),
            ("api", "httpx"),
            ("api", "tenacity"),
        )
        for bucket_name, module_name in load_bearing:
            assert module_name in extras[bucket_name]

    def test_json_schema_is_snapshot_stable(self) -> None:
        """Stronger-test #11 — pin the *shape* of ``sway doctor --json``.

        The snapshot covers the top-level keys, the extras bucket names, and
        the set of module names inside each bucket. The values themselves
        (``sway_version``, ``python``, ``platform``, installed vs missing)
        differ per host, so they are only type-checked, which lets the test
        catch structural drift without being environment-sensitive.
        """
        outcome = CliRunner().invoke(app, ["doctor", "--json"])
        assert outcome.exit_code == 0
        payload = json.loads(outcome.stdout)

        assert set(payload) == {"sway_version", "python", "platform", "extras"}

        extras = payload["extras"]
        assert isinstance(extras, dict)
        # Bucket keys are stable while module versions are not, so compare
        # the sorted module names of every bucket against the snapshot.
        expected_shape = {
            "api": ["httpx", "tenacity"],
            "dlm": ["dlm"],
            "hf": ["peft", "torch", "transformers"],
            "mlx": ["mlx", "mlx_lm"],
            "pytest": ["pytest"],
            "semsim": ["sentence_transformers", "sklearn"],
            "style": ["nlpaug", "spacy", "textstat"],
            "viz": ["matplotlib", "plotly"],
        }
        observed_shape = {name: sorted(extras[name]) for name in sorted(extras)}
        assert observed_shape == expected_shape

        # Each module entry maps a string name to a string version or None.
        for bucket_name, modules in extras.items():
            for module_name, version in modules.items():
                assert isinstance(module_name, str), bucket_name
                assert isinstance(version, (str, type(None))), (bucket_name, module_name)
157
158
class TestListProbes:
    """D6: ``sway list-probes`` prints the registered kinds."""

    def test_emits_every_shipped_kind(self) -> None:
        """Each shipped probe kind appears somewhere in the command output."""
        result = CliRunner().invoke(app, ["list-probes"])
        assert result.exit_code == 0
        for kind in (
            "delta_kl",
            "adapter_revert",
            "prompt_collapse",
            "section_internalization",
            "paraphrase_invariance",
            "preference_flip",
            "style_fingerprint",
            "calibration_drift",
            "leakage",
            "adapter_ablation",
            "null_adapter",
            "external_perplexity",
            "cluster_kl",
        ):
            assert kind in result.stdout

    def test_every_probe_has_a_summary_line(self) -> None:
        """F03 regression — before the module-docstring fallback, half
        the probe rows shipped with an empty summary column."""
        from dlm_sway.probes.base import registry

        result = CliRunner().invoke(app, ["list-probes"])
        assert result.exit_code == 0
        out = result.stdout
        for kind in sorted(registry()):
            # Find the row by its leading ``kind`` token. Rich wraps
            # long summaries across lines, so match any non-empty
            # continuation after the category column.
            idx = out.find(kind)
            assert idx != -1, f"{kind} missing from list-probes output"
            # Take the text up to the end of the line. ``partition`` also
            # handles a final row with no trailing newline; slicing with
            # ``out.find("\n", idx)`` would use -1 as the end index there
            # and silently drop the row's last character.
            row = out[idx:].partition("\n")[0]
            # Row format: "kind  category  summary..."
            tokens = row.split()
            # Past the 2nd column (category) there should be at least one
            # summary token. Empty rows surfaced as len(tokens) == 2.
            assert len(tokens) > 2, f"{kind} has an empty summary: {row!r}"
202
203
class TestReportFormatEnum:
    """D11: unknown ``--format`` surfaces a clear error, not silent terminal."""

    def test_unknown_format_rejected(self, tmp_path: Path) -> None:
        """``report --format csv`` fails and the output names the problem."""
        minimal_result = {
            "sway_version": "0",
            "base_model_id": "b",
            "adapter_id": "a",
            "score": {"overall": 0.0, "band": "noise", "components": {}, "findings": []},
            "probes": [],
        }
        result_path = tmp_path / "r.json"
        result_path.write_text(json.dumps(minimal_result), encoding="utf-8")

        outcome = CliRunner().invoke(app, ["report", str(result_path), "--format", "csv"])
        assert outcome.exit_code != 0
        # Search both captured streams, lower-cased, for either hint.
        captured = ((outcome.stdout or "") + (outcome.output or "")).lower()
        assert any(hint in captured for hint in ("csv", "invalid"))
225
226
class TestCheckBaseInference:
    """D4: ``sway check`` reads base_model_name_or_path from adapter_config.json."""

    @staticmethod
    def _adapter_with_config(root: Path, config_text: str) -> Path:
        """Create ``root/adapter`` holding an ``adapter_config.json`` with the given text."""
        adapter_dir = root / "adapter"
        adapter_dir.mkdir()
        (adapter_dir / "adapter_config.json").write_text(config_text, encoding="utf-8")
        return adapter_dir

    def test_reads_base_from_adapter_config(self, tmp_path: Path) -> None:
        """A config that names a base model yields exactly that name."""
        from dlm_sway.cli.commands import _infer_base_from_adapter_config

        expected_base = "HuggingFaceTB/SmolLM2-135M-Instruct"
        adapter_dir = self._adapter_with_config(
            tmp_path, json.dumps({"base_model_name_or_path": expected_base})
        )
        assert _infer_base_from_adapter_config(adapter_dir) == expected_base

    def test_returns_none_when_config_missing(self, tmp_path: Path) -> None:
        """A directory without ``adapter_config.json`` yields ``None``."""
        from dlm_sway.cli.commands import _infer_base_from_adapter_config

        inferred = _infer_base_from_adapter_config(tmp_path)
        assert inferred is None

    def test_returns_none_when_field_missing(self, tmp_path: Path) -> None:
        """A valid config lacking the base-model field yields ``None``."""
        from dlm_sway.cli.commands import _infer_base_from_adapter_config

        adapter_dir = self._adapter_with_config(tmp_path, json.dumps({"rank": 8}))
        assert _infer_base_from_adapter_config(adapter_dir) is None

    def test_returns_none_when_config_malformed(self, tmp_path: Path) -> None:
        """A config file that is not valid JSON yields ``None``."""
        from dlm_sway.cli.commands import _infer_base_from_adapter_config

        adapter_dir = self._adapter_with_config(tmp_path, "{ not json")
        assert _infer_base_from_adapter_config(adapter_dir) is None
261
262
class TestCheckBanner:
    """D12: ``_check_banner`` maps z-score to the right verdict tier."""

    def _suite_with_z(self, z_value: float | None) -> tuple:
        """Build a ``(suite, score)`` pair whose single probe carries ``z_value``.

        The probe verdict is PASS when a z-score is given and is at least 3,
        and FAIL otherwise (including when ``z_value`` is ``None``).
        """
        from datetime import UTC, datetime

        from dlm_sway.core.result import ProbeResult, SuiteResult, SwayScore, Verdict

        if z_value is not None and z_value >= 3:
            verdict = Verdict.PASS
        else:
            verdict = Verdict.FAIL
        probe = ProbeResult(
            name="dk",
            kind="delta_kl",
            verdict=verdict,
            score=0.5,
            z_score=z_value,
        )

        # Start and finish share one timestamp; the banner tests never read it.
        timestamp = datetime.now(UTC)
        suite = SuiteResult(
            spec_path="<t>",
            started_at=timestamp,
            finished_at=timestamp,
            base_model_id="b",
            adapter_id="a",
            sway_version="0.0.0",
            probes=(probe,),
        )
        score = SwayScore(overall=0.5, components={"adherence": 0.5}, band="partial")
        return suite, score

    def _banner_for_z(self, z_value: float | None) -> tuple:
        """Return ``_check_banner``'s result for a suite built around ``z_value``."""
        from dlm_sway.cli.commands import _check_banner

        suite, score = self._suite_with_z(z_value)
        return _check_banner(score, suite)

    def test_high_z_is_green(self) -> None:
        """A z-score of 4.5 produces the green "above noise" banner."""
        banner_text, banner_style = self._banner_for_z(4.5)
        assert "✅" in banner_text
        assert "above noise" in banner_text
        assert "green" in banner_style

    def test_marginal_z_is_yellow(self) -> None:
        """A z-score of 1.5 produces the yellow warning banner."""
        banner_text, banner_style = self._banner_for_z(1.5)
        assert "⚠️" in banner_text
        assert "yellow" in banner_style

    def test_low_z_is_red(self) -> None:
        """A z-score of 0.3 produces the red failure banner."""
        banner_text, banner_style = self._banner_for_z(0.3)
        assert "❌" in banner_text
        assert "red" in banner_style

    def test_missing_z_falls_back_to_composite(self) -> None:
        """Without a z-score the banner text carries no sigma wording."""
        banner_text, _banner_style = self._banner_for_z(None)
        # No "σ above noise" language when we don't have a z-score.
        assert "σ" not in banner_text