| 1 | """Smoke tests for the sway CLI. |
| 2 | |
| 3 | We avoid exercising backends (they need real models) and instead test |
| 4 | arg parsing, error paths, and the read-only commands (``doctor``, |
| 5 | ``report``, and the help surface). |
| 6 | """ |
| 7 | |
| 8 | from __future__ import annotations |
| 9 | |
| 10 | import json |
| 11 | from pathlib import Path |
| 12 | |
| 13 | from typer.testing import CliRunner |
| 14 | |
| 15 | from dlm_sway.cli.app import app |
| 16 | |
| 17 | |
def test_version_exits_zero() -> None:
    """``--version`` exits with status 0 and mentions the product name."""
    runner = CliRunner()
    outcome = runner.invoke(app, ["--version"])
    assert outcome.exit_code == 0
    assert "sway" in outcome.stdout
| 22 | |
| 23 | |
def test_help_lists_all_commands() -> None:
    """Top-level ``--help`` succeeds and names every expected subcommand."""
    # NOTE(review): ``list-probes`` is invoked by TestListProbes in this file
    # but is not checked here — confirm whether it should be added.
    expected_commands = ("run", "gate", "check", "diff", "autogen", "doctor", "report")
    help_result = CliRunner().invoke(app, ["--help"])
    assert help_result.exit_code == 0
    help_text = help_result.stdout
    absent = [command for command in expected_commands if command not in help_text]
    assert not absent
| 29 | |
| 30 | |
def test_doctor_runs() -> None:
    """``sway doctor`` exits 0 and its output mentions ``sway`` and ``backends``.

    The output is read from the ``CliRunner`` result, so no pytest capture
    fixture is requested (the previously requested ``capsys`` was never used).
    """
    result = CliRunner().invoke(app, ["doctor"])
    assert result.exit_code == 0
    # Rich applies color codes by default; assert the bare product name appears.
    assert "sway" in result.stdout
    assert "backends" in result.stdout
| 37 | |
| 38 | |
def test_run_without_file_errors(tmp_path: Path) -> None:
    """``sway run`` pointed at a nonexistent spec file must not exit 0."""
    absent_spec = tmp_path / "nope.yaml"
    argv = ["run", str(absent_spec)]
    outcome = CliRunner().invoke(app, argv)
    # Only "non-zero" is asserted: the exact code depends on which layer
    # rejects the path (the original note cites 2 for a SwayError bubble-up
    # and 1 for a typer missing-arg error), and either is acceptable.
    assert outcome.exit_code != 0
| 44 | |
| 45 | |
def test_report_from_json(tmp_path: Path) -> None:
    """``sway report`` renders a saved result as terminal, md and junit output.

    A minimal one-probe result is written to disk, then the command is run
    once per output format and a format-specific marker is checked.
    """
    probe_row = {
        "name": "p1",
        "kind": "delta_kl",
        "verdict": "pass",
        "score": 0.7,
        "message": "ok",
    }
    score_block = {"overall": 0.7, "band": "healthy", "components": {}, "findings": []}
    saved_result = {
        "schema_version": 1,
        "sway_version": "0.1.0.dev0",
        "base_model_id": "base",
        "adapter_id": "adp",
        "score": score_block,
        "probes": [probe_row],
    }
    result_file = tmp_path / "result.json"
    result_file.write_text(json.dumps(saved_result), encoding="utf-8")

    runner = CliRunner()
    base_args = ["report", str(result_file)]

    # Default (terminal) rendering shows the probe name.
    plain = runner.invoke(app, base_args)
    assert plain.exit_code == 0
    assert "p1" in plain.stdout

    # Markdown rendering carries the report title.
    markdown = runner.invoke(app, [*base_args, "--format", "md"])
    assert markdown.exit_code == 0
    assert "sway report" in markdown.stdout

    # JUnit rendering emits a <testsuite> element.
    xml_out = runner.invoke(app, [*base_args, "--format", "junit"])
    assert xml_out.exit_code == 0
    assert "<testsuite" in xml_out.stdout
| 77 | |
| 78 | |
def test_autogen_without_dlm_extra_exits_nonzero(tmp_path: Path, monkeypatch) -> None:  # type: ignore[no-untyped-def]
    """``sway autogen`` exits non-zero when the dlm integration cannot be imported.

    ``builtins.__import__`` is patched so that any import whose name starts
    with ``dlm_sway.integrations.dlm`` raises ``ImportError``; every other
    import is delegated to the real importer.
    """
    import builtins

    blocked_prefix = "dlm_sway.integrations.dlm"
    original_import = builtins.__import__

    def guarded_import(name: str, *args: object, **kwargs: object):  # type: ignore[no-untyped-def]
        # Everything outside the blocked package goes straight through.
        if not name.startswith(blocked_prefix):
            return original_import(name, *args, **kwargs)  # type: ignore[no-untyped-call]
        raise ImportError("simulated missing extra")

    monkeypatch.setattr(builtins, "__import__", guarded_import)
    outcome = CliRunner().invoke(app, ["autogen", "any.dlm"])
    assert outcome.exit_code != 0
| 93 | |
| 94 | |
| 95 | # -- Sprint 06 additions ---------------------------------------------- |
| 96 | |
| 97 | |
class TestDoctorJson:
    """D7: ``sway doctor --json`` must emit a parseable payload."""

    def test_json_is_parseable(self) -> None:
        """The JSON output parses and carries the expected keys and extras."""
        outcome = CliRunner().invoke(app, ["doctor", "--json"])
        assert outcome.exit_code == 0
        doc = json.loads(outcome.stdout)
        for top_key in ("sway_version", "python", "platform", "extras"):
            assert top_key in doc
        extras = doc["extras"]
        # Every extra bucket is a mapping of module → version-or-null.
        # The listed buckets are required; additional ones are tolerated.
        required_buckets = {
            "hf",
            "mlx",
            "semsim",
            "style",
            "dlm",
            "viz",
            "api",
            "pytest",
        }
        assert required_buckets <= set(extras)
        # F04 regression: load-bearing deps appear under the right extras.
        load_bearing = (
            ("viz", "plotly"),
            ("semsim", "sklearn"),
            ("api", "httpx"),
            ("api", "tenacity"),
        )
        for bucket, module in load_bearing:
            assert module in extras[bucket]

    def test_json_schema_is_snapshot_stable(self) -> None:
        """Stronger-test #11 — pin the *shape* of ``sway doctor --json``.

        The snapshot covers the top-level keys, the extras bucket names and
        the set of module names inside each bucket. Host-dependent values
        (``sway_version``, ``python``, ``platform``, installed vs missing)
        are masked, so structural drift is caught without making the test
        environment-sensitive.
        """
        outcome = CliRunner().invoke(app, ["doctor", "--json"])
        assert outcome.exit_code == 0
        doc = json.loads(outcome.stdout)

        assert set(doc) == {"sway_version", "python", "platform", "extras"}

        extras = doc["extras"]
        assert isinstance(extras, dict)
        # Bucket keys are stable, module versions are not: compare only the
        # sorted module names per bucket.
        observed_shape = {name: sorted(modules) for name, modules in extras.items()}
        expected_shape = {
            "api": ["httpx", "tenacity"],
            "dlm": ["dlm"],
            "hf": ["peft", "torch", "transformers"],
            "mlx": ["mlx", "mlx_lm"],
            "pytest": ["pytest"],
            "semsim": ["sentence_transformers", "sklearn"],
            "style": ["nlpaug", "spacy", "textstat"],
            "viz": ["matplotlib", "plotly"],
        }
        assert observed_shape == expected_shape

        # Value type is str-or-None on every module entry.
        for bucket_name, bucket in extras.items():
            for mod_name, version in bucket.items():
                assert isinstance(mod_name, str), bucket_name
                assert isinstance(version, (str, type(None))), (bucket_name, mod_name)
| 157 | |
| 158 | |
class TestListProbes:
    """D6: ``sway list-probes`` prints the registered kinds."""

    def test_emits_every_shipped_kind(self) -> None:
        """Each probe kind named below appears somewhere in the output."""
        result = CliRunner().invoke(app, ["list-probes"])
        assert result.exit_code == 0
        for kind in (
            "delta_kl",
            "adapter_revert",
            "prompt_collapse",
            "section_internalization",
            "paraphrase_invariance",
            "preference_flip",
            "style_fingerprint",
            "calibration_drift",
            "leakage",
            "adapter_ablation",
            "null_adapter",
            "external_perplexity",
            "cluster_kl",
        ):
            assert kind in result.stdout

    def test_every_probe_has_a_summary_line(self) -> None:
        """F03 regression — before the module-docstring fallback, half
        the probe rows shipped with an empty summary column."""
        from dlm_sway.probes.base import registry

        result = CliRunner().invoke(app, ["list-probes"])
        assert result.exit_code == 0
        out = result.stdout
        for kind in sorted(registry()):
            # Find the row by its leading ``kind`` token. Rich wraps
            # long summaries across lines, so match any non-empty
            # continuation after the category column.
            # NOTE(review): ``find`` returns the first *substring* hit, so a
            # kind that also occurs inside another row would be located there.
            idx = out.find(kind)
            assert idx != -1, f"{kind} missing from list-probes output"
            # ``str.find`` returns -1 when no newline follows; slicing with
            # that would silently drop the row's last character, so treat
            # "no newline" as "row runs to the end of the output".
            line_end = out.find("\n", idx)
            if line_end == -1:
                line_end = len(out)
            row = out[idx:line_end]
            # Row format: "kind category summary..."
            tokens = row.split()
            # Past the 2nd column (category) there should be at least one
            # summary token. Empty rows surfaced as len(tokens) == 2.
            assert len(tokens) > 2, f"{kind} has an empty summary: {row!r}"
| 202 | |
| 203 | |
class TestReportFormatEnum:
    """D11: unknown ``--format`` surfaces a clear error, not silent terminal."""

    def test_unknown_format_rejected(self, tmp_path: Path) -> None:
        """``--format csv`` exits non-zero and the output names the problem."""
        minimal_result = {
            "sway_version": "0",
            "base_model_id": "b",
            "adapter_id": "a",
            "score": {"overall": 0.0, "band": "noise", "components": {}, "findings": []},
            "probes": [],
        }
        result_path = tmp_path / "r.json"
        result_path.write_text(json.dumps(minimal_result), encoding="utf-8")

        outcome = CliRunner().invoke(app, ["report", str(result_path), "--format", "csv"])
        assert outcome.exit_code != 0
        # Search both captured streams, case-insensitively, for either marker.
        haystack = ((outcome.stdout or "") + (outcome.output or "")).lower()
        assert any(marker in haystack for marker in ("csv", "invalid"))
| 225 | |
| 226 | |
class TestCheckBaseInference:
    """D4: ``sway check`` reads base_model_name_or_path from adapter_config.json."""

    @staticmethod
    def _adapter_with_config(root: Path, config_text: str) -> Path:
        """Create ``root/adapter`` containing ``adapter_config.json`` with *config_text*."""
        adapter_dir = root / "adapter"
        adapter_dir.mkdir()
        (adapter_dir / "adapter_config.json").write_text(config_text, encoding="utf-8")
        return adapter_dir

    def test_reads_base_from_adapter_config(self, tmp_path: Path) -> None:
        """A config carrying ``base_model_name_or_path`` yields that value."""
        from dlm_sway.cli.commands import _infer_base_from_adapter_config

        model_id = "HuggingFaceTB/SmolLM2-135M-Instruct"
        adapter_dir = self._adapter_with_config(
            tmp_path, json.dumps({"base_model_name_or_path": model_id})
        )
        assert _infer_base_from_adapter_config(adapter_dir) == "HuggingFaceTB/SmolLM2-135M-Instruct"

    def test_returns_none_when_config_missing(self, tmp_path: Path) -> None:
        """A directory without ``adapter_config.json`` yields ``None``."""
        from dlm_sway.cli.commands import _infer_base_from_adapter_config

        inferred = _infer_base_from_adapter_config(tmp_path)
        assert inferred is None

    def test_returns_none_when_field_missing(self, tmp_path: Path) -> None:
        """A valid config lacking the base-model field yields ``None``."""
        from dlm_sway.cli.commands import _infer_base_from_adapter_config

        adapter_dir = self._adapter_with_config(tmp_path, json.dumps({"rank": 8}))
        assert _infer_base_from_adapter_config(adapter_dir) is None

    def test_returns_none_when_config_malformed(self, tmp_path: Path) -> None:
        """A config that is not valid JSON yields ``None``."""
        from dlm_sway.cli.commands import _infer_base_from_adapter_config

        adapter_dir = self._adapter_with_config(tmp_path, "{ not json")
        assert _infer_base_from_adapter_config(adapter_dir) is None
| 261 | |
| 262 | |
class TestCheckBanner:
    """D12: ``_check_banner`` maps z-score to the right verdict tier."""

    def _suite_with_z(self, z_value: float | None) -> tuple:
        """Build a ``(suite, score)`` pair around one ``delta_kl`` probe.

        The probe carries *z_value* as its ``z_score`` and is marked PASS
        only when *z_value* is present and at least 3; otherwise FAIL. The
        accompanying score is a fixed 0.5 / "partial" composite.
        """
        from datetime import UTC, datetime

        from dlm_sway.core.result import ProbeResult, SuiteResult, SwayScore, Verdict

        clears_threshold = z_value is not None and z_value >= 3
        probe = ProbeResult(
            name="dk",
            kind="delta_kl",
            verdict=Verdict.PASS if clears_threshold else Verdict.FAIL,
            score=0.5,
            z_score=z_value,
        )
        # Start and finish share a single timestamp.
        stamp = datetime.now(UTC)
        suite = SuiteResult(
            spec_path="<t>",
            started_at=stamp,
            finished_at=stamp,
            base_model_id="b",
            adapter_id="a",
            sway_version="0.0.0",
            probes=(probe,),
        )
        score = SwayScore(
            overall=0.5,
            components={"adherence": 0.5},
            band="partial",
        )
        return suite, score

    def test_high_z_is_green(self) -> None:
        """z = 4.5 produces the ✅ "above noise" banner with a green style."""
        from dlm_sway.cli.commands import _check_banner

        suite, score = self._suite_with_z(4.5)
        banner, banner_style = _check_banner(score, suite)
        assert "✅" in banner
        assert "above noise" in banner
        assert "green" in banner_style

    def test_marginal_z_is_yellow(self) -> None:
        """z = 1.5 produces the ⚠️ banner with a yellow style."""
        from dlm_sway.cli.commands import _check_banner

        suite, score = self._suite_with_z(1.5)
        banner, banner_style = _check_banner(score, suite)
        assert "⚠️" in banner
        assert "yellow" in banner_style

    def test_low_z_is_red(self) -> None:
        """z = 0.3 produces the ❌ banner with a red style."""
        from dlm_sway.cli.commands import _check_banner

        suite, score = self._suite_with_z(0.3)
        banner, banner_style = _check_banner(score, suite)
        assert "❌" in banner
        assert "red" in banner_style

    def test_missing_z_falls_back_to_composite(self) -> None:
        """Without a z-score the banner must not use σ wording."""
        from dlm_sway.cli.commands import _check_banner

        suite, score = self._suite_with_z(None)
        banner, _unused_style = _check_banner(score, suite)
        # No "σ above noise" language when we don't have a z-score.
        assert "σ" not in banner