tenseleyflow/sway / e67f59a

Browse files

tests/unit: sway compare CLI — formats, exit codes, --fail-on-regression gate

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
e67f59ac65b94214fbe924fbc0a58fc758f7d9ab
Parents
51870c6
Tree
c6d3084

1 changed file

StatusFile+-
A tests/unit/test_cli_compare.py 122 0
tests/unit/test_cli_compare.pyadded
@@ -0,0 +1,122 @@
1
+"""CLI tests for ``sway compare`` (S11)."""
2
+
3
+from __future__ import annotations
4
+
5
+import json
6
+from pathlib import Path
7
+
8
+from typer.testing import CliRunner
9
+
10
+from dlm_sway.cli.app import app
11
+
12
+
13
def _write_run(path: Path, *, timestamp: str, score: float, probe_score: float) -> None:
    """Write a minimal sway result JSON document to ``path``.

    Per the original author's note, the payload carries only the fields that
    ``report.from_json`` and ``compare`` actually read: the probes (name and
    score), the overall score, and the timestamps.

    Args:
        path: Destination file; overwritten if it already exists.
        timestamp: Stored verbatim as both ``started_at`` and ``finished_at``.
        score: Stored as ``score.overall``.
        probe_score: Score of the single probe, named ``dk``; also embedded in
            that probe's ``message``.
    """
    payload = {
        "schema_version": 1,
        "sway_version": "0.1.0.dev0",
        "base_model_id": "base",
        "adapter_id": "adp",
        "started_at": timestamp,
        "finished_at": timestamp,
        "score": {
            "overall": score,
            "band": "healthy",
            "components": {},
            "findings": [],
        },
        "probes": [
            {
                "name": "dk",
                "kind": "delta_kl",
                "verdict": "pass",
                "score": probe_score,
                "message": f"dk score={probe_score}",
            },
        ],
    }
    path.write_text(json.dumps(payload), encoding="utf-8")
43
+
44
+
45
class TestCompareCli:
    """End-to-end checks of ``sway compare`` driven through ``CliRunner``.

    Covers the three output formats, the usage-error exit code (2), and the
    ``--fail-on-regression`` gate (exit 1 on a drop above the threshold).
    """

    @staticmethod
    def _write_pair(tmp_path: Path, *, before: float, after: float) -> tuple[Path, Path]:
        """Write ``a.json`` (earlier run) and ``b.json`` (later run) under ``tmp_path``.

        Each run uses the same value for its overall score and its ``dk``
        probe score. Returns the two paths in chronological order.
        """
        a = tmp_path / "a.json"
        b = tmp_path / "b.json"
        _write_run(a, timestamp="2026-01-01T12:00:00+00:00", score=before, probe_score=before)
        _write_run(b, timestamp="2026-01-02T12:00:00+00:00", score=after, probe_score=after)
        return a, b

    def test_two_files_terminal_default(self, tmp_path: Path) -> None:
        """With no ``--format``, the output names the command, probe and composite."""
        a, b = self._write_pair(tmp_path, before=0.80, after=0.82)
        result = CliRunner().invoke(app, ["compare", str(a), str(b)])
        assert result.exit_code == 0, result.output
        assert "sway compare" in result.stdout
        assert "dk" in result.stdout
        assert "composite" in result.stdout

    def test_markdown_format(self, tmp_path: Path) -> None:
        """``--format md`` emits a Markdown heading and a pipe-delimited table."""
        a, b = self._write_pair(tmp_path, before=0.80, after=0.82)
        result = CliRunner().invoke(app, ["compare", str(a), str(b), "--format", "md"])
        assert result.exit_code == 0, result.output
        # Markdown header + table pipes.
        assert "# sway compare" in result.stdout
        assert "| probe |" in result.stdout

    def test_json_format(self, tmp_path: Path) -> None:
        """``--format json`` emits parseable JSON with labels and per-probe scores."""
        a, b = self._write_pair(tmp_path, before=0.80, after=0.82)
        result = CliRunner().invoke(app, ["compare", str(a), str(b), "--format", "json"])
        assert result.exit_code == 0, result.output
        # Parse stdout only, so anything written to stderr cannot corrupt the JSON.
        parsed = json.loads(result.stdout)
        # The expected labels coincide with the input file stems (a.json, b.json).
        assert parsed["labels"] == ["a", "b"]
        assert "dk" in parsed["scores"]

    def test_fewer_than_two_files_exits_2(self, tmp_path: Path) -> None:
        """A single input file is a usage error: exit 2 with an explanatory message."""
        a = tmp_path / "a.json"
        _write_run(a, timestamp="2026-01-01T12:00:00+00:00", score=0.80, probe_score=0.80)
        result = CliRunner().invoke(app, ["compare", str(a)])
        assert result.exit_code == 2, result.output
        # ``result.output`` holds the combined stdout/stderr text on every Click
        # version. ``result.stderr`` raises ValueError on Click < 8.2 when the
        # runner mixes stderr into stdout, which is the default there.
        assert "at least two" in result.output

    def test_unreadable_file_exits_2(self, tmp_path: Path) -> None:
        """An input file that is not valid JSON makes the command exit 2."""
        a = tmp_path / "a.json"
        b = tmp_path / "b.json"
        _write_run(a, timestamp="2026-01-01T12:00:00+00:00", score=0.80, probe_score=0.80)
        b.write_text("{ not valid json", encoding="utf-8")
        result = CliRunner().invoke(app, ["compare", str(a), str(b)])
        assert result.exit_code == 2, result.output

    def test_fail_on_regression_exits_0_when_improving(self, tmp_path: Path) -> None:
        """An improving score passes the regression gate."""
        a, b = self._write_pair(tmp_path, before=0.80, after=0.85)
        result = CliRunner().invoke(
            app, ["compare", str(a), str(b), "--fail-on-regression", "0.10"]
        )
        assert result.exit_code == 0, result.output

    def test_fail_on_regression_exits_1_when_probe_drops(self, tmp_path: Path) -> None:
        """A probe drop larger than the threshold trips the gate (exit 1)."""
        # dk dropped 0.20 — above the 0.10 threshold.
        a, b = self._write_pair(tmp_path, before=0.80, after=0.60)
        result = CliRunner().invoke(
            app, ["compare", str(a), str(b), "--fail-on-regression", "0.10"]
        )
        assert result.exit_code == 1, result.output

    def test_fail_on_regression_zero_disables_gate(self, tmp_path: Path) -> None:
        """A threshold of ``0`` turns the gate off, even for a large drop."""
        # Drop ≥0.10 but threshold=0 → no gate.
        a, b = self._write_pair(tmp_path, before=0.80, after=0.50)
        result = CliRunner().invoke(app, ["compare", str(a), str(b), "--fail-on-regression", "0"])
        assert result.exit_code == 0, result.output