sway Public

Watch 0 Fork 0 Star 0

Python · 5250 bytes Raw Blame History

  
        1
        """CLI tests for ``sway compare`` (S11)."""
      
        2
        
        3
        from __future__ import annotations
      
        4
        
        5
        import json
      
        6
        from pathlib import Path
      
        7
        
        8
        from typer.testing import CliRunner
      
        9
        
        10
        from dlm_sway.cli.app import app
      
        11
        
        12
        
        13
        def _write_run(path: Path, *, timestamp: str, score: float, probe_score: float) -> None:
            """Write a minimal result JSON at ``path``.

            The payload only needs the fields ``report.from_json`` and ``compare``
            actually read — probes (name + score), overall score, timestamp.

            Args:
                path: Destination file; written as UTF-8 text.
                timestamp: Stored verbatim as both ``started_at`` and ``finished_at``.
                score: Stored as ``score.overall``.
                probe_score: Score of the single probe entry, named ``dk``; it is
                    also embedded in that probe's ``message``.
            """
            payload = {
                "schema_version": 1,
                "sway_version": "0.1.0.dev0",
                "base_model_id": "base",
                "adapter_id": "adp",
                "started_at": timestamp,
                "finished_at": timestamp,
                "score": {
                    "overall": score,
                    "band": "healthy",
                    "components": {},
                    "findings": [],
                },
                "probes": [
                    {
                        "name": "dk",
                        "kind": "delta_kl",
                        "verdict": "pass",
                        "score": probe_score,
                        "message": f"dk score={probe_score}",
                    },
                ],
            }
            path.write_text(json.dumps(payload), encoding="utf-8")
      
        43
        
        44
        
        45
        class TestCompareCli:
            """End-to-end checks of the ``compare`` sub-command through ``CliRunner``.

            Each test writes result files with ``_write_run`` under ``tmp_path``,
            invokes ``app`` and asserts on the exit code and/or the captured output.
            """

            def test_two_files_terminal_default(self, tmp_path: Path) -> None:
                """Two runs, no ``--format``: exit 0 and the expected labels are printed."""
                a = tmp_path / "a.json"
                b = tmp_path / "b.json"
                _write_run(a, timestamp="2026-01-01T12:00:00+00:00", score=0.80, probe_score=0.80)
                _write_run(b, timestamp="2026-01-02T12:00:00+00:00", score=0.82, probe_score=0.82)
                result = CliRunner().invoke(app, ["compare", str(a), str(b)])
                assert result.exit_code == 0, result.output
                assert "sway compare" in result.stdout
                assert "dk" in result.stdout
                assert "composite" in result.stdout

            def test_markdown_format(self, tmp_path: Path) -> None:
                """``--format md``: exit 0 and the output has a markdown heading and table."""
                a = tmp_path / "a.json"
                b = tmp_path / "b.json"
                _write_run(a, timestamp="2026-01-01T12:00:00+00:00", score=0.80, probe_score=0.80)
                _write_run(b, timestamp="2026-01-02T12:00:00+00:00", score=0.82, probe_score=0.82)
                result = CliRunner().invoke(app, ["compare", str(a), str(b), "--format", "md"])
                assert result.exit_code == 0, result.output
                # Markdown header + table pipes.
                assert "# sway compare" in result.stdout
                assert "| probe |" in result.stdout

            def test_json_format(self, tmp_path: Path) -> None:
                """``--format json``: stdout parses as JSON with the labels and the probe."""
                a = tmp_path / "a.json"
                b = tmp_path / "b.json"
                _write_run(a, timestamp="2026-01-01T12:00:00+00:00", score=0.80, probe_score=0.80)
                _write_run(b, timestamp="2026-01-02T12:00:00+00:00", score=0.82, probe_score=0.82)
                result = CliRunner().invoke(app, ["compare", str(a), str(b), "--format", "json"])
                assert result.exit_code == 0, result.output
                parsed = json.loads(result.stdout)
                assert parsed["labels"] == ["a", "b"]
                assert "dk" in parsed["scores"]

            def test_fewer_than_two_files_exits_2(self, tmp_path: Path) -> None:
                """A single input file: exit 2 with an "at least two" message."""
                a = tmp_path / "a.json"
                _write_run(a, timestamp="2026-01-01T12:00:00+00:00", score=0.80, probe_score=0.80)
                result = CliRunner().invoke(app, ["compare", str(a)])
                assert result.exit_code == 2, result.output
                # ``result.output`` carries what the user saw on the terminal,
                # whichever stream the message went to. Reading ``result.stderr``
                # raises ValueError on Click releases whose default runner does
                # not capture stderr separately (before 8.2).
                assert "at least two" in result.output

            def test_unreadable_file_exits_2(self, tmp_path: Path) -> None:
                """One input that is not valid JSON: exit 2."""
                a = tmp_path / "a.json"
                b = tmp_path / "b.json"
                _write_run(a, timestamp="2026-01-01T12:00:00+00:00", score=0.80, probe_score=0.80)
                b.write_text("{ not valid json", encoding="utf-8")
                result = CliRunner().invoke(app, ["compare", str(a), str(b)])
                assert result.exit_code == 2, result.output

            def test_fail_on_regression_exits_0_when_improving(self, tmp_path: Path) -> None:
                """``--fail-on-regression 0.10`` with scores going up: exit 0."""
                a = tmp_path / "a.json"
                b = tmp_path / "b.json"
                _write_run(a, timestamp="2026-01-01T12:00:00+00:00", score=0.80, probe_score=0.80)
                _write_run(b, timestamp="2026-01-02T12:00:00+00:00", score=0.85, probe_score=0.85)
                result = CliRunner().invoke(
                    app, ["compare", str(a), str(b), "--fail-on-regression", "0.10"]
                )
                assert result.exit_code == 0, result.output

            def test_fail_on_regression_exits_1_when_probe_drops(self, tmp_path: Path) -> None:
                """``--fail-on-regression 0.10`` with a 0.20 probe drop: exit 1."""
                a = tmp_path / "a.json"
                b = tmp_path / "b.json"
                # dk dropped 0.20 — above the 0.10 threshold.
                _write_run(a, timestamp="2026-01-01T12:00:00+00:00", score=0.80, probe_score=0.80)
                _write_run(b, timestamp="2026-01-02T12:00:00+00:00", score=0.60, probe_score=0.60)
                result = CliRunner().invoke(
                    app, ["compare", str(a), str(b), "--fail-on-regression", "0.10"]
                )
                assert result.exit_code == 1, result.output

            def test_fail_on_regression_zero_disables_gate(self, tmp_path: Path) -> None:
                """``--fail-on-regression 0`` with a large drop: still exit 0."""
                a = tmp_path / "a.json"
                b = tmp_path / "b.json"
                # Drop ≥0.10 but threshold=0 → no gate.
                _write_run(a, timestamp="2026-01-01T12:00:00+00:00", score=0.80, probe_score=0.80)
                _write_run(b, timestamp="2026-01-02T12:00:00+00:00", score=0.50, probe_score=0.50)
                result = CliRunner().invoke(app, ["compare", str(a), str(b), "--fail-on-regression", "0"])
                assert result.exit_code == 0, result.output

1	"""CLI tests for ``sway compare`` (S11)."""
2
3	from __future__ import annotations
4
5	import json
6	from pathlib import Path
7
8	from typer.testing import CliRunner
9
10	from dlm_sway.cli.app import app
11
12
def _write_run(path: Path, *, timestamp: str, score: float, probe_score: float) -> None:
    """Write a minimal result JSON at ``path``.

    The payload only needs the fields ``report.from_json`` and ``compare``
    actually read — probes (name + score), overall score, timestamp.

    Args:
        path: Destination file; written as UTF-8 text.
        timestamp: Stored verbatim as both ``started_at`` and ``finished_at``.
        score: Stored as ``score.overall``.
        probe_score: Score of the single probe entry, named ``dk``; it is
            also embedded in that probe's ``message``.
    """
    payload = {
        "schema_version": 1,
        "sway_version": "0.1.0.dev0",
        "base_model_id": "base",
        "adapter_id": "adp",
        "started_at": timestamp,
        "finished_at": timestamp,
        "score": {
            "overall": score,
            "band": "healthy",
            "components": {},
            "findings": [],
        },
        "probes": [
            {
                "name": "dk",
                "kind": "delta_kl",
                "verdict": "pass",
                "score": probe_score,
                "message": f"dk score={probe_score}",
            },
        ],
    }
    path.write_text(json.dumps(payload), encoding="utf-8")
43
44
class TestCompareCli:
    """End-to-end checks of the ``compare`` sub-command through ``CliRunner``.

    Each test writes result files with ``_write_run`` under ``tmp_path``,
    invokes ``app`` and asserts on the exit code and/or the captured output.
    """

    def test_two_files_terminal_default(self, tmp_path: Path) -> None:
        """Two runs, no ``--format``: exit 0 and the expected labels are printed."""
        a = tmp_path / "a.json"
        b = tmp_path / "b.json"
        _write_run(a, timestamp="2026-01-01T12:00:00+00:00", score=0.80, probe_score=0.80)
        _write_run(b, timestamp="2026-01-02T12:00:00+00:00", score=0.82, probe_score=0.82)
        result = CliRunner().invoke(app, ["compare", str(a), str(b)])
        assert result.exit_code == 0, result.output
        assert "sway compare" in result.stdout
        assert "dk" in result.stdout
        assert "composite" in result.stdout

    def test_markdown_format(self, tmp_path: Path) -> None:
        """``--format md``: exit 0 and the output has a markdown heading and table."""
        a = tmp_path / "a.json"
        b = tmp_path / "b.json"
        _write_run(a, timestamp="2026-01-01T12:00:00+00:00", score=0.80, probe_score=0.80)
        _write_run(b, timestamp="2026-01-02T12:00:00+00:00", score=0.82, probe_score=0.82)
        result = CliRunner().invoke(app, ["compare", str(a), str(b), "--format", "md"])
        assert result.exit_code == 0, result.output
        # Markdown header + table pipes. The pipes are plain characters: a
        # backslash before them would become part of the expected string.
        assert "# sway compare" in result.stdout
        assert "| probe |" in result.stdout

    def test_json_format(self, tmp_path: Path) -> None:
        """``--format json``: stdout parses as JSON with the labels and the probe."""
        a = tmp_path / "a.json"
        b = tmp_path / "b.json"
        _write_run(a, timestamp="2026-01-01T12:00:00+00:00", score=0.80, probe_score=0.80)
        _write_run(b, timestamp="2026-01-02T12:00:00+00:00", score=0.82, probe_score=0.82)
        result = CliRunner().invoke(app, ["compare", str(a), str(b), "--format", "json"])
        assert result.exit_code == 0, result.output
        parsed = json.loads(result.stdout)
        assert parsed["labels"] == ["a", "b"]
        assert "dk" in parsed["scores"]

    def test_fewer_than_two_files_exits_2(self, tmp_path: Path) -> None:
        """A single input file: exit 2 with an "at least two" message."""
        a = tmp_path / "a.json"
        _write_run(a, timestamp="2026-01-01T12:00:00+00:00", score=0.80, probe_score=0.80)
        result = CliRunner().invoke(app, ["compare", str(a)])
        assert result.exit_code == 2, result.output
        # ``result.output`` carries what the user saw on the terminal,
        # whichever stream the message went to. Reading ``result.stderr``
        # raises ValueError on Click releases whose default runner does
        # not capture stderr separately (before 8.2).
        assert "at least two" in result.output

    def test_unreadable_file_exits_2(self, tmp_path: Path) -> None:
        """One input that is not valid JSON: exit 2."""
        a = tmp_path / "a.json"
        b = tmp_path / "b.json"
        _write_run(a, timestamp="2026-01-01T12:00:00+00:00", score=0.80, probe_score=0.80)
        b.write_text("{ not valid json", encoding="utf-8")
        result = CliRunner().invoke(app, ["compare", str(a), str(b)])
        assert result.exit_code == 2, result.output

    def test_fail_on_regression_exits_0_when_improving(self, tmp_path: Path) -> None:
        """``--fail-on-regression 0.10`` with scores going up: exit 0."""
        a = tmp_path / "a.json"
        b = tmp_path / "b.json"
        _write_run(a, timestamp="2026-01-01T12:00:00+00:00", score=0.80, probe_score=0.80)
        _write_run(b, timestamp="2026-01-02T12:00:00+00:00", score=0.85, probe_score=0.85)
        result = CliRunner().invoke(
            app, ["compare", str(a), str(b), "--fail-on-regression", "0.10"]
        )
        assert result.exit_code == 0, result.output

    def test_fail_on_regression_exits_1_when_probe_drops(self, tmp_path: Path) -> None:
        """``--fail-on-regression 0.10`` with a 0.20 probe drop: exit 1."""
        a = tmp_path / "a.json"
        b = tmp_path / "b.json"
        # dk dropped 0.20 — above the 0.10 threshold.
        _write_run(a, timestamp="2026-01-01T12:00:00+00:00", score=0.80, probe_score=0.80)
        _write_run(b, timestamp="2026-01-02T12:00:00+00:00", score=0.60, probe_score=0.60)
        result = CliRunner().invoke(
            app, ["compare", str(a), str(b), "--fail-on-regression", "0.10"]
        )
        assert result.exit_code == 1, result.output

    def test_fail_on_regression_zero_disables_gate(self, tmp_path: Path) -> None:
        """``--fail-on-regression 0`` with a large drop: still exit 0."""
        a = tmp_path / "a.json"
        b = tmp_path / "b.json"
        # Drop ≥0.10 but threshold=0 → no gate.
        _write_run(a, timestamp="2026-01-01T12:00:00+00:00", score=0.80, probe_score=0.80)
        _write_run(b, timestamp="2026-01-02T12:00:00+00:00", score=0.50, probe_score=0.50)
        result = CliRunner().invoke(app, ["compare", str(a), str(b), "--fail-on-regression", "0"])
        assert result.exit_code == 0, result.output