Python · 13462 bytes Raw Blame History
1 """Tests for :mod:`dlm_sway.pytest_plugin` via pytest's ``pytester`` fixture.
2
3 The canonical way to test a pytest plugin is to spawn a sub-session
4 using pytest's own ``pytester`` harness. We write a tiny spec +
5 test file into ``pytester``'s tmp rootdir, monkeypatch the suite
6 cache to return canned ``SuiteResult`` / ``SwayScore`` values, and
7 then assert the observed pytest outcomes match what the plugin's
8 verdict translation claims to do.
9 """
10
11 from __future__ import annotations
12
13 from datetime import UTC, datetime
14 from typing import Any
15
16 import pytest
17
18 from dlm_sway.core.result import ProbeResult, SuiteResult, SwayScore, Verdict
19
20 pytest_plugins = ["pytester"]
21
22
23 # ----------------------------------------------------------------------
24 # Canned suite / score helpers
25 # ----------------------------------------------------------------------
26
27
def _suite_with(probes: list[ProbeResult]) -> SuiteResult:
    """Wrap *probes* in a ``SuiteResult`` carrying fixed placeholder metadata.

    Start and finish timestamps are the same timezone-aware instant, so the
    canned suite is fully deterministic from one run to the next.
    """
    fixed_instant = datetime(2026, 1, 1, 12, 0, 0, tzinfo=UTC)
    return SuiteResult(
        probes=tuple(probes),
        spec_path="sway.yaml",
        base_model_id="test/base",
        adapter_id="",
        sway_version="0.0.0",
        started_at=fixed_instant,
        finished_at=fixed_instant,
    )
39
40
def _score(overall: float) -> SwayScore:
    """Build a ``SwayScore`` for *overall* with no components.

    The band is whatever ``SwayScore.band_for`` reports for the same value.
    """
    band = SwayScore.band_for(overall)
    return SwayScore(overall=overall, components={}, band=band)
43
44
def _stub_cache(monkeypatch: pytest.MonkeyPatch, suite: SuiteResult, score: SwayScore) -> None:
    """Patch ``_SuiteCache.get_or_run`` so every call returns ``(suite, score)``.

    The replacement is installed on the class via *monkeypatch*, so it is
    undone automatically when the calling test finishes.
    """
    from dlm_sway.pytest_plugin import _SuiteCache

    def _fake_get_or_run(
        self: _SuiteCache, spec_path: Any, *, weights: Any = None
    ) -> tuple[SuiteResult, SwayScore]:
        # Both arguments are accepted purely to mirror the call shape; the
        # canned pair is returned no matter what was asked for.
        del spec_path, weights
        return suite, score

    monkeypatch.setattr(_SuiteCache, "get_or_run", _fake_get_or_run)
56
57
58 # ----------------------------------------------------------------------
59 # Minimal spec + test file written into pytester's rootdir
60 # ----------------------------------------------------------------------
61
62
63 _MIN_SPEC = """\
64 version: 1
65 models:
66 base:
67 base: "test/base"
68 ft:
69 base: "test/base"
70 suite:
71 - name: "dk"
72 kind: "delta_kl"
73 prompts: ["p1", "p2"]
74 - name: "sis"
75 kind: "section_internalization"
76 """
77
78
def _write_spec(pytester: pytest.Pytester, content: str = _MIN_SPEC) -> None:
    """Write *content* to a ``.yaml`` file with basename ``sway`` via ``pytester.makefile``."""
    # makefile() takes the basename as the keyword name and the text as its value.
    spec_files = {"sway": content}
    pytester.makefile(".yaml", **spec_files)
81
82
83 # ----------------------------------------------------------------------
84 # Tests
85 # ----------------------------------------------------------------------
86
87
class TestMarkerRegistration:
    """The ``sway`` marker is visible in pytest's marker listing."""

    def test_marker_shows_in_help(self, pytester: pytest.Pytester) -> None:
        """``pytest --markers`` lists ``sway`` after the plugin loads."""
        listing = pytester.runpytest_inprocess("--markers")
        assert listing.ret == 0
        # The needle contains no newline, so searching the joined output is
        # the same as searching line by line.
        assert "sway(" in listing.stdout.str()
94
95
class TestExpansion:
    """A ``sway``-marked function expands into per-probe items with mapped outcomes."""

    def test_one_item_per_probe(
        self, pytester: pytest.Pytester, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """@pytest.mark.sway expands a single function into N items."""
        _write_spec(pytester)
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="sway.yaml")
            def test_demo():
                pass
            """
        )
        dk_probe = ProbeResult(name="dk", kind="delta_kl", verdict=Verdict.PASS, score=0.9)
        sis_probe = ProbeResult(
            name="sis", kind="section_internalization", verdict=Verdict.PASS, score=0.8
        )
        _stub_cache(monkeypatch, _suite_with([dk_probe, sis_probe]), _score(0.85))

        run = pytester.runpytest_inprocess("-v")

        run.assert_outcomes(passed=2)
        # The synthetic item names carry the probe labels.
        report = run.stdout.str()
        for item_id in ("test_demo::dk", "test_demo::sis"):
            assert item_id in report

    def test_fail_verdict_propagates(
        self, pytester: pytest.Pytester, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """A FAIL probe yields a failed item and its message appears in the output."""
        _write_spec(pytester)
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="sway.yaml")
            def test_demo():
                pass
            """
        )
        failing_probe = ProbeResult(
            name="dk",
            kind="delta_kl",
            verdict=Verdict.FAIL,
            score=0.2,
            message="adapter didn't move the needle",
        )
        passing_probe = ProbeResult(
            name="sis", kind="section_internalization", verdict=Verdict.PASS, score=0.9
        )
        _stub_cache(monkeypatch, _suite_with([failing_probe, passing_probe]), _score(0.55))

        run = pytester.runpytest_inprocess("-v")

        run.assert_outcomes(passed=1, failed=1)
        report = run.stdout.str()
        assert "test_demo::dk" in report  # the failing one
        assert "adapter didn't move the needle" in report

    def test_skip_verdict_propagates(
        self, pytester: pytest.Pytester, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """A SKIP probe is reported as a skipped item; the PASS probe still passes."""
        _write_spec(pytester)
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="sway.yaml")
            def test_demo(): ...
            """
        )
        skipped_probe = ProbeResult(
            name="dk",
            kind="delta_kl",
            verdict=Verdict.SKIP,
            score=None,
            message="no calibration",
        )
        passing_probe = ProbeResult(
            name="sis", kind="section_internalization", verdict=Verdict.PASS, score=0.9
        )
        _stub_cache(monkeypatch, _suite_with([skipped_probe, passing_probe]), _score(0.8))

        run = pytester.runpytest_inprocess("-v")

        run.assert_outcomes(passed=1, skipped=1)

    def test_error_verdict_fails(
        self, pytester: pytest.Pytester, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """An ERROR probe is reported as a failed item; the PASS probe still passes."""
        _write_spec(pytester)
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="sway.yaml")
            def test_demo(): ...
            """
        )
        errored_probe = ProbeResult(
            name="dk",
            kind="delta_kl",
            verdict=Verdict.ERROR,
            score=None,
            message="non-finite raw",
        )
        passing_probe = ProbeResult(
            name="sis", kind="section_internalization", verdict=Verdict.PASS, score=0.9
        )
        _stub_cache(monkeypatch, _suite_with([errored_probe, passing_probe]), _score(0.5))

        run = pytester.runpytest_inprocess("-v")

        run.assert_outcomes(passed=1, failed=1)
220
221
class TestGate:
    """The ``threshold`` kwarg controls a synthetic ``__gate__`` item."""

    def test_threshold_below_fails_gate(
        self, pytester: pytest.Pytester, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """An overall score under the threshold fails ``__gate__`` and prints both numbers."""
        _write_spec(pytester)
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="sway.yaml", threshold=0.8)
            def test_demo(): ...
            """
        )
        probes = [
            ProbeResult(name="dk", kind="delta_kl", verdict=Verdict.PASS, score=0.7),
            ProbeResult(
                name="sis", kind="section_internalization", verdict=Verdict.PASS, score=0.6
            ),
        ]
        # Overall 0.65 sits below the 0.8 threshold, so the gate must fail.
        _stub_cache(monkeypatch, _suite_with(probes), _score(0.65))

        run = pytester.runpytest_inprocess("-v")

        # Two PASS probes + one __gate__ fail = passed=2, failed=1.
        run.assert_outcomes(passed=2, failed=1)
        report = run.stdout.str()
        for expected in ("__gate__", "0.65", "0.80"):
            assert expected in report

    def test_threshold_above_passes_gate(
        self, pytester: pytest.Pytester, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """An overall score over the threshold adds a passing ``__gate__`` item."""
        _write_spec(pytester)
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="sway.yaml", threshold=0.5)
            def test_demo(): ...
            """
        )
        probes = [
            ProbeResult(name="dk", kind="delta_kl", verdict=Verdict.PASS, score=0.9),
            ProbeResult(
                name="sis", kind="section_internalization", verdict=Verdict.PASS, score=0.8
            ),
        ]
        _stub_cache(monkeypatch, _suite_with(probes), _score(0.85))

        run = pytester.runpytest_inprocess("-v")

        run.assert_outcomes(passed=3)  # 2 probes + 1 __gate__

    def test_threshold_zero_skips_gate_item(
        self, pytester: pytest.Pytester, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """No threshold → no synthetic ``__gate__`` item at all.

        The marker below omits ``threshold`` entirely rather than passing an
        explicit zero.
        """
        _write_spec(pytester)
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="sway.yaml")
            def test_demo(): ...
            """
        )
        probes = [
            ProbeResult(name="dk", kind="delta_kl", verdict=Verdict.PASS, score=0.9),
            ProbeResult(
                name="sis", kind="section_internalization", verdict=Verdict.PASS, score=0.8
            ),
        ]
        _stub_cache(monkeypatch, _suite_with(probes), _score(0.85))

        run = pytester.runpytest_inprocess("-v")

        run.assert_outcomes(passed=2)
        assert "__gate__" not in run.stdout.str()
302
303
class TestErrorPaths:
    """Misconfigured ``sway`` markers surface as a single failed item."""

    def test_missing_spec_kwarg(self, pytester: pytest.Pytester) -> None:
        """No spec kwarg → config-error item fails with the hint."""
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway()
            def test_demo(): ...
            """
        )
        run = pytester.runpytest_inprocess("-v")
        run.assert_outcomes(failed=1)
        assert "requires a `spec`" in run.stdout.str()

    def test_nonexistent_spec_file(self, pytester: pytest.Pytester) -> None:
        """A spec path that was never written to disk produces one failed item."""
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="does_not_exist.yaml")
            def test_demo(): ...
            """
        )
        run = pytester.runpytest_inprocess("-v")
        run.assert_outcomes(failed=1)

    def test_bad_threshold(self, pytester: pytest.Pytester) -> None:
        """A non-numeric threshold fails and the output mentions ``threshold``."""
        _write_spec(pytester)
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="sway.yaml", threshold="not-a-number")
            def test_demo(): ...
            """
        )
        run = pytester.runpytest_inprocess("-v")
        run.assert_outcomes(failed=1)
        assert "threshold" in run.stdout.str()

    def test_unexpected_kwarg(self, pytester: pytest.Pytester) -> None:
        """An unknown marker kwarg fails and the output says ``unexpected arguments``."""
        _write_spec(pytester)
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="sway.yaml", nonsense="x")
            def test_demo(): ...
            """
        )
        run = pytester.runpytest_inprocess("-v")
        run.assert_outcomes(failed=1)
        assert "unexpected arguments" in run.stdout.str()
361
362
class TestSuiteReuse:
    """Decorated tests that name the same spec go through one shared cache."""

    def test_cache_shared_across_decorated_tests(
        self, pytester: pytest.Pytester, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Two decorators against the same spec share one suite run.

        ``get_or_run`` is stubbed, so no real suite executes. What is observed
        is how many distinct ``_SuiteCache`` instances were asked for results
        while the sub-session ran; sharing means exactly one.
        """
        _write_spec(pytester)
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="sway.yaml")
            def test_a(): ...

            @pytest.mark.sway(spec="sway.yaml")
            def test_b(): ...
            """
        )
        suite = _suite_with(
            [
                ProbeResult(name="dk", kind="delta_kl", verdict=Verdict.PASS, score=0.9),
                ProbeResult(
                    name="sis", kind="section_internalization", verdict=Verdict.PASS, score=0.8
                ),
            ]
        )
        score = _score(0.85)

        from dlm_sway.pytest_plugin import _SuiteCache

        # Cache instances that served at least one request. They are tracked
        # here, by identity, instead of by setting marker attributes on the
        # cache object, so the stub leaves the plugin's own state untouched.
        caches_seen: list[Any] = []

        def _counted(self: _SuiteCache, *args: Any, **kwargs: Any) -> Any:
            if not any(seen is self for seen in caches_seen):
                caches_seen.append(self)
            return (suite, score)

        monkeypatch.setattr(_SuiteCache, "get_or_run", _counted)
        result = pytester.runpytest_inprocess("-v")
        result.assert_outcomes(passed=4)  # 2 tests × 2 probes
        # Exactly one: more than one means the cache is not shared, and zero
        # would mean the stub never ran and the check proved nothing.
        assert len(caches_seen) == 1
410 del original # keep ruff happy