@@ -0,0 +1,410 @@ |
| 1 | +"""Tests for :mod:`dlm_sway.pytest_plugin` via pytest's ``pytester`` fixture. |
| 2 | + |
| 3 | +The canonical way to test a pytest plugin is to spawn a sub-session |
| 4 | +using pytest's own ``pytester`` harness. We write a tiny spec + |
| 5 | +test file into ``pytester``'s tmp rootdir, monkeypatch the suite |
| 6 | +cache to return canned ``SuiteResult`` / ``SwayScore`` values, and |
| 7 | +then assert the observed pytest outcomes match what the plugin's |
| 8 | +verdict translation claims to do. |
| 9 | +""" |
| 10 | + |
| 11 | +from __future__ import annotations |
| 12 | + |
| 13 | +from datetime import UTC, datetime |
| 14 | +from typing import Any |
| 15 | + |
| 16 | +import pytest |
| 17 | + |
| 18 | +from dlm_sway.core.result import ProbeResult, SuiteResult, SwayScore, Verdict |
| 19 | + |
| 20 | +pytest_plugins = ["pytester"] |
| 21 | + |
| 22 | + |
| 23 | +# ---------------------------------------------------------------------- |
| 24 | +# Canned suite / score helpers |
| 25 | +# ---------------------------------------------------------------------- |
| 26 | + |
| 27 | + |
def _suite_with(probes: list[ProbeResult]) -> SuiteResult:
    """Wrap *probes* in a ``SuiteResult`` filled with fixed placeholder metadata.

    The run starts and finishes at the same timezone-aware instant; the
    probes are stored as a tuple.
    """
    frozen_probes = tuple(probes)
    stamp = datetime(2026, 1, 1, 12, 0, 0, tzinfo=UTC)
    return SuiteResult(
        spec_path="sway.yaml",
        started_at=stamp,
        finished_at=stamp,
        base_model_id="test/base",
        adapter_id="",
        sway_version="0.0.0",
        probes=frozen_probes,
    )
| 39 | + |
| 40 | + |
def _score(overall: float) -> SwayScore:
    """Return a component-less ``SwayScore`` whose band is derived from *overall*."""
    band = SwayScore.band_for(overall)
    return SwayScore(overall=overall, components={}, band=band)
| 43 | + |
| 44 | + |
def _stub_cache(monkeypatch: pytest.MonkeyPatch, suite: SuiteResult, score: SwayScore) -> None:
    """Patch ``_SuiteCache.get_or_run`` so every call returns ``(suite, score)``.

    The replacement accepts the same ``spec_path`` / ``weights`` arguments
    but ignores them. It is installed through *monkeypatch*.
    """
    from dlm_sway.pytest_plugin import _SuiteCache

    def _fake_get_or_run(
        self: _SuiteCache, spec_path: Any, *, weights: Any = None
    ) -> tuple[SuiteResult, SwayScore]:
        # Arguments are accepted only for signature compatibility.
        del spec_path, weights
        canned = (suite, score)
        return canned

    monkeypatch.setattr(_SuiteCache, "get_or_run", _fake_get_or_run)
| 56 | + |
| 57 | + |
| 58 | +# ---------------------------------------------------------------------- |
| 59 | +# Minimal spec + test file written into pytester's rootdir |
| 60 | +# ---------------------------------------------------------------------- |
| 61 | + |
| 62 | + |
# Smallest spec the tests need: two model entries and two probes ("dk",
# "sis") whose names match the canned ProbeResults built in the tests.
# Nesting uses two-space YAML indentation so `models` holds two distinct
# mappings and `suite` is a list of probe mappings.
_MIN_SPEC = """\
version: 1
models:
  base:
    base: "test/base"
  ft:
    base: "test/base"
suite:
  - name: "dk"
    kind: "delta_kl"
    prompts: ["p1", "p2"]
  - name: "sis"
    kind: "section_internalization"
"""
| 77 | + |
| 78 | + |
def _write_spec(pytester: pytest.Pytester, content: str = _MIN_SPEC) -> None:
    """Create ``sway.yaml`` holding *content* via ``pytester.makefile``.

    With no *content* argument the minimal two-probe spec is written.
    """
    pytester.makefile(".yaml", sway=content)
| 81 | + |
| 82 | + |
| 83 | +# ---------------------------------------------------------------------- |
| 84 | +# Tests |
| 85 | +# ---------------------------------------------------------------------- |
| 86 | + |
| 87 | + |
class TestMarkerRegistration:
    """The ``sway`` marker is visible to pytest."""

    def test_marker_shows_in_help(self, pytester: pytest.Pytester) -> None:
        """``pytest --markers`` exits with 0 and prints a line containing ``sway(``."""
        run = pytester.runpytest_inprocess("--markers")
        assert run.ret == 0
        marker_lines = [line for line in run.stdout.lines if "sway(" in line]
        assert marker_lines
| 94 | + |
| 95 | + |
class TestExpansion:
    """One decorated function fans out into per-probe items with mapped outcomes."""

    def test_one_item_per_probe(
        self, pytester: pytest.Pytester, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """@pytest.mark.sway expands a single function into N items."""
        dk = ProbeResult(name="dk", kind="delta_kl", verdict=Verdict.PASS, score=0.9)
        sis = ProbeResult(
            name="sis", kind="section_internalization", verdict=Verdict.PASS, score=0.8
        )
        _write_spec(pytester)
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="sway.yaml")
            def test_demo():
                pass
            """
        )
        _stub_cache(monkeypatch, _suite_with([dk, sis]), _score(0.85))
        run = pytester.runpytest_inprocess("-v")
        run.assert_outcomes(passed=2)
        # Each synthetic item id ends with the probe's name.
        output = "\n".join(run.stdout.lines)
        for item_id in ("test_demo::dk", "test_demo::sis"):
            assert item_id in output

    def test_fail_verdict_propagates(
        self, pytester: pytest.Pytester, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """A FAIL probe becomes a failed item and its message reaches the output."""
        failing = ProbeResult(
            name="dk",
            kind="delta_kl",
            verdict=Verdict.FAIL,
            score=0.2,
            message="adapter didn't move the needle",
        )
        passing = ProbeResult(
            name="sis", kind="section_internalization", verdict=Verdict.PASS, score=0.9
        )
        _write_spec(pytester)
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="sway.yaml")
            def test_demo():
                pass
            """
        )
        _stub_cache(monkeypatch, _suite_with([failing, passing]), _score(0.55))
        run = pytester.runpytest_inprocess("-v")
        run.assert_outcomes(passed=1, failed=1)
        output = "\n".join(run.stdout.lines)
        assert "test_demo::dk" in output  # the failing one
        assert "adapter didn't move the needle" in output

    def test_skip_verdict_propagates(
        self, pytester: pytest.Pytester, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """A SKIP probe is reported as skipped while the other probe still passes."""
        skipped = ProbeResult(
            name="dk",
            kind="delta_kl",
            verdict=Verdict.SKIP,
            score=None,
            message="no calibration",
        )
        passing = ProbeResult(
            name="sis", kind="section_internalization", verdict=Verdict.PASS, score=0.9
        )
        _write_spec(pytester)
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="sway.yaml")
            def test_demo(): ...
            """
        )
        _stub_cache(monkeypatch, _suite_with([skipped, passing]), _score(0.8))
        run = pytester.runpytest_inprocess("-v")
        run.assert_outcomes(passed=1, skipped=1)

    def test_error_verdict_fails(
        self, pytester: pytest.Pytester, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """An ERROR probe is counted as a failed item."""
        errored = ProbeResult(
            name="dk",
            kind="delta_kl",
            verdict=Verdict.ERROR,
            score=None,
            message="non-finite raw",
        )
        passing = ProbeResult(
            name="sis", kind="section_internalization", verdict=Verdict.PASS, score=0.9
        )
        _write_spec(pytester)
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="sway.yaml")
            def test_demo(): ...
            """
        )
        _stub_cache(monkeypatch, _suite_with([errored, passing]), _score(0.5))
        run = pytester.runpytest_inprocess("-v")
        run.assert_outcomes(passed=1, failed=1)
| 220 | + |
| 221 | + |
class TestGate:
    """The ``threshold`` kwarg adds a synthetic ``__gate__`` item on the overall score."""

    def test_threshold_below_fails_gate(
        self, pytester: pytest.Pytester, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """An overall score under the threshold fails the gate; the probes still pass."""
        dk = ProbeResult(name="dk", kind="delta_kl", verdict=Verdict.PASS, score=0.7)
        sis = ProbeResult(
            name="sis", kind="section_internalization", verdict=Verdict.PASS, score=0.6
        )
        _write_spec(pytester)
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="sway.yaml", threshold=0.8)
            def test_demo(): ...
            """
        )
        # Overall 0.65 sits below the 0.8 threshold, so the gate must fail.
        _stub_cache(monkeypatch, _suite_with([dk, sis]), _score(0.65))
        run = pytester.runpytest_inprocess("-v")
        # Two passing probes plus the failing __gate__ item.
        run.assert_outcomes(passed=2, failed=1)
        output = "\n".join(run.stdout.lines)
        for fragment in ("__gate__", "0.65", "0.80"):
            assert fragment in output

    def test_threshold_above_passes_gate(
        self, pytester: pytest.Pytester, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """An overall score over the threshold makes the gate a third passing item."""
        dk = ProbeResult(name="dk", kind="delta_kl", verdict=Verdict.PASS, score=0.9)
        sis = ProbeResult(
            name="sis", kind="section_internalization", verdict=Verdict.PASS, score=0.8
        )
        _write_spec(pytester)
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="sway.yaml", threshold=0.5)
            def test_demo(): ...
            """
        )
        _stub_cache(monkeypatch, _suite_with([dk, sis]), _score(0.85))
        run = pytester.runpytest_inprocess("-v")
        run.assert_outcomes(passed=3)  # 2 probes + 1 __gate__

    def test_threshold_zero_skips_gate_item(
        self, pytester: pytest.Pytester, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """No threshold → no synthetic ``__gate__`` item at all."""
        dk = ProbeResult(name="dk", kind="delta_kl", verdict=Verdict.PASS, score=0.9)
        sis = ProbeResult(
            name="sis", kind="section_internalization", verdict=Verdict.PASS, score=0.8
        )
        _write_spec(pytester)
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="sway.yaml")
            def test_demo(): ...
            """
        )
        _stub_cache(monkeypatch, _suite_with([dk, sis]), _score(0.85))
        run = pytester.runpytest_inprocess("-v")
        run.assert_outcomes(passed=2)
        output = "\n".join(run.stdout.lines)
        assert "__gate__" not in output
| 302 | + |
| 303 | + |
class TestErrorPaths:
    """Misconfigured markers surface as a single failed item."""

    def test_missing_spec_kwarg(self, pytester: pytest.Pytester) -> None:
        """No spec kwarg → config-error item fails with the hint."""
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway()
            def test_demo(): ...
            """
        )
        run = pytester.runpytest_inprocess("-v")
        run.assert_outcomes(failed=1)
        output = "\n".join(run.stdout.lines)
        assert "requires a `spec`" in output

    def test_nonexistent_spec_file(self, pytester: pytest.Pytester) -> None:
        """A marker naming a spec file that was never written yields one failure."""
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="does_not_exist.yaml")
            def test_demo(): ...
            """
        )
        run = pytester.runpytest_inprocess("-v")
        run.assert_outcomes(failed=1)

    def test_bad_threshold(self, pytester: pytest.Pytester) -> None:
        """A non-numeric threshold yields one failure whose output mentions it."""
        _write_spec(pytester)
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="sway.yaml", threshold="not-a-number")
            def test_demo(): ...
            """
        )
        run = pytester.runpytest_inprocess("-v")
        run.assert_outcomes(failed=1)
        output = "\n".join(run.stdout.lines)
        assert "threshold" in output

    def test_unexpected_kwarg(self, pytester: pytest.Pytester) -> None:
        """An unknown marker kwarg yields one failure reporting unexpected arguments."""
        _write_spec(pytester)
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="sway.yaml", nonsense="x")
            def test_demo(): ...
            """
        )
        run = pytester.runpytest_inprocess("-v")
        run.assert_outcomes(failed=1)
        output = "\n".join(run.stdout.lines)
        assert "unexpected arguments" in output
| 361 | + |
| 362 | + |
class TestSuiteReuse:
    """Decorated tests that name the same spec go through one shared cache."""

    def test_cache_shared_across_decorated_tests(
        self, pytester: pytest.Pytester, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Two decorators against the same spec share one suite run.

        ``get_or_run`` is replaced by a stub that records every distinct
        ``_SuiteCache`` instance it is invoked on. The real keying logic is
        therefore not exercised; what is checked is that both decorated
        functions consult the same cache object.
        """
        from dlm_sway.pytest_plugin import _SuiteCache

        _write_spec(pytester)
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="sway.yaml")
            def test_a(): ...

            @pytest.mark.sway(spec="sway.yaml")
            def test_b(): ...
            """
        )
        suite = _suite_with(
            [
                ProbeResult(name="dk", kind="delta_kl", verdict=Verdict.PASS, score=0.9),
                ProbeResult(
                    name="sis", kind="section_internalization", verdict=Verdict.PASS, score=0.8
                ),
            ]
        )
        score = _score(0.85)

        # Holding the instances themselves (not their ids) keeps them alive,
        # so the identity comparison below cannot be fooled by id reuse.
        caches_seen: list[Any] = []

        def _counted(self: _SuiteCache, *args: Any, **kwargs: Any) -> Any:
            if not any(cache is self for cache in caches_seen):
                caches_seen.append(self)
                # Seed the instance's private store on first use.
                # NOTE(review): the key is a placeholder — confirm whether
                # the plugin ever reads `_cache` directly.
                self._cache[("x", ())] = (suite, score)
            return (suite, score)

        monkeypatch.setattr(_SuiteCache, "get_or_run", _counted)
        result = pytester.runpytest_inprocess("-v")
        result.assert_outcomes(passed=4)  # 2 tests × 2 probes
        # Exactly one cache instance must have served both decorated tests.
        # Zero would mean the stub was never reached; two or more would mean
        # each decorated test built its own cache.
        assert len(caches_seen) == 1