Python · 2495 bytes Raw Blame History
1 """Example — @pytest.mark.sway replaces a subprocess ``sway gate`` wrapper.
2
3 Install the plugin alongside the HF backend you use::
4
5 pip install 'dlm-sway[hf,pytest]'
6
7 Then run it like any other pytest file::
8
9 pytest examples/pytest_integration/test_sway_gate.py -v
10
11 Each sway probe lands as its own test item in the pytest report:
12 ``test_adapter_healthy::adherence``, ``test_adapter_healthy::calibration``,
``test_adapter_healthy::__gate__``. Probe-level failures are isolated: a
failing adherence probe doesn't mask a failing calibration one.
15
The single ``threshold`` kwarg adds a synthetic ``__gate__`` item that
fires only when the composite score drops below the given value —
one place to put the CI regression gate.
19 """
20
21 from __future__ import annotations
22
23 import pytest
24
25 # -------- the one-liner --------
26
27
@pytest.mark.sway(
    spec="sway.yaml",
    threshold=0.6,
)
def test_adapter_healthy() -> None:
    """Gate CI on the sway suite described by ``sway.yaml``.

    The function is deliberately empty: the ``sway`` marker owns the body.
    As this module's docstring describes, each probe is reported as its own
    test item, and ``threshold=0.6`` adds a synthetic ``__gate__`` item that
    fires only when the composite score drops below that value.
    """
31
32
33 # -------- what it replaces --------
34 #
35 # Before:
36 #
37 # import subprocess
38 #
39 # def test_adapter_healthy_legacy() -> None:
40 # result = subprocess.run(
41 # ["sway", "gate", "sway.yaml", "--threshold", "0.6"],
42 # capture_output=True, text=True, check=False,
43 # )
44 # assert result.returncode == 0, (
45 # f"sway gate failed:\nstdout:\n{result.stdout}\n"
46 # f"stderr:\n{result.stderr}"
47 # )
48 #
49 # Problems with the legacy shape:
50 #
51 # * Per-probe failures collapse into one big "sway gate failed" —
52 # users have to scrape stdout to know which probe regressed.
53 # * ``pytest -k adherence`` can't select just one probe.
54 # * No per-probe marker filtering, no JUnit-XML per probe, no
55 # integration with ``pytest-html`` / ``pytest --lf`` / any of
56 # pytest's ecosystem.
57 # * Slow-test markers have to be applied to the one wrapper — can't
58 # say "fast lane, skip the ablation probe but keep the others."
59 #
60 # After (with ``@pytest.mark.sway``):
61 #
62 # * Each probe is its own test item. ``pytest -k calibration`` runs
63 # just that probe.
64 # * FAIL / ERROR → pytest Failed; SKIP → pytest Skipped; WARN →
65 # pytest warning; so the whole pytest ecosystem reads verdicts
66 # correctly.
67 # * ``--junitxml`` produces one <testcase> per probe — CI dashboards
68 # can parse it with their existing pipeline.
# * The suite runs **once per decorated test** and is cached across the
#   synthetic items, so the expansion adds no N× model-load tax.