tenseleyflow/sway / 8b53df2

Browse files

examples/pytest_integration: before/after showing @pytest.mark.sway replaces subprocess wrapper

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
8b53df28db4db21b658d1190df713d75920b4e4d
Parents
07fbed3
Tree
bfaffb0

2 changed files

Status | File | + | -
A examples/pytest_integration/sway.yaml 31 0
A examples/pytest_integration/test_sway_gate.py 70 0
examples/pytest_integration/sway.yaml (added)
@@ -0,0 +1,31 @@
1
+# Minimal sway.yaml for the @pytest.mark.sway example.
2
+# Swap `models` for your real base + adapter before running.
3
+version: 1
4
+
5
+models:
6
+  base:
7
+    kind: hf
8
+    base: "HuggingFaceTB/SmolLM2-135M-Instruct"
9
+  ft:
10
+    kind: hf
11
+    base: "HuggingFaceTB/SmolLM2-135M-Instruct"
12
+    adapter: "~/.dlm/store/<YOUR_DLM_ID>/adapter/versions/v0001"
13
+
14
+defaults:
15
+  seed: 0
16
+
17
+suite:
18
+  - name: "null"
19
+    kind: "null_adapter"
20
+    runs: 3
21
+  - name: "adherence"
22
+    kind: "delta_kl"
23
+    prompts:
24
+      - "The capital of France is"
25
+      - "Two plus two equals"
26
+      - "Once upon a time"
27
+      - "The sky is"
28
+    assert_z_gte: 3.0
29
+  - name: "calibration"
30
+    kind: "calibration_drift"
31
+    items_limit: 50
examples/pytest_integration/test_sway_gate.py (added)
@@ -0,0 +1,70 @@
1
+"""Example — @pytest.mark.sway replaces a subprocess ``sway gate`` wrapper.
2
+
3
+Install the plugin alongside the HF backend you use::
4
+
5
+    pip install 'dlm-sway[hf,pytest]'
6
+
7
+Then run it like any other pytest file::
8
+
9
+    pytest examples/pytest_integration/test_sway_gate.py -v
10
+
11
+Each sway probe lands as its own test item in the pytest report:
12
+``test_adapter_healthy::adherence``, ``test_adapter_healthy::calibration``,
13
+``test_adapter_healthy::__gate__``. Probe-level failures isolate; a
14
+failing adherence probe doesn't mask a failing calibration one.
15
+
16
+The single ``threshold`` kwarg adds a synthetic ``__gate__`` item that
17
+fires only when the composite score drops below the given value —
18
+one place to put the CI regression gate.
19
+"""
20
+
21
+from __future__ import annotations
22
+
23
+import pytest
24
+
25
+
26
+# -------- the one-liner --------
27
+
28
+@pytest.mark.sway(spec="sway.yaml", threshold=0.6)
29
+def test_adapter_healthy() -> None:
30
+    """Sway-gated CI check. The decorator owns the body."""
31
+
32
+
33
+# -------- what it replaces --------
34
+#
35
+# Before:
36
+#
37
+#     import subprocess
38
+#
39
+#     def test_adapter_healthy_legacy() -> None:
40
+#         result = subprocess.run(
41
+#             ["sway", "gate", "sway.yaml", "--threshold", "0.6"],
42
+#             capture_output=True, text=True, check=False,
43
+#         )
44
+#         assert result.returncode == 0, (
45
+#             f"sway gate failed:\nstdout:\n{result.stdout}\n"
46
+#             f"stderr:\n{result.stderr}"
47
+#         )
48
+#
49
+# Problems with the legacy shape:
50
+#
51
+#   * Per-probe failures collapse into one big "sway gate failed" —
52
+#     users have to scrape stdout to know which probe regressed.
53
+#   * ``pytest -k adherence`` can't select just one probe.
54
+#   * No per-probe marker filtering, no JUnit-XML per probe, no
55
+#     integration with ``pytest-html`` / ``pytest --lf`` / any of
56
+#     pytest's ecosystem.
57
+#   * Slow-test markers have to be applied to the one wrapper — can't
58
+#     say "fast lane, skip the ablation probe but keep the others."
59
+#
60
+# After (with ``@pytest.mark.sway``):
61
+#
62
+#   * Each probe is its own test item. ``pytest -k calibration`` runs
63
+#     just that probe.
64
+#   * FAIL / ERROR → pytest Failed; SKIP → pytest Skipped; WARN →
65
+#     pytest warning; so the whole pytest ecosystem reads verdicts
66
+#     correctly.
67
+#   * ``--junitxml`` produces one <testcase> per probe — CI dashboards
68
+#     can parse it with their existing pipeline.
69
+#   * Suite runs **once per decorated test**, cached across synthetic
70
+#     items (no N× model load tax from the expansion).