| 1 | """pytest plugin — ``@pytest.mark.sway`` expands into per-probe items (S15 / F10). |
| 2 | |
| 3 | Install with ``pip install 'dlm-sway[pytest]'`` and the plugin |
| 4 | auto-loads via the ``pytest11`` entry point. Writing:: |
| 5 | |
| 6 | @pytest.mark.sway(spec="sway.yaml", threshold=0.6) |
| 7 | def test_adapter_healthy(): ... |
| 8 | |
| 9 | turns a single test function into **N + 1** pytest items: |
| 10 | |
| 11 | - one item per probe in ``sway.yaml``, named |
| 12 | ``test_adapter_healthy::<probe_name>``, outcome tied to the probe's |
| 13 | verdict (``FAIL``/``ERROR`` → pytest Failed; ``SKIP`` → pytest |
| 14 | Skipped; everything else passes), |
| 15 | - a single ``test_adapter_healthy::__gate__`` item that fails when |
| 16 | the composite score falls below ``threshold`` (only added when the |
| 17 | caller passes a positive ``threshold``). |
| 18 | |
| 19 | The suite runs **once per decorated function**; subsequent synthetic |
| 20 | items read from a per-session cache so the N-way expansion doesn't |
| 21 | multiply backend wall time. |
| 22 | |
| 23 | The body of the decorated function is intentionally ignored — the |
| 24 | decorator owns the test. A ``pass`` (or any non-raising body) is |
| 25 | conventional. This mirrors how ``@hypothesis.given(...)`` replaces |
| 26 | the function's behavior while pytest still discovers it through the |
| 27 | normal ``test_*`` name convention. |
| 28 | """ |
| 29 | |
| 30 | from __future__ import annotations |
| 31 | |
| 32 | from pathlib import Path |
| 33 | from typing import TYPE_CHECKING, Any |
| 34 | |
| 35 | import pytest |
| 36 | |
| 37 | # F19 — heavy imports are deferred to call sites so pytest's plugin |
| 38 | # discovery doesn't load ``dlm_sway.core.result`` (and everything below |
| 39 | # it: pydantic, numpy) for users who haven't invoked |
| 40 | # ``@pytest.mark.sway``. The plugin registers as ``pytest11`` on |
| 41 | # install; the tax should only be paid by tests that actually use it. |
| 42 | |
| 43 | if TYPE_CHECKING: |
| 44 | from _pytest.config import Config |
| 45 | from _pytest.nodes import Item |
| 46 | |
| 47 | from dlm_sway.core.result import ProbeResult, SuiteResult, SwayScore |
| 48 | |
| 49 | |
| 50 | # ---------------------------------------------------------------------- |
| 51 | # Session-scoped suite cache |
| 52 | # ---------------------------------------------------------------------- |
| 53 | |
| 54 | |
# Name of the attribute under which the per-session ``_SuiteCache`` is
# stored on pytest's ``Config`` object: written in ``pytest_configure``
# and read back in ``pytest_collection_modifyitems``.
_CACHE_ATTR: str = "_sway_suite_cache"
| 56 | |
| 57 | |
class _SuiteCache:
    """Per-session memo of suite executions, successful or not.

    Entries are keyed by ``(resolved spec path, sorted weight items)``, so
    every synthetic item that points at the same spec with the same weight
    overrides shares a single ``_execute_spec`` call.

    A run that raises is remembered too: the stored exception is re-raised
    for every later request with the same key. Otherwise a failing suite
    would be re-executed once per probe item, multiplying wall time in the
    one case where nothing useful comes back.
    """

    def __init__(self) -> None:
        # Successful runs: key -> (suite result, composite score).
        self._cache: dict[
            tuple[str, tuple[tuple[str, float], ...]], tuple[SuiteResult, SwayScore]
        ] = {}
        # Failed runs, same key space: key -> the exception that was raised.
        self._failures: dict[tuple[str, tuple[tuple[str, float], ...]], Exception] = {}

    def get_or_run(
        self,
        spec_path: Path,
        *,
        weights: dict[str, float] | None,
    ) -> tuple[SuiteResult, SwayScore]:
        """Return the cached result for ``spec_path``/``weights``, running it on a miss.

        Args:
            spec_path: Path to the spec file; resolved before being used
                as part of the cache key.
            weights: Optional weight overrides, forwarded unchanged as
                ``weights_override``. ``None`` and ``{}`` share a key.

        Returns:
            Whatever ``_execute_spec`` returned for this key.

        Raises:
            Exception: Whatever the first run for this key raised; the
                same exception instance is re-raised on later calls.
        """
        key = (str(spec_path.resolve()), tuple(sorted((weights or {}).items())))
        if key in self._cache:
            return self._cache[key]

        earlier_failure = self._failures.get(key)
        if earlier_failure is not None:
            raise earlier_failure

        try:
            # Deferred import: the CLI module pulls in the heavy
            # dependencies, so it is only loaded when a suite actually runs.
            from dlm_sway.cli.commands import _execute_spec

            outcome = _execute_spec(spec_path, weights_override=weights)
        except Exception as exc:
            # ``Exception`` only: KeyboardInterrupt / pytest outcome
            # exceptions derive from BaseException and must not be cached.
            self._failures[key] = exc
            raise
        self._cache[key] = outcome
        return outcome
| 87 | |
| 88 | |
| 89 | # ---------------------------------------------------------------------- |
| 90 | # pytest hooks |
| 91 | # ---------------------------------------------------------------------- |
| 92 | |
| 93 | |
def pytest_configure(config: Config) -> None:
    """Declare the ``sway`` marker and attach a fresh suite cache to *config*.

    Args:
        config: The pytest configuration object for this session. It
            receives one ``markers`` ini line and one new attribute
            (named by ``_CACHE_ATTR``) holding a ``_SuiteCache``.
    """
    marker_help = "".join(
        (
            "sway(spec, threshold=0.0, weights=None): ",
            "expand a pytest function into one item per sway probe in the ",
            "referenced spec. Required kwarg ``spec`` is a path-like to a ",
            "sway.yaml. Optional ``threshold`` adds a ``__gate__`` item ",
            "that fails when the composite score drops below that value. ",
            "Optional ``weights`` overrides the composite-score category ",
            "weights (same schema as ``sway run --weights``).",
        )
    )
    config.addinivalue_line("markers", marker_help)

    # The cache lives as a plain attribute on the config object, so there
    # is no module-level singleton to reset between sessions.
    setattr(config, _CACHE_ATTR, _SuiteCache())
| 109 | |
| 110 | |
def pytest_collection_modifyitems(config: Config, items: list[Item]) -> None:
    """Swap every ``sway``-marked function item for its synthetic items.

    ``items`` is rewritten in place. Items without the marker, marked
    items that are not function items, and marked items without a parent
    are passed through unchanged (no warning is emitted for them). A
    marker whose arguments fail ``_parse_mark`` is replaced by a single
    ``_ConfigErrorItem`` carrying the error text; every other marked
    function is replaced by the output of ``_expand_to_probe_items``.

    Args:
        config: Session config; supplies ``rootpath`` and the suite cache
            stored under ``_CACHE_ATTR``.
        items: The collected items, mutated in place.
    """
    suite_cache: _SuiteCache = getattr(config, _CACHE_ATTR)
    rewritten: list[Item] = []

    for original in items:
        if hasattr(original, "get_closest_marker"):
            sway_mark = original.get_closest_marker("sway")
        else:
            sway_mark = None

        expandable = (
            sway_mark is not None
            and _is_function_item(original)
            and original.parent is not None
        )
        if not expandable:
            rewritten.append(original)
            continue

        try:
            spec_path, threshold, weights = _parse_mark(sway_mark, rootpath=config.rootpath)
        except _SwayMarkError as exc:
            # Report a bad marker as one ordinary failing item rather
            # than letting the exception abort collection.
            rewritten.append(
                _ConfigErrorItem.from_parent(
                    parent=original.parent,
                    name=original.name,
                    message=str(exc),
                )
            )
        else:
            rewritten.extend(
                _expand_to_probe_items(
                    parent_item=original,
                    spec_path=spec_path,
                    threshold=threshold,
                    weights=weights,
                    cache=suite_cache,
                )
            )

    items[:] = rewritten
| 154 | |
| 155 | |
| 156 | # ---------------------------------------------------------------------- |
| 157 | # Mark parsing + item expansion |
| 158 | # ---------------------------------------------------------------------- |
| 159 | |
| 160 | |
class _SwayMarkError(Exception):
    """Signal that a ``@pytest.mark.sway(...)`` call carried invalid arguments.

    Raised by ``_parse_mark``; ``pytest_collection_modifyitems`` catches it
    and turns the message into a failing ``_ConfigErrorItem``.
    """
| 163 | |
| 164 | |
def _parse_mark(
    mark: pytest.Mark,
    *,
    rootpath: Path,
) -> tuple[Path, float, dict[str, float] | None]:
    """Extract ``(spec_path, threshold, weights)`` from a ``sway`` mark.

    Both ``@pytest.mark.sway("path.yaml")`` and
    ``@pytest.mark.sway(spec="path.yaml")`` are accepted.

    Args:
        mark: The mark object; only its ``args`` and ``kwargs`` are read.
        rootpath: Directory against which a relative spec path is resolved.

    Returns:
        A tuple of the spec path (resolved against ``rootpath`` when
        relative, otherwise as given), the threshold as a float
        (``0.0`` when omitted) and the weights as a ``str -> float``
        dict (``None`` when omitted).

    Raises:
        _SwayMarkError: When ``spec`` is missing or not path-like, when
            ``threshold`` is not convertible to a float or is NaN, when
            ``weights`` is neither ``None`` nor a dict convertible to
            ``str -> float``, or when unknown arguments remain.
    """
    import math

    kwargs = dict(mark.kwargs)
    args = list(mark.args)

    spec = kwargs.pop("spec", None)
    if spec is None and args:
        spec = args.pop(0)
    if spec is None:
        raise _SwayMarkError("@pytest.mark.sway requires a `spec` kwarg or a positional spec path")
    try:
        spec_path = Path(spec)
    except TypeError as exc:
        # ``Path`` rejects non-path-like values with TypeError; convert it
        # so the caller's ``_SwayMarkError`` handler can report it instead
        # of the exception escaping during collection.
        raise _SwayMarkError(
            f"@pytest.mark.sway `spec` must be a str or path-like; got {type(spec).__name__}"
        ) from exc
    if not spec_path.is_absolute():
        # Resolve against pytest's rootpath rather than the process cwd,
        # so the result does not depend on where pytest was launched.
        spec_path = (rootpath / spec_path).resolve()

    threshold_raw = kwargs.pop("threshold", 0.0)
    try:
        threshold = float(threshold_raw)
    except (TypeError, ValueError) as exc:
        raise _SwayMarkError(
            f"@pytest.mark.sway `threshold` must be a float; got {threshold_raw!r}"
        ) from exc
    if math.isnan(threshold):
        # NaN compares false against everything, so it would silently
        # disable the gate instead of enforcing anything.
        raise _SwayMarkError("@pytest.mark.sway `threshold` must not be NaN")

    weights = kwargs.pop("weights", None)
    if weights is not None:
        if not isinstance(weights, dict):
            raise _SwayMarkError(
                f"@pytest.mark.sway `weights` must be a dict or None; got {type(weights).__name__}"
            )
        try:
            weights = {str(k): float(v) for k, v in weights.items()}
        except (TypeError, ValueError) as exc:
            raise _SwayMarkError(f"@pytest.mark.sway `weights` must map str→float ({exc})") from exc

    if args or kwargs:
        # Anything left over is unknown; reject it rather than ignore it.
        extra = list(args) + sorted(kwargs)
        raise _SwayMarkError(f"@pytest.mark.sway got unexpected arguments: {extra}")

    return spec_path, threshold, weights
| 215 | |
| 216 | |
def _expand_to_probe_items(
    *,
    parent_item: Item,
    spec_path: Path,
    threshold: float,
    weights: dict[str, float] | None,
    cache: _SuiteCache,
) -> list[Item]:
    """Create the synthetic items that stand in for one decorated function.

    The spec is loaded here only to enumerate its probes; the suite itself
    is not executed. Each item asks ``cache`` for the result from its own
    ``runtest``.

    Args:
        parent_item: The decorated item being replaced; its ``parent``
            becomes the parent of every new item and its ``name`` the
            prefix of their names.
        spec_path: Spec file to load.
        threshold: A ``__gate__`` item is appended only when this is
            greater than ``0.0``.
        weights: Weight overrides handed through to each item.
        cache: Shared suite cache handed through to each item.

    Returns:
        One ``_SwayProbeItem`` per entry of ``spec.suite`` plus the
        optional ``_SwayGateItem``; or a single ``_ConfigErrorItem`` when
        ``load_spec`` raises ``SwayError``.
    """
    from dlm_sway.core.errors import SwayError
    from dlm_sway.suite.loader import load_spec

    collector = parent_item.parent
    assert collector is not None  # narrowing: caller filtered None above
    stem = parent_item.name

    try:
        loaded = load_spec(spec_path)
    except SwayError as exc:
        failure = _ConfigErrorItem.from_parent(
            parent=collector,
            name=stem,
            message=f"failed to load spec {spec_path}: {exc}",
        )
        return [failure]

    # Keyword arguments every synthetic item receives unchanged.
    shared = {
        "parent": collector,
        "spec_path": spec_path,
        "weights": weights,
        "cache": cache,
    }

    # A probe is labelled by its ``name``, else its ``kind``, else "?".
    labels = (str(entry.get("name", entry.get("kind", "?"))) for entry in loaded.suite)
    produced: list[Item] = [
        _SwayProbeItem.from_parent(name=f"{stem}::{label}", probe_name=label, **shared)
        for label in labels
    ]

    if threshold > 0.0:
        produced.append(
            _SwayGateItem.from_parent(name=f"{stem}::__gate__", threshold=threshold, **shared)
        )
    return produced
| 274 | |
| 275 | |
| 276 | # ---------------------------------------------------------------------- |
| 277 | # Item classes |
| 278 | # ---------------------------------------------------------------------- |
| 279 | |
| 280 | |
def _is_function_item(item: Item) -> bool:
    """Return ``True`` when *item* is a ``pytest.Function`` or a subclass of it.

    An ``isinstance`` check is used instead of comparing the class name:
    a name comparison rejects every ``Function`` subclass and accepts any
    unrelated class that happens to be called ``Function``. ``pytest`` is
    already imported at module level, so this adds no import cost.
    """
    return isinstance(item, pytest.Function)
| 285 | |
| 286 | |
class _SwayProbeItem(pytest.Item):
    """One pytest item per sway probe.

    On ``runtest`` the item asks the shared cache for the suite result
    (which runs the suite on first demand), looks up its probe by name and
    maps the probe's verdict to a pytest outcome via ``_apply_verdict``.
    """

    def __init__(
        self,
        *,
        name: str,
        parent: Any,
        spec_path: Path,
        weights: dict[str, float] | None,
        probe_name: str,
        cache: _SuiteCache,
    ) -> None:
        super().__init__(name, parent)
        self._spec_path = spec_path
        self._weights = weights
        self._probe_name = probe_name
        self._cache = cache

    def runtest(self) -> None:
        """Fetch the suite result and apply this probe's verdict."""
        suite_result, _score = self._cache.get_or_run(self._spec_path, weights=self._weights)
        probe = _find_probe(suite_result, self._probe_name)
        if probe is None:
            # Message-only failure, consistent with every other
            # ``pytest.fail`` call in this plugin.
            pytest.fail(
                f"probe {self._probe_name!r} not in suite result — "
                f"available: {sorted(p.name for p in suite_result.probes)}",
                pytrace=False,
            )
        _apply_verdict(probe)

    def repr_failure(self, excinfo: Any, style: str | None = None) -> str:
        """Render a failure: message only for ``pytrace=False``, else a short traceback."""
        del style  # we always render "short"; pytest's style kwarg ignored
        error = excinfo.value
        # ``getrepr`` does not look at ``pytrace``, so honour it here;
        # otherwise verdict failures print a traceback through plugin code.
        if isinstance(error, pytest.fail.Exception) and not error.pytrace:
            return str(error)
        return str(excinfo.getrepr(style="short"))

    def reportinfo(self) -> tuple[Any, int | None, str]:
        """Return ``(file path, line 0, item name)`` for pytest's report header."""
        return self.fspath, 0, self.name
| 327 | |
| 328 | |
class _SwayGateItem(pytest.Item):
    """``__gate__`` item — fails when the composite score drops below ``threshold``."""

    def __init__(
        self,
        *,
        name: str,
        parent: Any,
        spec_path: Path,
        weights: dict[str, float] | None,
        threshold: float,
        cache: _SuiteCache,
    ) -> None:
        super().__init__(name, parent)
        self._spec_path = spec_path
        self._weights = weights
        self._threshold = threshold
        self._cache = cache

    def runtest(self) -> None:
        """Fail when the cached score's ``overall`` is below the threshold."""
        _suite, score = self._cache.get_or_run(self._spec_path, weights=self._weights)
        if score.overall < self._threshold:
            pytest.fail(
                f"composite score {score.overall:.2f} below threshold {self._threshold:.2f} "
                f"(band: {score.band or '—'})",
                pytrace=False,
            )

    def repr_failure(self, excinfo: Any, style: str | None = None) -> str:
        """Render a failure: message only for ``pytrace=False``, else a short traceback."""
        del style  # we always render "short"; pytest's style kwarg ignored
        error = excinfo.value
        # ``getrepr`` does not look at ``pytrace``, so honour it here;
        # otherwise the gate failure prints a traceback through plugin code.
        if isinstance(error, pytest.fail.Exception) and not error.pytrace:
            return str(error)
        return str(excinfo.getrepr(style="short"))

    def reportinfo(self) -> tuple[Any, int | None, str]:
        """Return ``(file path, line 0, item name)`` for pytest's report header."""
        return self.fspath, 0, self.name
| 363 | |
| 364 | |
class _ConfigErrorItem(pytest.Item):
    """Placeholder item whose only job is to fail with a fixed message.

    It replaces a decorated function when the ``sway`` marker or its spec
    cannot be used, so the problem shows up as an ordinary failing test
    under the function's own name instead of as a collection error.
    """

    def __init__(self, *, name: str, parent: Any, message: str) -> None:
        super().__init__(name, parent)
        # Text reported both as the failure reason and as its rendering.
        self._message = message

    def runtest(self) -> None:
        """Fail unconditionally with the stored message, without a traceback."""
        pytest.fail(self._message, pytrace=False)

    def repr_failure(self, excinfo: Any, style: str | None = None) -> str:
        """Return the stored message; *excinfo* and *style* are not consulted."""
        del style, excinfo  # only the canned message is ever shown
        return self._message

    def reportinfo(self) -> tuple[Any, int | None, str]:
        """Return ``(file path, line 0, item name)`` for pytest's report header."""
        location = self.fspath
        return location, 0, self.name
| 386 | |
| 387 | |
| 388 | # ---------------------------------------------------------------------- |
| 389 | # Verdict → pytest outcome translation |
| 390 | # ---------------------------------------------------------------------- |
| 391 | |
| 392 | |
def _find_probe(suite: SuiteResult, name: str) -> ProbeResult | None:
    """Return the first entry of ``suite.probes`` named *name*, or ``None``."""
    matches = (candidate for candidate in suite.probes if candidate.name == name)
    return next(matches, None)
| 398 | |
| 399 | |
def _apply_verdict(probe: ProbeResult) -> None:
    """Map ``probe.verdict`` onto the matching pytest outcome.

    ``PASS`` returns normally. ``WARN`` emits a Python warning and returns
    normally. ``SKIP`` calls ``pytest.skip``; ``FAIL`` and ``ERROR`` call
    ``pytest.fail`` without a traceback. Any other verdict falls through
    and the item passes.
    """
    from dlm_sway.core.result import Verdict

    detail = probe.message or ""
    outcome = probe.verdict

    if outcome == Verdict.PASS:
        return

    if outcome == Verdict.WARN:
        # Routed through the ``warnings`` module so pytest's warning
        # capture and ``-W`` filters apply; the item itself still passes.
        import warnings

        warnings.warn(f"sway WARN [{probe.kind}]: {detail}", stacklevel=2)
        return

    if outcome == Verdict.SKIP:
        pytest.skip(detail or f"probe {probe.name!r} skipped")
    elif outcome == Verdict.FAIL:
        pytest.fail(f"FAIL [{probe.kind}]: {detail}", pytrace=False)
    elif outcome == Verdict.ERROR:
        pytest.fail(f"ERROR [{probe.kind}]: {detail}", pytrace=False)
| 420 | |
| 421 | |
# Public surface of the module: the two pytest hook implementations.
__all__ = [
    "pytest_collection_modifyitems",
    "pytest_configure",
]