Python · 15154 bytes Raw Blame History
1 """pytest plugin — ``@pytest.mark.sway`` expands into per-probe items (S15 / F10).
2
3 Install with ``pip install 'dlm-sway[pytest]'`` and the plugin
4 auto-loads via the ``pytest11`` entry point. Writing::
5
6 @pytest.mark.sway(spec="sway.yaml", threshold=0.6)
7 def test_adapter_healthy(): ...
8
9 turns a single test function into **N + 1** pytest items:
10
11 - one item per probe in ``sway.yaml``, named
12 ``test_adapter_healthy::<probe_name>``, outcome tied to the probe's
13 verdict (``FAIL``/``ERROR`` → pytest Failed; ``SKIP`` → pytest
14 Skipped; everything else passes),
15 - a single ``test_adapter_healthy::__gate__`` item that fails when
16 the composite score falls below ``threshold`` (only added when the
17 caller passes a positive ``threshold``).
18
19 The suite runs **once per decorated function**; subsequent synthetic
20 items read from a per-session cache so the N-way expansion doesn't
21 multiply backend wall time.
22
23 The body of the decorated function is intentionally ignored — the
24 decorator owns the test. A ``pass`` (or any non-raising body) is
25 conventional. This mirrors how ``@hypothesis.given(...)`` replaces
26 the function's behavior while pytest still discovers it through the
27 normal ``test_*`` name convention.
28 """
29
30 from __future__ import annotations
31
32 from pathlib import Path
33 from typing import TYPE_CHECKING, Any
34
35 import pytest
36
37 # F19 — heavy imports are deferred to call sites so pytest's plugin
38 # discovery doesn't load ``dlm_sway.core.result`` (and everything below
39 # it: pydantic, numpy) for users who haven't invoked
40 # ``@pytest.mark.sway``. The plugin registers as ``pytest11`` on
41 # install; the tax should only be paid by tests that actually use it.
42
43 if TYPE_CHECKING:
44 from _pytest.config import Config
45 from _pytest.nodes import Item
46
47 from dlm_sway.core.result import ProbeResult, SuiteResult, SwayScore
48
49
50 # ----------------------------------------------------------------------
51 # Session-scoped suite cache
52 # ----------------------------------------------------------------------
53
54
# Attribute name under which ``pytest_configure`` stores the session's
# ``_SuiteCache`` instance on the pytest ``Config`` object; the collection
# hook reads it back through the same name.
_CACHE_ATTR: str = "_sway_suite_cache"
56
57
58 class _SuiteCache:
59 """Per-session cache of ``SuiteResult`` / ``SwayScore`` pairs.
60
61 Keyed by ``(spec_path, sorted_weights_tuple)``. Two decorated
62 functions pointing at the same spec with the same weights share
63 one backend load + one suite run — the cache is the whole point
64 of the "one item per probe" expansion being cheap.
65 """
66
67 def __init__(self) -> None:
68 self._cache: dict[
69 tuple[str, tuple[tuple[str, float], ...]], tuple[SuiteResult, SwayScore]
70 ] = {}
71
72 def get_or_run(
73 self,
74 spec_path: Path,
75 *,
76 weights: dict[str, float] | None,
77 ) -> tuple[SuiteResult, SwayScore]:
78 key_weights = tuple(sorted((weights or {}).items()))
79 key = (str(spec_path.resolve()), key_weights)
80 if key not in self._cache:
81 # Deferred import: the CLI module pulls everything else;
82 # lighter plugin entry if we only import when firing.
83 from dlm_sway.cli.commands import _execute_spec
84
85 self._cache[key] = _execute_spec(spec_path, weights_override=weights)
86 return self._cache[key]
87
88
89 # ----------------------------------------------------------------------
90 # pytest hooks
91 # ----------------------------------------------------------------------
92
93
def pytest_configure(config: Config) -> None:
    """Declare the ``sway`` marker and attach a fresh suite cache to ``config``."""
    marker_help = (
        "sway(spec, threshold=0.0, weights=None): "
        "expand a pytest function into one item per sway probe in the "
        "referenced spec. Required kwarg ``spec`` is a path-like to a "
        "sway.yaml. Optional ``threshold`` adds a ``__gate__`` item "
        "that fails when the composite score drops below that value. "
        "Optional ``weights`` overrides the composite-score category "
        "weights (same schema as ``sway run --weights``)."
    )
    config.addinivalue_line("markers", marker_help)

    # The cache lives as a plain attribute on the config object, so it
    # goes away together with the config — nothing to tear down.
    session_cache = _SuiteCache()
    setattr(config, _CACHE_ATTR, session_cache)
109
110
def pytest_collection_modifyitems(config: Config, items: list[Item]) -> None:
    """Swap every ``@pytest.mark.sway`` item for its synthetic per-probe items.

    Called once collection has finished, with ``items`` holding what
    pytest is about to run. The list is rewritten in place: unmarked
    items keep their position, marked function items are replaced by
    whatever ``_expand_to_probe_items`` returns for them.
    """
    suite_cache: _SuiteCache = getattr(config, _CACHE_ATTR)
    rewritten: list[Item] = []

    for candidate in items:
        if hasattr(candidate, "get_closest_marker"):
            sway_mark = candidate.get_closest_marker("sway")
        else:
            sway_mark = None

        # Pass through anything we do not expand: items without the
        # marker, items that are not function items, and items that
        # have no parent to attach synthetic items to.
        if sway_mark is None or not _is_function_item(candidate) or candidate.parent is None:
            rewritten.append(candidate)
            continue

        try:
            spec_path, threshold, weights = _parse_mark(sway_mark, rootpath=config.rootpath)
        except _SwayMarkError as exc:
            # A malformed marker becomes one failing item carrying the
            # parse error, rather than an error that aborts collection.
            placeholder = _ConfigErrorItem.from_parent(
                parent=candidate.parent,
                name=candidate.name,
                message=str(exc),
            )
            rewritten.append(placeholder)
        else:
            rewritten.extend(
                _expand_to_probe_items(
                    parent_item=candidate,
                    spec_path=spec_path,
                    threshold=threshold,
                    weights=weights,
                    cache=suite_cache,
                )
            )

    items[:] = rewritten
154
155
156 # ----------------------------------------------------------------------
157 # Mark parsing + item expansion
158 # ----------------------------------------------------------------------
159
160
161 class _SwayMarkError(Exception):
162 """Raised during mark parsing when the user's arguments are bad."""
163
164
165 def _parse_mark(
166 mark: pytest.Mark,
167 *,
168 rootpath: Path,
169 ) -> tuple[Path, float, dict[str, float] | None]:
170 """Pull ``(spec_path, threshold, weights)`` out of a ``@pytest.mark.sway(...)``."""
171 # ``mark.args`` + ``mark.kwargs`` together give the call shape.
172 # We accept either ``@pytest.mark.sway("path.yaml")`` or
173 # ``@pytest.mark.sway(spec="path.yaml")``.
174 kwargs = dict(mark.kwargs)
175 args = list(mark.args)
176
177 spec = kwargs.pop("spec", None)
178 if spec is None and args:
179 spec = args.pop(0)
180 if spec is None:
181 raise _SwayMarkError("@pytest.mark.sway requires a `spec` kwarg or a positional spec path")
182 spec_path = Path(spec)
183 if not spec_path.is_absolute():
184 # Resolve against pytest's rootpath — the project root pytest
185 # discovers — not the process cwd. A user running ``pytest
186 # tests/`` from a subdir would otherwise see spec-relative
187 # paths resolved against the subdir, surprising.
188 spec_path = (rootpath / spec_path).resolve()
189
190 threshold_raw = kwargs.pop("threshold", 0.0)
191 try:
192 threshold = float(threshold_raw)
193 except (TypeError, ValueError) as exc:
194 raise _SwayMarkError(
195 f"@pytest.mark.sway `threshold` must be a float; got {threshold_raw!r}"
196 ) from exc
197
198 weights = kwargs.pop("weights", None)
199 if weights is not None:
200 if not isinstance(weights, dict):
201 raise _SwayMarkError(
202 f"@pytest.mark.sway `weights` must be a dict or None; got {type(weights).__name__}"
203 )
204 try:
205 weights = {str(k): float(v) for k, v in weights.items()}
206 except (TypeError, ValueError) as exc:
207 raise _SwayMarkError(f"@pytest.mark.sway `weights` must map str→float ({exc})") from exc
208
209 if args or kwargs:
210 # Unknown args — pytest marks silently drop them otherwise.
211 extra = list(args) + sorted(kwargs)
212 raise _SwayMarkError(f"@pytest.mark.sway got unexpected arguments: {extra}")
213
214 return spec_path, threshold, weights
215
216
def _expand_to_probe_items(
    *,
    parent_item: Item,
    spec_path: Path,
    threshold: float,
    weights: dict[str, float] | None,
    cache: _SuiteCache,
) -> list[Item]:
    """Create the synthetic items that stand in for one decorated test.

    The spec is loaded here only to enumerate its probes; nothing in
    this function asks the cache for a result, so the suite itself is
    not executed during collection. A spec that fails to load with a
    ``SwayError`` yields a single ``_ConfigErrorItem`` instead.

    Returns one ``_SwayProbeItem`` per entry of ``spec.suite`` and, when
    ``threshold`` is greater than zero, a trailing ``_SwayGateItem``.
    """
    from dlm_sway.core.errors import SwayError
    from dlm_sway.suite.loader import load_spec

    collector = parent_item.parent
    assert collector is not None  # narrowing: caller filtered None above
    test_name = parent_item.name

    try:
        loaded = load_spec(spec_path)
    except SwayError as exc:
        load_failure = _ConfigErrorItem.from_parent(
            parent=collector,
            name=test_name,
            message=f"failed to load spec {spec_path}: {exc}",
        )
        return [load_failure]

    synthetic: list[Item] = []
    for entry in loaded.suite:
        # Label preference: explicit name, then kind, then "?".
        label = str(entry.get("name", entry.get("kind", "?")))
        probe_item = _SwayProbeItem.from_parent(
            parent=collector,
            name=f"{test_name}::{label}",
            spec_path=spec_path,
            weights=weights,
            probe_name=label,
            cache=cache,
        )
        synthetic.append(probe_item)

    if threshold > 0.0:
        gate_item = _SwayGateItem.from_parent(
            parent=collector,
            name=f"{test_name}::__gate__",
            spec_path=spec_path,
            weights=weights,
            threshold=threshold,
            cache=cache,
        )
        synthetic.append(gate_item)

    return synthetic
274
275
276 # ----------------------------------------------------------------------
277 # Item classes
278 # ----------------------------------------------------------------------
279
280
def _is_function_item(item: Item) -> bool:
    """Return ``True`` when ``item`` is a ``pytest.Function`` or a subclass of it.

    ``pytest`` is already imported at module top, so using the public
    ``pytest.Function`` class costs nothing extra at plugin init. Unlike a
    comparison on the class *name*, ``isinstance`` also recognises
    ``Function`` subclasses and does not match unrelated classes that
    merely happen to be called ``Function``.
    """
    return isinstance(item, pytest.Function)
285
286
class _SwayProbeItem(pytest.Item):
    """Synthetic pytest item standing for a single sway probe.

    Running it fetches the suite result from the shared cache (which
    executes the suite if nobody has yet), looks up this item's probe
    by name and maps the probe's verdict onto a pytest outcome.
    """

    def __init__(
        self,
        *,
        name: str,
        parent: Any,
        spec_path: Path,
        weights: dict[str, float] | None,
        probe_name: str,
        cache: _SuiteCache,
    ) -> None:
        super().__init__(name, parent)
        self._cache = cache
        self._probe_name = probe_name
        self._spec_path = spec_path
        self._weights = weights

    def runtest(self) -> None:  # noqa: D401
        results, _ = self._cache.get_or_run(self._spec_path, weights=self._weights)
        found = _find_probe(results, self._probe_name)
        if found is None:
            # The spec listed this probe but the run produced no result for it.
            known = sorted(p.name for p in results.probes)
            pytest.fail(
                f"probe {self._probe_name!r} not in suite result — "
                f"available: {known}"
            )
        _apply_verdict(found)

    def repr_failure(self, excinfo: Any, style: str | None = None) -> str:
        # The requested style is deliberately discarded: output is always "short".
        del style
        rendered = excinfo.getrepr(style="short")
        return str(rendered)

    def reportinfo(self) -> tuple[Any, int | None, str]:
        location = (self.fspath, 0, self.name)
        return location
327
328
class _SwayGateItem(pytest.Item):
    """The ``__gate__`` item: fails if the composite score is under ``threshold``."""

    def __init__(
        self,
        *,
        name: str,
        parent: Any,
        spec_path: Path,
        weights: dict[str, float] | None,
        threshold: float,
        cache: _SuiteCache,
    ) -> None:
        super().__init__(name, parent)
        self._cache = cache
        self._threshold = threshold
        self._spec_path = spec_path
        self._weights = weights

    def runtest(self) -> None:
        _, score = self._cache.get_or_run(self._spec_path, weights=self._weights)
        # Kept as a strict ``<`` test so that any score for which the
        # comparison is false leaves the gate green.
        under_threshold = score.overall < self._threshold
        if not under_threshold:
            return
        pytest.fail(
            f"composite score {score.overall:.2f} below threshold {self._threshold:.2f} "
            f"(band: {score.band or '—'})",
            pytrace=False,
        )

    def repr_failure(self, excinfo: Any, style: str | None = None) -> str:
        # The requested style is deliberately discarded: output is always "short".
        del style
        rendered = excinfo.getrepr(style="short")
        return str(rendered)

    def reportinfo(self) -> tuple[Any, int | None, str]:
        location = (self.fspath, 0, self.name)
        return location
363
364
class _ConfigErrorItem(pytest.Item):
    """Placeholder item whose only job is to fail with a fixed message.

    Created in place of the real expansion when the ``sway`` marker
    cannot be parsed or its spec cannot be loaded, so the problem shows
    up as an ordinary failing test under the original test's name.
    """

    def __init__(self, *, name: str, parent: Any, message: str) -> None:
        super().__init__(name, parent)
        self._message = message

    def runtest(self) -> None:
        pytest.fail(self._message, pytrace=False)

    def repr_failure(self, excinfo: Any, style: str | None = None) -> str:
        # Neither argument is consulted: the stored message is the whole report.
        del excinfo, style
        return self._message

    def reportinfo(self) -> tuple[Any, int | None, str]:
        location = (self.fspath, 0, self.name)
        return location
386
387
388 # ----------------------------------------------------------------------
389 # Verdict → pytest outcome translation
390 # ----------------------------------------------------------------------
391
392
393 def _find_probe(suite: SuiteResult, name: str) -> ProbeResult | None:
394 for p in suite.probes:
395 if p.name == name:
396 return p
397 return None
398
399
def _apply_verdict(probe: ProbeResult) -> None:
    """Map a probe's :class:`Verdict` onto the matching pytest outcome.

    ``PASS`` returns quietly, ``WARN`` emits a Python warning and
    returns, ``SKIP`` skips the item, ``FAIL`` and ``ERROR`` fail it.
    A verdict matching none of these falls through and returns.
    """
    from dlm_sway.core.result import Verdict

    verdict = probe.verdict
    detail = probe.message or ""

    if verdict == Verdict.PASS:
        pass
    elif verdict == Verdict.WARN:
        # Routed through the ``warnings`` module so pytest's warning
        # filters (``-W``) apply; the item itself still passes.
        import warnings

        warnings.warn(f"sway WARN [{probe.kind}]: {detail}", stacklevel=2)
    elif verdict == Verdict.SKIP:
        pytest.skip(detail or f"probe {probe.name!r} skipped")
    elif verdict == Verdict.FAIL:
        pytest.fail(f"FAIL [{probe.kind}]: {detail}", pytrace=False)
    elif verdict == Verdict.ERROR:
        pytest.fail(f"ERROR [{probe.kind}]: {detail}", pytrace=False)
420
421
# Public surface of the module: only the two pytest hook functions.
# Every other name in this file is underscore-prefixed and internal.
__all__ = ["pytest_collection_modifyitems", "pytest_configure"]