sway Public

Watch 0 Fork 0 Star 0

Python · 15154 bytes Raw Blame History

  
        1
        """pytest plugin — ``@pytest.mark.sway`` expands into per-probe items (S15 / F10).
      
        2
        
        3
        Install with ``pip install 'dlm-sway[pytest]'`` and the plugin
      
        4
        auto-loads via the ``pytest11`` entry point. Writing::
      
        5
        
        6
            @pytest.mark.sway(spec="sway.yaml", threshold=0.6)
      
        7
            def test_adapter_healthy(): ...
      
        8
        
        9
        turns a single test function into **N + 1** pytest items:
      
        10
        
        11
        - one item per probe in ``sway.yaml``, named
      
        12
          ``test_adapter_healthy::<probe_name>``, outcome tied to the probe's
      
        13
          verdict (``FAIL``/``ERROR`` → pytest Failed; ``SKIP`` → pytest
      
        14
          Skipped; everything else passes),
      
        15
        - a single ``test_adapter_healthy::__gate__`` item that fails when
      
        16
          the composite score falls below ``threshold`` (only added when the
      
        17
          caller passes a positive ``threshold``).
      
        18
        
        19
        The suite runs **once per distinct (spec, weights) pair**; later synthetic
      
        20
        items read from a per-session cache so the N-way expansion doesn't
      
        21
        multiply backend wall time.
      
        22
        
        23
        The body of the decorated function is intentionally ignored — the
      
        24
        decorator owns the test. A ``pass`` (or any non-raising body) is
      
        25
        conventional. This mirrors how ``@hypothesis.given(...)`` replaces
      
        26
        the function's behavior while pytest still discovers it through the
      
        27
        normal ``test_*`` name convention.
      
        28
        """
      
        29
        
        30
        from __future__ import annotations
      
        31
        
        32
        from pathlib import Path
      
        33
        from typing import TYPE_CHECKING, Any
      
        34
        
        35
        import pytest
      
        36
        
        37
        # F19 — heavy imports are deferred to call sites so pytest's plugin
      
        38
        # discovery doesn't load ``dlm_sway.core.result`` (and everything below
      
        39
        # it: pydantic, numpy) for users who haven't invoked
      
        40
        # ``@pytest.mark.sway``. The plugin registers as ``pytest11`` on
      
        41
        # install; the tax should only be paid by tests that actually use it.
      
        42
        
        43
        if TYPE_CHECKING:
      
        44
            from _pytest.config import Config
      
        45
            from _pytest.nodes import Item
      
        46
        
        47
            from dlm_sway.core.result import ProbeResult, SuiteResult, SwayScore
      
        48
        
        49
        
        50
        # ----------------------------------------------------------------------
      
        51
        # Session-scoped suite cache
      
        52
        # ----------------------------------------------------------------------
      
        53
        
        54
        
        55
        # Name of the attribute on pytest's ``config`` object under which the
        # ``_SuiteCache`` is stored by ``pytest_configure`` and read back by
        # ``pytest_collection_modifyitems``.
        _CACHE_ATTR: str = "_sway_suite_cache"
      
        56
        
        57
        
        58
        class _SuiteCache:
            """Memo of executed suites, shared by every synthetic sway item.

            Entries map ``(resolved spec path, weights sorted into a tuple)`` to
            the ``(SuiteResult, SwayScore)`` pair produced for that combination.
            Items that point at the same spec with the same weights therefore
            trigger a single suite run between them.
            """

            def __init__(self) -> None:
                self._cache: dict[
                    tuple[str, tuple[tuple[str, float], ...]], tuple[SuiteResult, SwayScore]
                ] = {}

            def get_or_run(
                self,
                spec_path: Path,
                *,
                weights: dict[str, float] | None,
            ) -> tuple[SuiteResult, SwayScore]:
                """Return the stored result for ``spec_path``/``weights``; run the suite on a miss.

                ``None`` and an empty dict of weights produce the same cache key.
                """
                normalized_weights = tuple(sorted((weights or {}).items()))
                cache_key = (str(spec_path.resolve()), normalized_weights)
                try:
                    return self._cache[cache_key]
                except KeyError:
                    pass

                # Imported only on a miss so that loading the plugin does not
                # pull in the CLI module.
                from dlm_sway.cli.commands import _execute_spec

                outcome = _execute_spec(spec_path, weights_override=weights)
                self._cache[cache_key] = outcome
                return outcome
      
        87
        
        88
        
        89
        # ----------------------------------------------------------------------
      
        90
        # pytest hooks
      
        91
        # ----------------------------------------------------------------------
      
        92
        
        93
        
        94
        def pytest_configure(config: Config) -> None:
            """Declare the ``sway`` marker and attach a fresh suite cache to ``config``."""
            marker_help = (
                "sway(spec, threshold=0.0, weights=None): "
                "expand a pytest function into one item per sway probe in the "
                "referenced spec. Required kwarg ``spec`` is a path-like to a "
                "sway.yaml. Optional ``threshold`` adds a ``__gate__`` item "
                "that fails when the composite score drops below that value. "
                "Optional ``weights`` overrides the composite-score category "
                "weights (same schema as ``sway run --weights``)."
            )
            config.addinivalue_line("markers", marker_help)

            # The cache lives as a plain attribute on ``config`` (named by
            # ``_CACHE_ATTR``); a new ``_SuiteCache`` is created every time this
            # hook runs, so nothing has to be torn down explicitly.
            setattr(config, _CACHE_ATTR, _SuiteCache())
      
        109
        
        110
        
        111
        def pytest_collection_modifyitems(config: Config, items: list[Item]) -> None:
            """Swap every expandable ``sway``-marked item for its synthetic items.

            ``items`` is the collected list handed over by pytest; it is rewritten
            in place through slice assignment. Items are kept in their original
            relative order, with each expanded function replaced by its probe
            items (plus an optional gate item) at the same position.
            """
            suite_cache: _SuiteCache = getattr(config, _CACHE_ATTR)
            rewritten: list[Item] = []

            for candidate in items:
                sway_mark = None
                if hasattr(candidate, "get_closest_marker"):
                    sway_mark = candidate.get_closest_marker("sway")

                # Pass-through cases: no ``sway`` mark, not a Function item, or no
                # parent to hang synthetic items on. These items are kept exactly
                # as collected; no warning is emitted for them.
                expandable = (
                    sway_mark is not None
                    and _is_function_item(candidate)
                    and candidate.parent is not None
                )
                if not expandable:
                    rewritten.append(candidate)
                    continue

                try:
                    parsed = _parse_mark(sway_mark, rootpath=config.rootpath)
                except _SwayMarkError as exc:
                    # A bad mark becomes one failing item carrying the message,
                    # rather than an error raised out of the collection hook.
                    rewritten.append(
                        _ConfigErrorItem.from_parent(
                            parent=candidate.parent,
                            name=candidate.name,
                            message=str(exc),
                        )
                    )
                    continue

                spec_path, threshold, weights = parsed
                rewritten.extend(
                    _expand_to_probe_items(
                        parent_item=candidate,
                        spec_path=spec_path,
                        threshold=threshold,
                        weights=weights,
                        cache=suite_cache,
                    )
                )

            items[:] = rewritten
      
        154
        
        155
        
        156
        # ----------------------------------------------------------------------
      
        157
        # Mark parsing + item expansion
      
        158
        # ----------------------------------------------------------------------
      
        159
        
        160
        
        161
        class _SwayMarkError(Exception):
            """Raised during mark parsing when the user's arguments are bad."""


        def _parse_mark(
            mark: pytest.Mark,
            *,
            rootpath: Path,
        ) -> tuple[Path, float, dict[str, float] | None]:
            """Pull ``(spec_path, threshold, weights)`` out of a ``@pytest.mark.sway(...)``.

            The spec may be given positionally (``@pytest.mark.sway("path.yaml")``)
            or as ``spec="path.yaml"``.

            Args:
                mark: The ``sway`` mark; only its ``args`` and ``kwargs`` are read.
                rootpath: Directory against which a relative spec path is resolved.

            Returns:
                ``(spec_path, threshold, weights)`` where ``threshold`` defaults to
                ``0.0`` and ``weights`` is ``None`` or a ``str -> float`` dict.

            Raises:
                _SwayMarkError: if the spec is missing or not path-like, the
                    threshold is not a number (or is NaN), the weights are not a
                    ``str -> float`` mapping, or unknown arguments are present.
            """
            import math

            kwargs = dict(mark.kwargs)
            args = list(mark.args)

            spec = kwargs.pop("spec", None)
            if spec is None and args:
                spec = args.pop(0)
            if spec is None:
                raise _SwayMarkError("@pytest.mark.sway requires a `spec` kwarg or a positional spec path")
            try:
                spec_path = Path(spec)
            except TypeError as exc:
                # ``Path`` rejects non-path-like values (ints, bytes, ...) with a
                # bare TypeError; convert it so the caller's ``_SwayMarkError``
                # handling applies instead of the error escaping the hook.
                raise _SwayMarkError(
                    f"@pytest.mark.sway `spec` must be a path-like; got {type(spec).__name__}"
                ) from exc
            if not spec_path.is_absolute():
                # Resolve against pytest's rootpath — the project root pytest
                # discovers — not the process cwd, so the result does not depend
                # on the directory pytest was launched from.
                spec_path = (rootpath / spec_path).resolve()

            threshold_raw = kwargs.pop("threshold", 0.0)
            try:
                threshold = float(threshold_raw)
            except (TypeError, ValueError) as exc:
                raise _SwayMarkError(
                    f"@pytest.mark.sway `threshold` must be a float; got {threshold_raw!r}"
                ) from exc
            if math.isnan(threshold):
                # NaN compares False against everything, so it would silently
                # behave like "no threshold"; reject it explicitly.
                raise _SwayMarkError(
                    f"@pytest.mark.sway `threshold` must not be NaN; got {threshold_raw!r}"
                )

            weights = kwargs.pop("weights", None)
            if weights is not None:
                if not isinstance(weights, dict):
                    raise _SwayMarkError(
                        f"@pytest.mark.sway `weights` must be a dict or None; got {type(weights).__name__}"
                    )
                try:
                    weights = {str(k): float(v) for k, v in weights.items()}
                except (TypeError, ValueError) as exc:
                    raise _SwayMarkError(f"@pytest.mark.sway `weights` must map str→float ({exc})") from exc

            if args or kwargs:
                # Unknown args — pytest marks silently drop them otherwise.
                extra = list(args) + sorted(kwargs)
                raise _SwayMarkError(f"@pytest.mark.sway got unexpected arguments: {extra}")

            return spec_path, threshold, weights
      
        215
        
        216
        
        217
        def _expand_to_probe_items(
            *,
            parent_item: Item,
            spec_path: Path,
            threshold: float,
            weights: dict[str, float] | None,
            cache: _SuiteCache,
        ) -> list[Item]:
            """Create the synthetic items that replace one decorated function.

            The spec is loaded here to learn the probe names, but the suite is not
            executed: each item asks ``cache`` for results from its own
            ``runtest``. A spec that fails to load with ``SwayError`` yields a
            single ``_ConfigErrorItem`` instead.

            Returns one ``_SwayProbeItem`` per entry of ``spec.suite`` and, when
            ``threshold`` is positive, a trailing ``_SwayGateItem``.
            """
            from dlm_sway.core.errors import SwayError
            from dlm_sway.suite.loader import load_spec

            collector = parent_item.parent
            assert collector is not None  # narrowing: caller filtered None above
            test_name = parent_item.name

            try:
                loaded = load_spec(spec_path)
            except SwayError as exc:
                failure = _ConfigErrorItem.from_parent(
                    parent=collector,
                    name=test_name,
                    message=f"failed to load spec {spec_path}: {exc}",
                )
                return [failure]

            # An entry's label is its ``name``, else its ``kind``, else "?".
            synthetic: list[Item] = [
                _SwayProbeItem.from_parent(
                    parent=collector,
                    name=f"{test_name}::{label}",
                    spec_path=spec_path,
                    weights=weights,
                    probe_name=label,
                    cache=cache,
                )
                for entry in loaded.suite
                for label in [str(entry.get("name", entry.get("kind", "?")))]
            ]

            if threshold > 0.0:
                gate = _SwayGateItem.from_parent(
                    parent=collector,
                    name=f"{test_name}::__gate__",
                    spec_path=spec_path,
                    weights=weights,
                    threshold=threshold,
                    cache=cache,
                )
                synthetic.append(gate)
            return synthetic
      
        274
        
        275
        
        276
        # ----------------------------------------------------------------------
      
        277
        # Item classes
      
        278
        # ----------------------------------------------------------------------
      
        279
        
        280
        
        281
        def _is_function_item(item: Item) -> bool:
            """Return True when ``item`` is a ``pytest.Function`` or a subclass of it.

            Uses ``isinstance`` against the public ``pytest.Function`` class (the
            ``pytest`` package is already imported by this module) instead of
            comparing the class name, so Function subclasses are recognised and an
            unrelated class that merely happens to be called ``Function`` is not.
            """
            return isinstance(item, pytest.Function)
      
        285
        
        286
        
        287
        class _SwayProbeItem(pytest.Item):
            """Synthetic item reporting the verdict of a single sway probe.

            ``runtest`` fetches the suite result from the shared cache (which runs
            the suite on a cache miss), looks the probe up by name and maps its
            verdict onto a pytest outcome.
            """

            def __init__(
                self,
                *,
                name: str,
                parent: Any,
                spec_path: Path,
                weights: dict[str, float] | None,
                probe_name: str,
                cache: _SuiteCache,
            ) -> None:
                super().__init__(name, parent)
                self._cache = cache
                self._probe_name = probe_name
                self._spec_path = spec_path
                self._weights = weights

            def runtest(self) -> None:  # noqa: D401
                """Apply this probe's verdict, failing if the probe is absent from the result."""
                results, _ = self._cache.get_or_run(self._spec_path, weights=self._weights)
                found = _find_probe(results, self._probe_name)
                if found is not None:
                    _apply_verdict(found)
                    return
                pytest.fail(
                    f"probe {self._probe_name!r} not in suite result — "
                    f"available: {sorted(p.name for p in results.probes)}"
                )

            def repr_failure(self, excinfo: Any, style: str | None = None) -> str:
                """Render the failure in "short" style; the requested ``style`` is ignored."""
                del style  # we always render "short"; pytest's style kwarg ignored
                short_repr = excinfo.getrepr(style="short")
                return str(short_repr)

            def reportinfo(self) -> tuple[Any, int | None, str]:
                """Report this item's ``fspath``, line 0 and its name."""
                return self.fspath, 0, self.name
      
        327
        
        328
        
        329
        class _SwayGateItem(pytest.Item):
            """``__gate__`` item — fails unless the composite score reaches ``threshold``.

            The score comes from the same cache entry the probe items use, so the
            gate does not trigger an additional suite run when one is cached.
            """

            def __init__(
                self,
                *,
                name: str,
                parent: Any,
                spec_path: Path,
                weights: dict[str, float] | None,
                threshold: float,
                cache: _SuiteCache,
            ) -> None:
                super().__init__(name, parent)
                self._spec_path = spec_path
                self._weights = weights
                self._threshold = threshold
                self._cache = cache

            def runtest(self) -> None:
                """Fail when ``score.overall`` is below the threshold or is NaN."""
                _suite, score = self._cache.get_or_run(self._spec_path, weights=self._weights)
                # Deliberately ``not (>=)`` rather than ``<``: every ordered
                # comparison against NaN is False, so ``<`` would let a NaN score
                # pass. The gate must fail closed in that case.
                if not (score.overall >= self._threshold):
                    pytest.fail(
                        f"composite score {score.overall:.2f} below threshold {self._threshold:.2f} "
                        f"(band: {score.band or '—'})",
                        pytrace=False,
                    )

            def repr_failure(self, excinfo: Any, style: str | None = None) -> str:
                """Render the failure in "short" style; the requested ``style`` is ignored."""
                del style  # we always render "short"; pytest's style kwarg ignored
                return str(excinfo.getrepr(style="short"))

            def reportinfo(self) -> tuple[Any, int | None, str]:
                """Report this item's ``fspath``, line 0 and its name."""
                return self.fspath, 0, self.name
      
        363
        
        364
        
        365
        class _ConfigErrorItem(pytest.Item):
            """Stand-in item whose only job is to fail with a fixed message.

            It takes the place of a decorated test whose ``@pytest.mark.sway``
            arguments could not be used, so the problem appears as an ordinary
            failed test under the test's own name rather than as a collection
            error.
            """

            def __init__(self, *, name: str, parent: Any, message: str) -> None:
                super().__init__(name, parent)
                # Text shown both as the failure reason and as the failure report.
                self._message = message

            def reportinfo(self) -> tuple[Any, int | None, str]:
                """Report this item's ``fspath``, line 0 and its name."""
                location = self.fspath
                return location, 0, self.name

            def repr_failure(self, excinfo: Any, style: str | None = None) -> str:
                """Return the stored message verbatim; exception details are not rendered."""
                del excinfo, style  # config-error path surfaces only the canned message
                return self._message

            def runtest(self) -> None:
                """Always fail, reporting the stored message without a traceback."""
                pytest.fail(self._message, pytrace=False)
      
        386
        
        387
        
        388
        # ----------------------------------------------------------------------
      
        389
        # Verdict → pytest outcome translation
      
        390
        # ----------------------------------------------------------------------
      
        391
        
        392
        
        393
        def _find_probe(suite: SuiteResult, name: str) -> ProbeResult | None:
            """Return the first entry of ``suite.probes`` whose ``name`` equals ``name``, else ``None``."""
            matches = (candidate for candidate in suite.probes if candidate.name == name)
            return next(matches, None)
      
        398
        
        399
        
        400
        def _apply_verdict(probe: ProbeResult) -> None:
            """Turn ``probe.verdict`` into the matching pytest outcome.

            ``PASS`` returns normally, ``WARN`` emits a Python warning and returns,
            ``SKIP`` skips the test, and ``FAIL``/``ERROR`` fail it without a
            traceback. Any other verdict value falls through and returns.
            """
            from dlm_sway.core.result import Verdict

            msg = probe.message or ""
            verdict = probe.verdict

            if verdict == Verdict.PASS:
                return
            elif verdict == Verdict.WARN:
                # Routed through the ``warnings`` module so pytest's ``-W``
                # filters apply to it; the item itself still passes.
                import warnings

                warnings.warn(f"sway WARN [{probe.kind}]: {msg}", stacklevel=2)
                return
            elif verdict == Verdict.SKIP:
                skip_reason = msg or f"probe {probe.name!r} skipped"
                pytest.skip(skip_reason)
            elif verdict == Verdict.FAIL:
                pytest.fail(f"FAIL [{probe.kind}]: {msg}", pytrace=False)
            elif verdict == Verdict.ERROR:
                pytest.fail(f"ERROR [{probe.kind}]: {msg}", pytrace=False)
      
        420
        
        421
        
        422
        # Only the two pytest hook functions are listed as public; every other
        # name in this module is underscore-prefixed.
        __all__ = ["pytest_collection_modifyitems", "pytest_configure"]

1	"""pytest plugin — ``@pytest.mark.sway`` expands into per-probe items (S15 / F10).
2
3	Install with ``pip install 'dlm-sway[pytest]'`` and the plugin
4	auto-loads via the ``pytest11`` entry point. Writing::
5
6	@pytest.mark.sway(spec="sway.yaml", threshold=0.6)
7	def test_adapter_healthy(): ...
8
9	turns a single test function into N + 1 pytest items:
10
11	- one item per probe in ``sway.yaml``, named
12	``test_adapter_healthy::<probe_name>``, outcome tied to the probe's
13	verdict (``FAIL``/``ERROR`` → pytest Failed; ``SKIP`` → pytest
14	Skipped; everything else passes),
15	- a single ``test_adapter_healthy::__gate__`` item that fails when
16	the composite score falls below ``threshold`` (only added when the
17	caller passes a positive ``threshold``).
18
19	The suite runs once per distinct (spec, weights) pair; later synthetic
20	items read from a per-session cache so the N-way expansion doesn't
21	multiply backend wall time.
22
23	The body of the decorated function is intentionally ignored — the
24	decorator owns the test. A ``pass`` (or any non-raising body) is
25	conventional. This mirrors how ``@hypothesis.given(...)`` replaces
26	the function's behavior while pytest still discovers it through the
27	normal ``test_*`` name convention.
28	"""
29
30	from __future__ import annotations
31
32	from pathlib import Path
33	from typing import TYPE_CHECKING, Any
34
35	import pytest
36
37	# F19 — heavy imports are deferred to call sites so pytest's plugin
38	# discovery doesn't load ``dlm_sway.core.result`` (and everything below
39	# it: pydantic, numpy) for users who haven't invoked
40	# ``@pytest.mark.sway``. The plugin registers as ``pytest11`` on
41	# install; the tax should only be paid by tests that actually use it.
42
43	if TYPE_CHECKING:
44	from _pytest.config import Config
45	from _pytest.nodes import Item
46
47	from dlm_sway.core.result import ProbeResult, SuiteResult, SwayScore
48
49
50	# ----------------------------------------------------------------------
51	# Session-scoped suite cache
52	# ----------------------------------------------------------------------
53
54
55	_CACHE_ATTR: str = "_sway_suite_cache"
56
57
class _SuiteCache:
    """Memo of executed suites, shared by every synthetic sway item.

    Entries map ``(resolved spec path, weights sorted into a tuple)`` to
    the ``(SuiteResult, SwayScore)`` pair produced for that combination.
    Items that point at the same spec with the same weights therefore
    trigger a single suite run between them.
    """

    def __init__(self) -> None:
        self._cache: dict[
            tuple[str, tuple[tuple[str, float], ...]], tuple[SuiteResult, SwayScore]
        ] = {}

    def get_or_run(
        self,
        spec_path: Path,
        *,
        weights: dict[str, float] | None,
    ) -> tuple[SuiteResult, SwayScore]:
        """Return the stored result for ``spec_path``/``weights``; run the suite on a miss.

        ``None`` and an empty dict of weights produce the same cache key.
        """
        normalized_weights = tuple(sorted((weights or {}).items()))
        cache_key = (str(spec_path.resolve()), normalized_weights)
        try:
            return self._cache[cache_key]
        except KeyError:
            pass

        # Imported only on a miss so that loading the plugin does not
        # pull in the CLI module.
        from dlm_sway.cli.commands import _execute_spec

        outcome = _execute_spec(spec_path, weights_override=weights)
        self._cache[cache_key] = outcome
        return outcome
87
88
89	# ----------------------------------------------------------------------
90	# pytest hooks
91	# ----------------------------------------------------------------------
92
93
def pytest_configure(config: Config) -> None:
    """Declare the ``sway`` marker and attach a fresh suite cache to ``config``."""
    marker_help = (
        "sway(spec, threshold=0.0, weights=None): "
        "expand a pytest function into one item per sway probe in the "
        "referenced spec. Required kwarg ``spec`` is a path-like to a "
        "sway.yaml. Optional ``threshold`` adds a ``__gate__`` item "
        "that fails when the composite score drops below that value. "
        "Optional ``weights`` overrides the composite-score category "
        "weights (same schema as ``sway run --weights``)."
    )
    config.addinivalue_line("markers", marker_help)

    # The cache lives as a plain attribute on ``config`` (named by
    # ``_CACHE_ATTR``); a new ``_SuiteCache`` is created every time this
    # hook runs, so nothing has to be torn down explicitly.
    setattr(config, _CACHE_ATTR, _SuiteCache())
109
110
def pytest_collection_modifyitems(config: Config, items: list[Item]) -> None:
    """Swap every expandable ``sway``-marked item for its synthetic items.

    ``items`` is the collected list handed over by pytest; it is rewritten
    in place through slice assignment. Items are kept in their original
    relative order, with each expanded function replaced by its probe
    items (plus an optional gate item) at the same position.
    """
    suite_cache: _SuiteCache = getattr(config, _CACHE_ATTR)
    rewritten: list[Item] = []

    for candidate in items:
        sway_mark = None
        if hasattr(candidate, "get_closest_marker"):
            sway_mark = candidate.get_closest_marker("sway")

        # Pass-through cases: no ``sway`` mark, not a Function item, or no
        # parent to hang synthetic items on. These items are kept exactly
        # as collected; no warning is emitted for them.
        expandable = (
            sway_mark is not None
            and _is_function_item(candidate)
            and candidate.parent is not None
        )
        if not expandable:
            rewritten.append(candidate)
            continue

        try:
            parsed = _parse_mark(sway_mark, rootpath=config.rootpath)
        except _SwayMarkError as exc:
            # A bad mark becomes one failing item carrying the message,
            # rather than an error raised out of the collection hook.
            rewritten.append(
                _ConfigErrorItem.from_parent(
                    parent=candidate.parent,
                    name=candidate.name,
                    message=str(exc),
                )
            )
            continue

        spec_path, threshold, weights = parsed
        rewritten.extend(
            _expand_to_probe_items(
                parent_item=candidate,
                spec_path=spec_path,
                threshold=threshold,
                weights=weights,
                cache=suite_cache,
            )
        )

    items[:] = rewritten
154
155
156	# ----------------------------------------------------------------------
157	# Mark parsing + item expansion
158	# ----------------------------------------------------------------------
159
160
class _SwayMarkError(Exception):
    """Raised during mark parsing when the user's arguments are bad."""


def _parse_mark(
    mark: pytest.Mark,
    *,
    rootpath: Path,
) -> tuple[Path, float, dict[str, float] | None]:
    """Pull ``(spec_path, threshold, weights)`` out of a ``@pytest.mark.sway(...)``.

    The spec may be given positionally (``@pytest.mark.sway("path.yaml")``)
    or as ``spec="path.yaml"``.

    Args:
        mark: The ``sway`` mark; only its ``args`` and ``kwargs`` are read.
        rootpath: Directory against which a relative spec path is resolved.

    Returns:
        ``(spec_path, threshold, weights)`` where ``threshold`` defaults to
        ``0.0`` and ``weights`` is ``None`` or a ``str -> float`` dict.

    Raises:
        _SwayMarkError: if the spec is missing or not path-like, the
            threshold is not a number (or is NaN), the weights are not a
            ``str -> float`` mapping, or unknown arguments are present.
    """
    import math

    kwargs = dict(mark.kwargs)
    args = list(mark.args)

    spec = kwargs.pop("spec", None)
    if spec is None and args:
        spec = args.pop(0)
    if spec is None:
        raise _SwayMarkError("@pytest.mark.sway requires a `spec` kwarg or a positional spec path")
    try:
        spec_path = Path(spec)
    except TypeError as exc:
        # ``Path`` rejects non-path-like values (ints, bytes, ...) with a
        # bare TypeError; convert it so the caller's ``_SwayMarkError``
        # handling applies instead of the error escaping the hook.
        raise _SwayMarkError(
            f"@pytest.mark.sway `spec` must be a path-like; got {type(spec).__name__}"
        ) from exc
    if not spec_path.is_absolute():
        # Resolve against pytest's rootpath — the project root pytest
        # discovers — not the process cwd, so the result does not depend
        # on the directory pytest was launched from.
        spec_path = (rootpath / spec_path).resolve()

    threshold_raw = kwargs.pop("threshold", 0.0)
    try:
        threshold = float(threshold_raw)
    except (TypeError, ValueError) as exc:
        raise _SwayMarkError(
            f"@pytest.mark.sway `threshold` must be a float; got {threshold_raw!r}"
        ) from exc
    if math.isnan(threshold):
        # NaN compares False against everything, so it would silently
        # behave like "no threshold"; reject it explicitly.
        raise _SwayMarkError(
            f"@pytest.mark.sway `threshold` must not be NaN; got {threshold_raw!r}"
        )

    weights = kwargs.pop("weights", None)
    if weights is not None:
        if not isinstance(weights, dict):
            raise _SwayMarkError(
                f"@pytest.mark.sway `weights` must be a dict or None; got {type(weights).__name__}"
            )
        try:
            weights = {str(k): float(v) for k, v in weights.items()}
        except (TypeError, ValueError) as exc:
            raise _SwayMarkError(f"@pytest.mark.sway `weights` must map str→float ({exc})") from exc

    if args or kwargs:
        # Unknown args — pytest marks silently drop them otherwise.
        extra = list(args) + sorted(kwargs)
        raise _SwayMarkError(f"@pytest.mark.sway got unexpected arguments: {extra}")

    return spec_path, threshold, weights
215
216
def _expand_to_probe_items(
    *,
    parent_item: Item,
    spec_path: Path,
    threshold: float,
    weights: dict[str, float] | None,
    cache: _SuiteCache,
) -> list[Item]:
    """Create the synthetic items that replace one ``sway``-marked test.

    The spec is loaded here, but the suite is not executed: each
    returned item asks ``cache`` for the result from its own ``runtest``.
    The result is one ``_SwayProbeItem`` per entry in ``spec.suite``,
    followed by a ``_SwayGateItem`` when ``threshold`` is positive. If
    loading the spec raises ``SwayError``, a single ``_ConfigErrorItem``
    carrying the message is returned instead.
    """
    from dlm_sway.core.errors import SwayError
    from dlm_sway.suite.loader import load_spec

    parent = parent_item.parent
    assert parent is not None  # narrowing: caller filtered None above

    test_name = parent_item.name
    try:
        spec = load_spec(spec_path)
    except SwayError as exc:
        load_failure = _ConfigErrorItem.from_parent(
            parent=parent,
            name=test_name,
            message=f"failed to load spec {spec_path}: {exc}",
        )
        return [load_failure]

    # A probe is labelled by its ``name``, falling back to its ``kind``
    # and finally to ``"?"``. The generator keeps label lookup and item
    # construction interleaved, one entry at a time.
    labels = (str(entry.get("name", entry.get("kind", "?"))) for entry in spec.suite)
    synthetic: list[Item] = [
        _SwayProbeItem.from_parent(
            parent=parent,
            name=f"{test_name}::{label}",
            spec_path=spec_path,
            weights=weights,
            probe_name=label,
            cache=cache,
        )
        for label in labels
    ]
    if threshold > 0.0:
        gate = _SwayGateItem.from_parent(
            parent=parent,
            name=f"{test_name}::__gate__",
            spec_path=spec_path,
            weights=weights,
            threshold=threshold,
            cache=cache,
        )
        synthetic.append(gate)
    return synthetic
274
275
276	# ----------------------------------------------------------------------
277	# Item classes
278	# ----------------------------------------------------------------------
279
280
def _is_function_item(item: Item) -> bool:
    """Duck-check whether ``item`` is a ``Function`` item or a subclass of one.

    The check is by class name, so nothing needs to be imported. Every
    class in the item's MRO is inspected rather than only the concrete
    class, so an item whose class derives from ``Function`` (for example
    one substituted by another plugin) is recognised too, instead of
    silently skipping expansion.

    Returns:
        ``True`` when any class in ``type(item).__mro__`` is named
        ``"Function"``, otherwise ``False``.
    """
    return any(klass.__name__ == "Function" for klass in type(item).__mro__)
285
286
class _SwayProbeItem(pytest.Item):
    """Synthetic pytest item standing for a single sway probe.

    At run time the item asks the shared cache for the suite result via
    ``get_or_run``, looks its probe up by name and lets
    ``_apply_verdict`` turn the probe's verdict into a pytest outcome.
    """

    def __init__(
        self,
        *,
        name: str,
        parent: Any,
        spec_path: Path,
        weights: dict[str, float] | None,
        probe_name: str,
        cache: _SuiteCache,
    ) -> None:
        super().__init__(name, parent)
        # Everything needed to fetch the suite result later, plus the
        # name of the one probe this item reports on.
        self._cache = cache
        self._spec_path = spec_path
        self._weights = weights
        self._probe_name = probe_name

    def runtest(self) -> None:
        """Fetch the suite result and report this item's probe."""
        suite, _ = self._cache.get_or_run(self._spec_path, weights=self._weights)
        found = _find_probe(suite, self._probe_name)
        if found is not None:
            _apply_verdict(found)
            return
        # The name came from the spec but has no counterpart in the
        # result: fail and list what the result does contain.
        pytest.fail(
            f"probe {self._probe_name!r} not in suite result — "
            f"available: {sorted(p.name for p in suite.probes)}"
        )

    def repr_failure(self, excinfo: Any, style: str | None = None) -> str:
        """Render the failure in "short" style whatever ``style`` is requested."""
        del style
        short_repr = excinfo.getrepr(style="short")
        return str(short_repr)

    def reportinfo(self) -> tuple[Any, int | None, str]:
        """Report the item's file, line ``0`` and its own name."""
        location = self.fspath
        return location, 0, self.name
327
328
class _SwayGateItem(pytest.Item):
    """The ``__gate__`` item: fails when the composite score is below ``threshold``."""

    def __init__(
        self,
        *,
        name: str,
        parent: Any,
        spec_path: Path,
        weights: dict[str, float] | None,
        threshold: float,
        cache: _SuiteCache,
    ) -> None:
        super().__init__(name, parent)
        # Inputs for the cache lookup, plus the score floor to enforce.
        self._cache = cache
        self._spec_path = spec_path
        self._weights = weights
        self._threshold = threshold

    def runtest(self) -> None:
        """Compare the composite score with the threshold and fail if it is lower."""
        _, composite = self._cache.get_or_run(self._spec_path, weights=self._weights)
        if not (composite.overall < self._threshold):
            return
        pytest.fail(
            f"composite score {composite.overall:.2f} below threshold {self._threshold:.2f} "
            f"(band: {composite.band or '—'})",
            pytrace=False,
        )

    def repr_failure(self, excinfo: Any, style: str | None = None) -> str:
        """Render the failure in "short" style whatever ``style`` is requested."""
        del style
        short_repr = excinfo.getrepr(style="short")
        return str(short_repr)

    def reportinfo(self) -> tuple[Any, int | None, str]:
        """Report the item's file, line ``0`` and its own name."""
        location = self.fspath
        return location, 0, self.name
363
364
class _ConfigErrorItem(pytest.Item):
    """Synthetic item whose only job is to fail with a stored message.

    It is created when the ``@pytest.mark.sway`` arguments are malformed
    or the spec cannot be loaded, so the problem appears as an ordinary
    failed test under the original test's name rather than as a
    collection error.
    """

    def __init__(self, *, name: str, parent: Any, message: str) -> None:
        super().__init__(name, parent)
        # The text shown both as the failure reason and as its report.
        self._message = message

    def runtest(self) -> None:
        """Fail immediately with the stored message and no traceback."""
        pytest.fail(self._message, pytrace=False)

    def repr_failure(self, excinfo: Any, style: str | None = None) -> str:
        """Return the stored message verbatim; both arguments are ignored."""
        del style
        del excinfo
        return self._message

    def reportinfo(self) -> tuple[Any, int | None, str]:
        """Report the item's file, line ``0`` and its own name."""
        location = self.fspath
        return location, 0, self.name
386
387
388	# ----------------------------------------------------------------------
389	# Verdict → pytest outcome translation
390	# ----------------------------------------------------------------------
391
392
def _find_probe(suite: SuiteResult, name: str) -> ProbeResult | None:
    """Return the first probe in ``suite.probes`` named ``name``, or ``None``."""
    matches = (candidate for candidate in suite.probes if candidate.name == name)
    return next(matches, None)
398
399
def _apply_verdict(probe: ProbeResult) -> None:
    """Turn a probe's :class:`Verdict` into the matching pytest outcome.

    ``PASS`` returns quietly. ``WARN`` emits a Python warning and returns,
    so the test still passes. ``SKIP`` calls ``pytest.skip``. ``FAIL`` and
    ``ERROR`` call ``pytest.fail`` without a traceback. Any other verdict
    falls through and the test passes.
    """
    import warnings

    from dlm_sway.core.result import Verdict

    msg = probe.message or ""
    verdict = probe.verdict

    if verdict == Verdict.PASS:
        return
    elif verdict == Verdict.WARN:
        # Goes through the standard warnings machinery, so pytest's
        # ``-W`` filters apply to it.
        warnings.warn(f"sway WARN [{probe.kind}]: {msg}", stacklevel=2)
        return
    elif verdict == Verdict.SKIP:
        pytest.skip(msg or f"probe {probe.name!r} skipped")
    elif verdict == Verdict.FAIL:
        pytest.fail(f"FAIL [{probe.kind}]: {msg}", pytrace=False)
    elif verdict == Verdict.ERROR:
        pytest.fail(f"ERROR [{probe.kind}]: {msg}", pytrace=False)
420
421
# Public surface of the module: only the two pytest hook functions are
# exported. The helpers and item classes in this part of the file are
# underscore-prefixed and treated as private.
__all__ = ["pytest_collection_modifyitems", "pytest_configure"]