"""Tests for :mod:`dlm_sway.pytest_plugin` via pytest's ``pytester`` fixture.

The canonical way to test a pytest plugin is to spawn a sub-session
using pytest's own ``pytester`` harness. We write a tiny spec +
test file into ``pytester``'s tmp rootdir, monkeypatch the suite
cache to return canned ``SuiteResult`` / ``SwayScore`` values, and
then assert the observed pytest outcomes match what the plugin's
verdict translation claims to do.
"""

from __future__ import annotations

from datetime import UTC, datetime
from typing import Any

import pytest

from dlm_sway.core.result import ProbeResult, SuiteResult, SwayScore, Verdict

pytest_plugins = ["pytester"]


# ----------------------------------------------------------------------
# Canned suite / score helpers
# ----------------------------------------------------------------------


def _suite_with(probes: list[ProbeResult]) -> SuiteResult:
    """Wrap ``probes`` in a ``SuiteResult`` carrying fixed placeholder metadata.

    The start and finish timestamps are the same pinned instant, so the
    canned suite is identical on every run.
    """
    pinned_instant = datetime(year=2026, month=1, day=1, hour=12, tzinfo=UTC)
    return SuiteResult(
        probes=tuple(probes),
        spec_path="sway.yaml",
        base_model_id="test/base",
        adapter_id="",
        sway_version="0.0.0",
        started_at=pinned_instant,
        finished_at=pinned_instant,
    )


def _score(overall: float) -> SwayScore:
    """Build a ``SwayScore`` with no components and the band matching ``overall``."""
    band = SwayScore.band_for(overall)
    return SwayScore(overall=overall, components={}, band=band)


def _stub_cache(monkeypatch: pytest.MonkeyPatch, suite: SuiteResult, score: SwayScore) -> None:
    """Patch ``_SuiteCache.get_or_run`` so it always hands back ``(suite, score)``.

    The replacement keeps the keyword names ``spec_path`` and ``weights``
    but ignores their values. ``monkeypatch`` undoes the patch when the
    calling test finishes.
    """
    from dlm_sway.pytest_plugin import _SuiteCache

    canned_pair = (suite, score)

    def _return_canned(
        self: _SuiteCache, spec_path: Any, *, weights: Any = None
    ) -> tuple[SuiteResult, SwayScore]:
        # Arguments are accepted only for signature compatibility.
        del spec_path, weights
        return canned_pair

    monkeypatch.setattr(_SuiteCache, "get_or_run", _return_canned)


# ----------------------------------------------------------------------
# Minimal spec + test file written into pytester's rootdir
# ----------------------------------------------------------------------


# Two-probe spec ("dk" and "sis") that the tests write out as ``sway.yaml``.
# The canned ``ProbeResult`` objects built in the tests below reuse these
# probe names and kinds. The backslash right after the opening quotes keeps
# the YAML text from starting with an empty line.
_MIN_SPEC = """\
version: 1
models:
  base:
    base: "test/base"
  ft:
    base: "test/base"
suite:
  - name: "dk"
    kind: "delta_kl"
    prompts: ["p1", "p2"]
  - name: "sis"
    kind: "section_internalization"
"""


def _write_spec(pytester: pytest.Pytester, content: str = _MIN_SPEC) -> None:
    """Write ``content`` to a ``sway.yaml`` file via ``pytester.makefile``."""
    # ``makefile`` names each file after its keyword: ``sway`` + ``.yaml``.
    spec_files = {"sway": content}
    pytester.makefile(".yaml", **spec_files)


# ----------------------------------------------------------------------
# Tests
# ----------------------------------------------------------------------


class TestMarkerRegistration:
    """The ``sway`` marker is listed by ``pytest --markers``."""

    def test_marker_shows_in_help(self, pytester: pytest.Pytester) -> None:
        """``pytest --markers`` exits cleanly and prints a ``sway(`` entry."""
        run = pytester.runpytest_inprocess("--markers")
        assert run.ret == 0
        sway_lines = [line for line in run.stdout.lines if "sway(" in line]
        assert sway_lines


class TestExpansion:
    """Each canned probe verdict shows up as its own pytest outcome."""

    def test_one_item_per_probe(
        self, pytester: pytest.Pytester, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """One ``@pytest.mark.sway`` function with two PASS probes yields two passes."""
        _write_spec(pytester)
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="sway.yaml")
            def test_demo():
                pass
            """
        )
        delta_kl = ProbeResult(name="dk", kind="delta_kl", verdict=Verdict.PASS, score=0.9)
        internalization = ProbeResult(
            name="sis", kind="section_internalization", verdict=Verdict.PASS, score=0.8
        )
        _stub_cache(monkeypatch, _suite_with([delta_kl, internalization]), _score(0.85))

        run = pytester.runpytest_inprocess("-v")

        run.assert_outcomes(passed=2)
        # The verbose report must name one synthetic item per probe.
        report = "\n".join(run.stdout.lines)
        for item_id in ("test_demo::dk", "test_demo::sis"):
            assert item_id in report

    def test_fail_verdict_propagates(
        self, pytester: pytest.Pytester, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """A FAIL probe becomes a failed item and its message reaches the output."""
        _write_spec(pytester)
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="sway.yaml")
            def test_demo():
                pass
            """
        )
        failing = ProbeResult(
            name="dk",
            kind="delta_kl",
            verdict=Verdict.FAIL,
            score=0.2,
            message="adapter didn't move the needle",
        )
        passing = ProbeResult(
            name="sis", kind="section_internalization", verdict=Verdict.PASS, score=0.9
        )
        _stub_cache(monkeypatch, _suite_with([failing, passing]), _score(0.55))

        run = pytester.runpytest_inprocess("-v")

        run.assert_outcomes(passed=1, failed=1)
        report = "\n".join(run.stdout.lines)
        assert "test_demo::dk" in report  # the failing one
        assert "adapter didn't move the needle" in report

    def test_skip_verdict_propagates(
        self, pytester: pytest.Pytester, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """A SKIP probe is counted as a skipped item."""
        _write_spec(pytester)
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="sway.yaml")
            def test_demo(): ...
            """
        )
        skipped = ProbeResult(
            name="dk",
            kind="delta_kl",
            verdict=Verdict.SKIP,
            score=None,
            message="no calibration",
        )
        passing = ProbeResult(
            name="sis", kind="section_internalization", verdict=Verdict.PASS, score=0.9
        )
        _stub_cache(monkeypatch, _suite_with([skipped, passing]), _score(0.8))

        run = pytester.runpytest_inprocess("-v")

        run.assert_outcomes(passed=1, skipped=1)

    def test_error_verdict_fails(
        self, pytester: pytest.Pytester, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """An ERROR probe is counted as a failed item."""
        _write_spec(pytester)
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="sway.yaml")
            def test_demo(): ...
            """
        )
        errored = ProbeResult(
            name="dk",
            kind="delta_kl",
            verdict=Verdict.ERROR,
            score=None,
            message="non-finite raw",
        )
        passing = ProbeResult(
            name="sis", kind="section_internalization", verdict=Verdict.PASS, score=0.9
        )
        _stub_cache(monkeypatch, _suite_with([errored, passing]), _score(0.5))

        run = pytester.runpytest_inprocess("-v")

        run.assert_outcomes(passed=1, failed=1)


class TestGate:
    """The ``threshold`` kwarg adds a ``__gate__`` item judged on the overall score."""

    def test_threshold_below_fails_gate(
        self, pytester: pytest.Pytester, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Overall 0.65 against threshold 0.8: both probes pass, the gate fails."""
        _write_spec(pytester)
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="sway.yaml", threshold=0.8)
            def test_demo(): ...
            """
        )
        delta_kl = ProbeResult(name="dk", kind="delta_kl", verdict=Verdict.PASS, score=0.7)
        internalization = ProbeResult(
            name="sis", kind="section_internalization", verdict=Verdict.PASS, score=0.6
        )
        canned_suite = _suite_with([delta_kl, internalization])
        _stub_cache(monkeypatch, canned_suite, _score(0.65))  # below 0.8 → gate fails

        run = pytester.runpytest_inprocess("-v")

        # Two PASS probes + one __gate__ fail = passed=2, failed=1.
        run.assert_outcomes(passed=2, failed=1)
        report = "\n".join(run.stdout.lines)
        # The gate item, the observed score, and the threshold are all reported.
        for fragment in ("__gate__", "0.65", "0.80"):
            assert fragment in report

    def test_threshold_above_passes_gate(
        self, pytester: pytest.Pytester, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Overall 0.85 against threshold 0.5: probes and gate all pass."""
        _write_spec(pytester)
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="sway.yaml", threshold=0.5)
            def test_demo(): ...
            """
        )
        delta_kl = ProbeResult(name="dk", kind="delta_kl", verdict=Verdict.PASS, score=0.9)
        internalization = ProbeResult(
            name="sis", kind="section_internalization", verdict=Verdict.PASS, score=0.8
        )
        _stub_cache(monkeypatch, _suite_with([delta_kl, internalization]), _score(0.85))

        run = pytester.runpytest_inprocess("-v")

        run.assert_outcomes(passed=3)  # 2 probes + 1 __gate__

    def test_threshold_zero_skips_gate_item(
        self, pytester: pytest.Pytester, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """With no ``threshold`` kwarg, the output contains no ``__gate__`` item."""
        _write_spec(pytester)
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="sway.yaml")
            def test_demo(): ...
            """
        )
        delta_kl = ProbeResult(name="dk", kind="delta_kl", verdict=Verdict.PASS, score=0.9)
        internalization = ProbeResult(
            name="sis", kind="section_internalization", verdict=Verdict.PASS, score=0.8
        )
        _stub_cache(monkeypatch, _suite_with([delta_kl, internalization]), _score(0.85))

        run = pytester.runpytest_inprocess("-v")

        run.assert_outcomes(passed=2)
        report = "\n".join(run.stdout.lines)
        assert "__gate__" not in report


class TestErrorPaths:
    """Invalid ``@pytest.mark.sway`` usage ends in exactly one failed item."""

    def test_missing_spec_kwarg(self, pytester: pytest.Pytester) -> None:
        """Omitting ``spec`` fails one item and the output carries the hint."""
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway()
            def test_demo(): ...
            """
        )
        run = pytester.runpytest_inprocess("-v")
        run.assert_outcomes(failed=1)
        report = "\n".join(run.stdout.lines)
        assert "requires a `spec`" in report

    def test_nonexistent_spec_file(self, pytester: pytest.Pytester) -> None:
        """A ``spec`` path that was never written fails one item."""
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="does_not_exist.yaml")
            def test_demo(): ...
            """
        )
        run = pytester.runpytest_inprocess("-v")
        run.assert_outcomes(failed=1)

    def test_bad_threshold(self, pytester: pytest.Pytester) -> None:
        """A non-numeric ``threshold`` fails one item and the output mentions it."""
        _write_spec(pytester)
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="sway.yaml", threshold="not-a-number")
            def test_demo(): ...
            """
        )
        run = pytester.runpytest_inprocess("-v")
        run.assert_outcomes(failed=1)
        report = "\n".join(run.stdout.lines)
        assert "threshold" in report

    def test_unexpected_kwarg(self, pytester: pytest.Pytester) -> None:
        """An unknown marker kwarg fails one item with an explanatory message."""
        _write_spec(pytester)
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="sway.yaml", nonsense="x")
            def test_demo(): ...
            """
        )
        run = pytester.runpytest_inprocess("-v")
        run.assert_outcomes(failed=1)
        report = "\n".join(run.stdout.lines)
        assert "unexpected arguments" in report


class TestSuiteReuse:
    """Decorated tests in one session go through a single shared suite cache."""

    def test_cache_shared_across_decorated_tests(
        self, pytester: pytest.Pytester, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Two decorators against the same spec share one ``_SuiteCache``.

        ``get_or_run`` is replaced by a stub that returns canned data and
        records every distinct cache instance it is invoked on. Because the
        real ``get_or_run`` is bypassed, this does not measure how often the
        suite would actually run; it only checks that both decorated tests
        were served by the same cache object.
        """
        _write_spec(pytester)
        pytester.makepyfile(
            """
            import pytest

            @pytest.mark.sway(spec="sway.yaml")
            def test_a(): ...

            @pytest.mark.sway(spec="sway.yaml")
            def test_b(): ...
            """
        )
        suite = _suite_with(
            [
                ProbeResult(name="dk", kind="delta_kl", verdict=Verdict.PASS, score=0.9),
                ProbeResult(
                    name="sis", kind="section_internalization", verdict=Verdict.PASS, score=0.8
                ),
            ]
        )
        score = _score(0.85)

        from dlm_sway.pytest_plugin import _SuiteCache

        # Hold the instances themselves (not their ids) so a collected cache
        # cannot have its id reused and be mistaken for an earlier one. The
        # stub deliberately leaves the cache object untouched.
        caches_seen: list[_SuiteCache] = []

        def _counted(
            self: _SuiteCache, *args: Any, **kwargs: Any
        ) -> tuple[SuiteResult, SwayScore]:
            del args, kwargs
            if not any(seen is self for seen in caches_seen):
                caches_seen.append(self)
            return (suite, score)

        monkeypatch.setattr(_SuiteCache, "get_or_run", _counted)
        result = pytester.runpytest_inprocess("-v")
        result.assert_outcomes(passed=4)  # 2 tests × 2 probes
        # Exactly one: zero would mean the stub was never reached, more than
        # one would mean each decorated test built its own cache.
        assert len(caches_seen) == 1