"""S26 X3 prove-the-value: sway run on an unpacked swaypack matches
the pre-pack verdict bit-for-bit (modulo timestamp fields).

This is the integration test the sprint DoD requires: pack a real
spec → unpack into a tmp dir → run the unpacked spec → compare
verdict + per-probe scores against the original run. If the round
trip is lossy, the audit's "share an adapter audit with a coworker"
flow doesn't actually reproduce the original result.

Marked ``slow + online`` because it goes through the full runner +
spec-loader machinery on a non-trivial fixture.
"""

from __future__ import annotations

import json
import os
from pathlib import Path

import pytest
import yaml

pytestmark = [pytest.mark.slow, pytest.mark.online]


def _write_two_prompt_spec(spec_path: Path) -> None:
    """Write a small but genuine sway.yaml to ``spec_path``.

    The spec declares two dummy-backed models and a single ``delta_kl``
    probe over two prompts, so it yields a deterministic verdict against
    the dummy backend without loading any model.
    """
    dummy_model = {"kind": "dummy", "base": "dummy-base"}
    delta_kl_probe = {
        "name": "dk",
        "kind": "delta_kl",
        "prompts": ["alpha", "beta"],
        # Easy bar for the dummy ft view.
        "assert_mean_gte": 0.001,
    }
    document = {
        "version": 1,
        # Separate copies per model: sharing one mapping object would make
        # the YAML dumper emit an anchor/alias pair instead of two mappings.
        "models": {"base": dict(dummy_model), "ft": dict(dummy_model)},
        "defaults": {"seed": 0},
        "suite": [delta_kl_probe],
    }
    rendered = yaml.safe_dump(document, sort_keys=False)
    spec_path.write_text(rendered, encoding="utf-8")


def _run_spec_via_dummy(spec_path: Path) -> tuple[str, list[tuple[str, str, float | None]]]:
    """Execute the spec at ``spec_path`` on the dummy differential backend.

    The CLI is skipped on purpose to avoid subprocess flakiness; the
    runner is the piece S26 needs to round-trip cleanly.

    Returns:
        ``(suite_verdict, [(probe_name, verdict, score), ...])`` for the
        round-trip comparison.
    """
    from dlm_sway.backends.dummy import DummyDifferentialBackend, DummyResponses
    from dlm_sway.suite.loader import load_spec
    from dlm_sway.suite.runner import run as run_suite
    from dlm_sway.suite.score import compute as compute_score

    loaded_spec = load_spec(spec_path)
    dummy = DummyDifferentialBackend(base=DummyResponses(), ft=DummyResponses())
    try:
        outcome = run_suite(loaded_spec, dummy, spec_path=str(spec_path))
        suite_score = compute_score(outcome)
    finally:
        # Dummy backend has no .close(); look it up defensively so the
        # same cleanup shape still works for a backend that provides one.
        closer = getattr(dummy, "close", None)
        if callable(closer):
            closer()

    per_probe = [
        (probe.name, str(probe.verdict), probe.score) for probe in outcome.probes
    ]
    return str(suite_score.band), per_probe


def test_pack_run_round_trip_matches(tmp_path: Path) -> None:
    """Running a spec after pack → unpack must reproduce the pre-pack
    band and per-probe (name, verdict, score) results exactly, with
    SWAY_NULL_CACHE_DIR pointed at the unpacked cache when one exists."""
    from dlm_sway.cli._pack import pack_spec
    from dlm_sway.cli._unpack import unpack_swaypack

    env_key = "SWAY_NULL_CACHE_DIR"

    # 1) Baseline: write a spec and run it; this is the "source of truth."
    source_dir = tmp_path / "source"
    source_dir.mkdir()
    original_spec = source_dir / "sway.yaml"
    _write_two_prompt_spec(original_spec)
    pre_band, pre_probes = _run_spec_via_dummy(original_spec)

    # 2) Pack without the null cache: the dummy backend doesn't write
    #    null-stats to disk anyway, and since the test lives in an
    #    isolated tmp_path we shouldn't touch the user's home cache
    #    either way.
    archive = tmp_path / "test.swaypack.tar.gz"
    packed = pack_spec(original_spec, out_path=archive, include_null_cache=False)
    assert packed.size_bytes > 0

    # 3) Unpack into a fresh directory.
    unpacked = unpack_swaypack(archive, target_dir=tmp_path / "destination")

    # 4) Re-run from the unpacked spec. If the pack carried a null-stats
    #    dir (it didn't here), point SWAY_NULL_CACHE_DIR at it; either
    #    way the save/restore path around the run is exercised.
    saved_value = os.environ.get(env_key)
    if unpacked.null_stats_dir is not None:
        os.environ[env_key] = str(unpacked.null_stats_dir)
    try:
        post_band, post_probes = _run_spec_via_dummy(unpacked.spec_path)
    finally:
        # Put the variable back exactly as it was before the run.
        if saved_value is not None:
            os.environ[env_key] = saved_value
        else:
            os.environ.pop(env_key, None)

    # 5) Verdict + per-probe scores must round-trip exactly.
    assert pre_band == post_band, f"band drifted: {pre_band!r} → {post_band!r}"
    assert pre_probes == post_probes, (
        f"per-probe results drifted:\n  pre: {pre_probes}\n  post: {post_probes}"
    )


def test_unpack_with_null_cache_sets_env_pointer(tmp_path: Path) -> None:
    """When a pack carries a null-stats cache, unpacking must report a
    non-None ``null_stats_dir`` that contains the cached file, so the
    caller can pass that directory to ``SWAY_NULL_CACHE_DIR``."""
    from dlm_sway.cli._pack import pack_spec
    from dlm_sway.cli._unpack import unpack_swaypack

    xdg_key = "XDG_CACHE_HOME"
    cache_entry = "abc123.json"

    # Seed a fake null-stats cache under a private XDG cache root.
    xdg_root = tmp_path / "xdg-cache"
    null_stats = xdg_root / "dlm-sway" / "null-stats"
    null_stats.mkdir(parents=True)
    payload = json.dumps({"mean": 1.0, "std": 0.1, "runs": 3})
    (null_stats / cache_entry).write_text(payload, encoding="utf-8")

    # Redirect the pack reader at the fake cache for the duration of the
    # pack call only, then restore the variable to its prior state.
    saved_xdg = os.environ.get(xdg_key)
    os.environ[xdg_key] = str(xdg_root)
    try:
        spec_path = tmp_path / "sway.yaml"
        _write_two_prompt_spec(spec_path)
        archive = tmp_path / "with-cache.swaypack.tar.gz"
        packed = pack_spec(spec_path, out_path=archive, include_null_cache=True)
        assert packed.null_stats_count == 1
    finally:
        if saved_xdg is not None:
            os.environ[xdg_key] = saved_xdg
        else:
            os.environ.pop(xdg_key, None)

    unpacked = unpack_swaypack(archive, target_dir=tmp_path / "u")
    assert unpacked.null_stats_dir is not None
    assert (unpacked.null_stats_dir / cache_entry).exists()