tenseleyflow/sway / 3e0e0ce

Browse files

tests/integration: pack→unpack→sway run round-trip identity (S26 X3-P7)

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
3e0e0cea084e8e414d213ff1d79d69f3d1c0a414
Parents
5899776
Tree
513593b

1 changed file

Status | File | + | -
A tests/integration/test_pack_run_roundtrip.py 152 0
tests/integration/test_pack_run_roundtrip.py (added)
@@ -0,0 +1,152 @@
1
+"""S26 X3 prove-the-value: sway run on an unpacked swaypack matches
2
+the pre-pack verdict bit-for-bit (modulo timestamp fields).
3
+
4
+This is the integration test the sprint DoD requires: pack a real
5
+spec → unpack into a tmp dir → run the unpacked spec → compare
6
+verdict + per-probe scores against the original run. If the round
7
+trip is lossy the audit's "share an adapter audit with a coworker"
8
+flow doesn't actually reproduce.
9
+
10
+Marked ``slow + online`` because it goes through the full runner +
11
+spec-loader machinery on a non-trivial fixture.
12
+"""
13
+
14
+from __future__ import annotations
15
+
16
+import json
17
+import os
18
+from pathlib import Path
19
+
20
+import pytest
21
+import yaml
22
+
23
+pytestmark = [pytest.mark.slow, pytest.mark.online]
24
+
25
+
26
def _write_two_prompt_spec(spec_path: Path) -> None:
    """Write a small two-prompt ``sway.yaml`` to ``spec_path``.

    The spec declares two ``dummy``-kind models (``base`` and ``ft``) and
    one ``delta_kl`` probe over two prompts, so it is intended to run on
    the dummy backend with no model load.

    Args:
        spec_path: Destination file; written as UTF-8 YAML with the key
            order below preserved (``sort_keys=False``).
    """
    delta_kl_probe = {
        "name": "dk",
        "kind": "delta_kl",
        "prompts": ["alpha", "beta"],
        # Deliberately low threshold: an easy bar for the dummy ft view.
        "assert_mean_gte": 0.001,
    }
    # One fresh dict per role: a shared object would make the YAML dumper
    # emit an anchor/alias pair instead of two plain mappings.
    models = {
        role: {"kind": "dummy", "base": "dummy-base"} for role in ("base", "ft")
    }
    document = {
        "version": 1,
        "models": models,
        "defaults": {"seed": 0},
        "suite": [delta_kl_probe],
    }
    rendered = yaml.safe_dump(document, sort_keys=False)
    spec_path.write_text(rendered, encoding="utf-8")
46
+
47
+
48
def _run_spec_via_dummy(spec_path: Path) -> tuple[str, list[tuple[str, str, float | None]]]:
    """Run the spec at ``spec_path`` in-process on the dummy differential backend.

    The suite runner is called directly instead of going through the CLI,
    which keeps subprocess flakiness out of the round-trip comparison.

    Args:
        spec_path: Path of the spec file to load and run.

    Returns:
        ``(band, probes)``: ``band`` is the stringified band of the
        computed score, and ``probes`` holds one
        ``(name, stringified verdict, score)`` tuple per probe result.
    """
    from dlm_sway.backends.dummy import DummyDifferentialBackend, DummyResponses
    from dlm_sway.suite.loader import load_spec
    from dlm_sway.suite.runner import run as run_suite
    from dlm_sway.suite.score import compute as compute_score

    loaded_spec = load_spec(spec_path)
    dummy_backend = DummyDifferentialBackend(
        base=DummyResponses(),
        ft=DummyResponses(),
    )
    try:
        suite_result = run_suite(loaded_spec, dummy_backend, spec_path=str(spec_path))
        suite_score = compute_score(suite_result)
    finally:
        # Only call close() when the backend actually exposes a callable
        # one; the dummy backend is not expected to.
        closer = getattr(dummy_backend, "close", None)
        if callable(closer):
            closer()

    per_probe: list[tuple[str, str, float | None]] = []
    for probe in suite_result.probes:
        per_probe.append((probe.name, str(probe.verdict), probe.score))
    band = str(suite_score.band)
    return band, per_probe
73
+
74
+
75
def test_pack_run_round_trip_matches(tmp_path: Path) -> None:
    """Check that running an unpacked swaypack reproduces the pre-pack run.

    The band and the per-probe ``(name, verdict, score)`` tuples from the
    run on the unpacked spec must equal those from the run on the source
    spec. ``SWAY_NULL_CACHE_DIR`` is pointed at the unpacked null-stats
    directory when the unpack report provides one.
    """
    from dlm_sway.cli._pack import pack_spec
    from dlm_sway.cli._unpack import unpack_swaypack

    env_key = "SWAY_NULL_CACHE_DIR"

    # Step 1: write the source spec and record its run as the reference.
    source_dir = tmp_path / "source"
    source_dir.mkdir()
    source_spec = source_dir / "sway.yaml"
    _write_two_prompt_spec(source_spec)
    pre_band, pre_probes = _run_spec_via_dummy(source_spec)

    # Step 2: pack without the null cache. The dummy backend does not
    # write null-stats to disk, and leaving the cache out keeps the
    # user's home cache untouched by this tmp_path-isolated test.
    archive = tmp_path / "test.swaypack.tar.gz"
    packed = pack_spec(source_spec, out_path=archive, include_null_cache=False)
    assert packed.size_bytes > 0

    # Step 3: unpack into a directory that does not exist yet.
    unpacked = unpack_swaypack(archive, target_dir=tmp_path / "destination")

    # Step 4: run the unpacked spec. The env var is only overridden when
    # the pack carried a null-stats dir (not the case here), and the
    # previous value is restored afterwards either way.
    saved_value = os.environ.get(env_key)
    if unpacked.null_stats_dir is not None:
        os.environ[env_key] = str(unpacked.null_stats_dir)
    try:
        post_band, post_probes = _run_spec_via_dummy(unpacked.spec_path)
    finally:
        if saved_value is not None:
            os.environ[env_key] = saved_value
        else:
            os.environ.pop(env_key, None)

    # Step 5: the round trip must be exact for band and per-probe results.
    assert pre_band == post_band, f"band drifted: {pre_band!r} → {post_band!r}"
    assert (
        pre_probes == post_probes
    ), f"per-probe results drifted:\n  pre: {pre_probes}\n  post: {post_probes}"
119
+
120
+
121
def test_unpack_with_null_cache_sets_env_pointer(tmp_path: Path) -> None:
    """Check that a pack carrying null-stats unpacks them to a usable directory.

    When the pack includes a null-stats cache, the unpack report's
    ``null_stats_dir`` must be non-None and contain the cached file, so a
    caller can hand that directory to ``SWAY_NULL_CACHE_DIR``.
    """
    from dlm_sway.cli._pack import pack_spec
    from dlm_sway.cli._unpack import unpack_swaypack

    # Stage a fake null-stats cache under a private XDG cache root.
    xdg_root = tmp_path / "xdg-cache"
    null_stats = xdg_root / "dlm-sway" / "null-stats"
    null_stats.mkdir(parents=True)
    payload = json.dumps({"mean": 1.0, "std": 0.1, "runs": 3})
    (null_stats / "abc123.json").write_text(payload, encoding="utf-8")

    spec_path = tmp_path / "sway.yaml"
    out = tmp_path / "with-cache.swaypack.tar.gz"

    # Redirect XDG_CACHE_HOME only while packing, so the pack step reads
    # the staged cache; the prior value is restored in all cases.
    saved_xdg = os.environ.get("XDG_CACHE_HOME")
    os.environ["XDG_CACHE_HOME"] = str(xdg_root)
    try:
        _write_two_prompt_spec(spec_path)
        report = pack_spec(spec_path, out_path=out, include_null_cache=True)
        assert report.null_stats_count == 1
    finally:
        if saved_xdg is not None:
            os.environ["XDG_CACHE_HOME"] = saved_xdg
        else:
            os.environ.pop("XDG_CACHE_HOME", None)

    unpack_report = unpack_swaypack(out, target_dir=tmp_path / "u")
    assert unpack_report.null_stats_dir is not None
    assert (unpack_report.null_stats_dir / "abc123.json").exists()