tenseleyflow/sway / 8183ee2

Browse files

tests/unit: report_html — renderer + panel divs + snapshot + missing-plotly hint

Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
8183ee28365943b8cd8d5347e76c871cce51c7df
Parents
dbf73ab
Tree
f165745

1 changed file

StatusFile+-
A tests/unit/test_report_html.py 268 0
tests/unit/test_report_html.pyadded
@@ -0,0 +1,268 @@
1
+"""Tests for :mod:`dlm_sway.suite.report_html` (S12 / F6)."""
2
+
3
+from __future__ import annotations
4
+
5
+import os
6
+import re
7
+from datetime import UTC, datetime
8
+from html.parser import HTMLParser
9
+from pathlib import Path
10
+
11
+import pytest
12
+
13
+from dlm_sway.core.result import (
14
+    ProbeResult,
15
+    SuiteResult,
16
+    SwayScore,
17
+    Verdict,
18
+)
19
+from dlm_sway.suite import report_html
20
+
21
+SNAPSHOT_DIR = Path(__file__).parent.parent / "snapshots"
22
+
23
+# Plotly is shipped via the optional [viz] extra. Skip the whole module
24
+# when it's not importable — the genuinely-missing install hint path is
25
+# covered by the CLI test; test_raises_when_plotly_missing simulates it here.
26
+pytest.importorskip("plotly")
27
+
28
+
29
+def _fixture_suite_and_score() -> tuple[SuiteResult, SwayScore]:
30
+    """Suite exercising every panel: section_internalization (SIS bars)
31
+    and adapter_ablation (response curve) both present."""
32
+    started = datetime(2026, 1, 1, 12, 0, 0, tzinfo=UTC)
33
+    finished = datetime(2026, 1, 1, 12, 0, 5, tzinfo=UTC)
34
+    probes = (
35
+        ProbeResult(
36
+            name="dk",
37
+            kind="delta_kl",
38
+            verdict=Verdict.PASS,
39
+            score=0.87,
40
+            raw=0.456,
41
+            z_score=5.12,
42
+            evidence={},
43
+            message="mean js=0.4560, z=+5.12σ vs null",
44
+            duration_s=0.1,
45
+        ),
46
+        ProbeResult(
47
+            name="sis",
48
+            kind="section_internalization",
49
+            verdict=Verdict.PASS,
50
+            score=0.70,
51
+            raw=0.14,
52
+            z_score=3.8,
53
+            evidence={
54
+                "per_section": [
55
+                    {"section_id": "sec01", "effective_sis": 0.18, "passed": True},
56
+                    {"section_id": "sec02", "effective_sis": 0.21, "passed": True},
57
+                    {"section_id": "sec03", "effective_sis": 0.03, "passed": False},
58
+                    {"section_id": "sec04", "effective_sis": 0.10, "passed": True},
59
+                ],
60
+                "num_sections": 4,
61
+                "passing_frac": 0.75,
62
+            },
63
+            message="3/4 sections cleared",
64
+            duration_s=0.3,
65
+        ),
66
+        ProbeResult(
67
+            name="abl",
68
+            kind="adapter_ablation",
69
+            verdict=Verdict.PASS,
70
+            score=0.75,
71
+            raw=0.92,
72
+            z_score=3.5,
73
+            evidence={
74
+                "lambdas": [0.0, 0.25, 0.5, 0.75, 1.0, 1.25],
75
+                "mean_divergence_per_lambda": [0.0, 0.05, 0.11, 0.16, 0.19, 0.20],
76
+                "linearity": 0.92,
77
+                "saturation_lambda": 0.75,
78
+                "saturation_reason": "found",
79
+                "overshoot": 1.05,
80
+            },
81
+            message="R²=0.92, sat_λ=0.75 (in band), overshoot=1.05",
82
+            duration_s=0.5,
83
+        ),
84
+        ProbeResult(
85
+            name="lk",
86
+            kind="leakage",
87
+            verdict=Verdict.SKIP,
88
+            score=None,
89
+            message="no PROSE sections to test for leakage",
90
+            duration_s=0.0,
91
+        ),
92
+    )
93
+    suite = SuiteResult(
94
+        spec_path="fixture.yaml",
95
+        started_at=started,
96
+        finished_at=finished,
97
+        base_model_id="HuggingFaceTB/SmolLM2-135M",
98
+        adapter_id="adapters/test/v1",
99
+        sway_version="0.1.0",
100
+        probes=probes,
101
+    )
102
+    score = SwayScore(
103
+        overall=0.77,
104
+        components={"adherence": 0.87, "attribution": 0.70, "calibration": 0.0, "ablation": 0.75},
105
+        weights={"adherence": 0.30, "attribution": 0.35, "calibration": 0.20, "ablation": 0.15},
106
+        band="healthy",
107
+    )
108
+    return suite, score
109
+
110
+
111
+class _WellFormednessChecker(HTMLParser):
112
+    """Trivial subclass: we only use HTMLParser to *not raise*.
113
+
114
+    The stdlib parser is tolerant; the test is 'it doesn't blow up.'
115
+    Strict XHTML well-formedness isn't what the browser enforces.
116
+    """
117
+
118
+    def error(self, message: str) -> None:  # pragma: no cover — never called with HTMLParser
119
+        raise AssertionError(f"HTMLParser rejected the output: {message}")
120
+
121
+
122
+def _parse_ok(html_text: str) -> None:
123
+    parser = _WellFormednessChecker(convert_charrefs=True)
124
+    parser.feed(html_text)
125
+    parser.close()
126
+
127
+
128
+class TestToHtml:
129
+    def test_parses_as_html(self) -> None:
130
+        suite, score = _fixture_suite_and_score()
131
+        out = report_html.to_html(suite, score)
132
+        _parse_ok(out)
133
+
134
+    def test_contains_all_probe_names(self) -> None:
135
+        suite, score = _fixture_suite_and_score()
136
+        out = report_html.to_html(suite, score)
137
+        for name in ("dk", "sis", "abl", "lk"):
138
+            assert name in out, f"probe {name!r} not in HTML"
139
+
140
+    def test_contains_all_five_panel_divs(self) -> None:
141
+        suite, score = _fixture_suite_and_score()
142
+        out = report_html.to_html(suite, score)
143
+        for div_id in ("sway-gauge", "sway-category", "sway-sis", "sway-ablation", "sway-scatter"):
144
+            assert f'id="{div_id}"' in out, f"panel div {div_id!r} missing"
145
+
146
+    def test_plotly_js_inlined_once(self) -> None:
147
+        """The ~3 MB Plotly bundle is embedded, not linked externally.
148
+
149
+        Guard: no ``<script src="http..."`` tags exist — everything
150
+        loads from the inline bundle so the page works offline.
151
+        Plotly's bundle body *does* carry the string ``cdn.plot.ly`` as
152
+        an internal default for mapbox config; that's data, not a fetch,
153
+        so we only care about ``<script src=...>`` tags.
154
+        """
155
+        suite, score = _fixture_suite_and_score()
156
+        out = report_html.to_html(suite, score)
157
+        external_scripts = re.findall(r'<script\s+[^>]*src\s*=\s*["\'](https?:[^"\']+)["\']', out)
158
+        assert external_scripts == [], (
159
+            f"HTML pulls in external scripts (should all be inlined): {external_scripts}"
160
+        )
161
+        # Sanity: output is >1 MB (JS bundle is ~3-5 MB — gives us room
162
+        # if Plotly slims down a bit between releases).
163
+        assert len(out) > 1_000_000, f"HTML output suspiciously small: {len(out)} bytes"
164
+
165
+    def test_no_sis_panel_when_probe_absent(self) -> None:
166
+        """A suite without section_internalization skips the SIS panel but
167
+        still renders the other four."""
168
+        suite, score = _fixture_suite_and_score()
169
+        pruned_probes = tuple(p for p in suite.probes if p.kind != "section_internalization")
170
+        suite = SuiteResult(
171
+            spec_path=suite.spec_path,
172
+            started_at=suite.started_at,
173
+            finished_at=suite.finished_at,
174
+            base_model_id=suite.base_model_id,
175
+            adapter_id=suite.adapter_id,
176
+            sway_version=suite.sway_version,
177
+            probes=pruned_probes,
178
+        )
179
+        out = report_html.to_html(suite, score)
180
+        assert 'id="sway-sis"' not in out
181
+        assert 'id="sway-ablation"' in out
182
+        assert 'id="sway-scatter"' in out
183
+
184
+    def test_zero_probe_suite_still_renders(self) -> None:
185
+        """Empty probes — gauge/category/scatter still emit; no crashes."""
186
+        started = datetime(2026, 1, 1, 12, 0, 0, tzinfo=UTC)
187
+        suite = SuiteResult(
188
+            spec_path="empty.yaml",
189
+            started_at=started,
190
+            finished_at=started,
191
+            base_model_id="base",
192
+            adapter_id="",
193
+            sway_version="0.1.0",
194
+            probes=(),
195
+        )
196
+        score = SwayScore(overall=0.0, components={}, band="noise")
197
+        out = report_html.to_html(suite, score)
198
+        _parse_ok(out)
199
+        assert 'id="sway-gauge"' in out
200
+        assert "no probes ran" in out
201
+
202
+    def test_raises_when_plotly_missing(self, monkeypatch: pytest.MonkeyPatch) -> None:
203
+        """Simulated ImportError surfaces the install hint."""
204
+        import builtins
205
+
206
+        real_import = builtins.__import__
207
+
208
+        def fake_import(name, *args, **kwargs):  # type: ignore[no-untyped-def]
209
+            if name.startswith("plotly"):
210
+                raise ImportError("simulated missing plotly")
211
+            return real_import(name, *args, **kwargs)
212
+
213
+        monkeypatch.setattr(builtins, "__import__", fake_import)
214
+        suite, score = _fixture_suite_and_score()
215
+        with pytest.raises(RuntimeError, match=r"plotly.*\[viz\]"):
216
+            report_html.to_html(suite, score)
217
+
218
+
219
+class TestWrapperSnapshot:
220
+    """Snapshot the Sway-owned wrapper, strip the Plotly bundle JS so the
221
+    snapshot doesn't churn on Plotly point releases.
222
+    """
223
+
224
+    #: Matches the single ``<script>...plotly_bundle...</script>`` we emit
225
+    #: in ``<head>``. Plotly's per-figure scripts live in the body and
226
+    #: carry the stable chart data — those we *do* want in the snapshot.
227
+    _HEAD_SCRIPT_RE = re.compile(
228
+        r'<script type="text/javascript">\s*/\*\*.*?</script>',
229
+        re.DOTALL,
230
+    )
231
+
232
+    def test_snapshot(self) -> None:
233
+        """Run
234
+        ``SWAY_UPDATE_SNAPSHOTS=1 uv run pytest tests/unit/test_report_html.py``
235
+        to regenerate after an intentional wrapper change. Plotly JS
236
+        bundle bumps should NOT drift this — it's stripped before compare.
237
+        """
238
+        suite, score = _fixture_suite_and_score()
239
+        raw = report_html.to_html(suite, score)
240
+
241
+        # Strip the Plotly JS bundle; confirm we actually removed it.
242
+        stripped = self._HEAD_SCRIPT_RE.sub(
243
+            '<script type="text/javascript">/* plotly bundle — stripped for snapshot */</script>',
244
+            raw,
245
+            count=1,
246
+        )
247
+        assert stripped != raw, (
248
+            "failed to strip the Plotly JS bundle from the head — regex didn't match"
249
+        )
250
+        # Further shrink: replace per-figure config UUIDs (Plotly sprinkles
251
+        # `"uuid": "..."` in some payloads) to keep snapshot stable across
252
+        # minor Plotly versions.
253
+        stripped = re.sub(r'"uid": ?"[^"]*"', '"uid": "<stripped>"', stripped)
254
+
255
+        path = SNAPSHOT_DIR / "report.html"
256
+        if os.environ.get("SWAY_UPDATE_SNAPSHOTS") == "1" or not path.exists():
257
+            path.parent.mkdir(parents=True, exist_ok=True)
258
+            path.write_text(stripped, encoding="utf-8")
259
+            pytest.skip(
260
+                "snapshot report.html written — re-run without SWAY_UPDATE_SNAPSHOTS to verify"
261
+            )
262
+        expected = path.read_text(encoding="utf-8")
263
+        assert stripped == expected, (
264
+            "report.html drifted from snapshot.\n"
265
+            "To accept the new output intentionally, run:\n"
266
+            "    SWAY_UPDATE_SNAPSHOTS=1 uv run pytest tests/unit/test_report_html.py\n"
267
+            "and commit the updated file.\n"
268
+        )