Python · 10302 bytes Raw Blame History
1 """Tests for :mod:`dlm_sway.suite.report_html` (S12 / F6)."""
2
3 from __future__ import annotations
4
5 import os
6 import re
7 from datetime import UTC, datetime
8 from html.parser import HTMLParser
9 from pathlib import Path
10
11 import pytest
12
13 from dlm_sway.core.result import (
14 ProbeResult,
15 SuiteResult,
16 SwayScore,
17 Verdict,
18 )
19 from dlm_sway.suite import report_html
20
# Directory holding snapshot files (e.g. ``report.html``): the ``snapshots``
# folder located two levels up from this test file.
SNAPSHOT_DIR = Path(__file__).parent.parent / "snapshots"

# Plotly is shipped via the optional [viz] extra. Skip the whole module
# when it's not importable — the install hint path is covered by the
# CLI test.
pytest.importorskip("plotly")
27
28
def _fixture_suite_and_score() -> tuple[SuiteResult, SwayScore]:
    """Build the ``(suite, score)`` pair shared by the tests in this module.

    The suite holds four probes so that every report panel has input:
    a ``delta_kl`` pass, a ``section_internalization`` pass carrying
    per-section evidence (SIS bars), an ``adapter_ablation`` pass carrying
    the lambda/divergence series (response curve), and a skipped
    ``leakage`` probe.
    """
    run_start = datetime(2026, 1, 1, 12, 0, 0, tzinfo=UTC)
    run_end = datetime(2026, 1, 1, 12, 0, 5, tzinfo=UTC)

    delta_kl_probe = ProbeResult(
        name="dk",
        kind="delta_kl",
        verdict=Verdict.PASS,
        score=0.87,
        raw=0.456,
        z_score=5.12,
        evidence={},
        message="mean js=0.4560, z=+5.12σ vs null",
        duration_s=0.1,
    )

    # One evidence row per section; sec03 is the only one that fails.
    section_rows = [
        {"section_id": section_id, "effective_sis": effective_sis, "passed": passed}
        for section_id, effective_sis, passed in (
            ("sec01", 0.18, True),
            ("sec02", 0.21, True),
            ("sec03", 0.03, False),
            ("sec04", 0.10, True),
        )
    ]
    sis_probe = ProbeResult(
        name="sis",
        kind="section_internalization",
        verdict=Verdict.PASS,
        score=0.70,
        raw=0.14,
        z_score=3.8,
        evidence={
            "per_section": section_rows,
            "num_sections": 4,
            "passing_frac": 0.75,
        },
        message="3/4 sections cleared",
        duration_s=0.3,
    )

    ablation_evidence = {
        "lambdas": [0.0, 0.25, 0.5, 0.75, 1.0, 1.25],
        "mean_divergence_per_lambda": [0.0, 0.05, 0.11, 0.16, 0.19, 0.20],
        "linearity": 0.92,
        "saturation_lambda": 0.75,
        "saturation_reason": "found",
        "overshoot": 1.05,
    }
    ablation_probe = ProbeResult(
        name="abl",
        kind="adapter_ablation",
        verdict=Verdict.PASS,
        score=0.75,
        raw=0.92,
        z_score=3.5,
        evidence=ablation_evidence,
        message="R²=0.92, sat_λ=0.75 (in band), overshoot=1.05",
        duration_s=0.5,
    )

    # Skipped probe: no score, and no raw/z_score/evidence supplied.
    leakage_probe = ProbeResult(
        name="lk",
        kind="leakage",
        verdict=Verdict.SKIP,
        score=None,
        message="no PROSE sections to test for leakage",
        duration_s=0.0,
    )

    suite = SuiteResult(
        spec_path="fixture.yaml",
        started_at=run_start,
        finished_at=run_end,
        base_model_id="HuggingFaceTB/SmolLM2-135M",
        adapter_id="adapters/test/v1",
        sway_version="0.1.0",
        probes=(delta_kl_probe, sis_probe, ablation_probe, leakage_probe),
    )

    component_scores = {
        "adherence": 0.87,
        "attribution": 0.70,
        "calibration": 0.0,
        "ablation": 0.75,
    }
    component_weights = {
        "adherence": 0.30,
        "attribution": 0.35,
        "calibration": 0.20,
        "ablation": 0.15,
    }
    score = SwayScore(
        overall=0.77,
        components=component_scores,
        weights=component_weights,
        band="healthy",
    )
    return suite, score
109
110
class _WellFormednessChecker(HTMLParser):
    """Thin ``HTMLParser`` subclass used purely as a "does it raise?" probe.

    The stdlib parser is lenient, so feeding it a document only proves the
    markup doesn't make the parser blow up. It is not a strict XHTML
    well-formedness check, which browsers don't enforce either.
    """

    def error(self, message: str) -> None:  # pragma: no cover — never called with HTMLParser
        """Turn a parser-reported error into a test failure."""
        failure_text = f"HTMLParser rejected the output: {message}"
        raise AssertionError(failure_text)
120
121
def _parse_ok(html_text: str) -> None:
    """Feed *html_text* through the tolerant stdlib parser and close it.

    Returns nothing; the check passes when neither ``feed`` nor ``close``
    raises.
    """
    checker = _WellFormednessChecker(convert_charrefs=True)
    checker.feed(html_text)
    checker.close()
126
127
class TestToHtml:
    """Checks on the string returned by ``report_html.to_html``."""

    def test_parses_as_html(self) -> None:
        """The full-fixture report passes through the stdlib HTML parser."""
        suite, score = _fixture_suite_and_score()
        rendered = report_html.to_html(suite, score)
        _parse_ok(rendered)

    def test_contains_all_probe_names(self) -> None:
        """Each fixture probe name shows up somewhere in the output."""
        suite, score = _fixture_suite_and_score()
        rendered = report_html.to_html(suite, score)
        for probe_name in ("dk", "sis", "abl", "lk"):
            assert probe_name in rendered, f"probe {probe_name!r} not in HTML"

    def test_contains_all_five_panel_divs(self) -> None:
        """Every panel container id is present for the full fixture."""
        suite, score = _fixture_suite_and_score()
        rendered = report_html.to_html(suite, score)
        panel_ids = (
            "sway-gauge",
            "sway-category",
            "sway-sis",
            "sway-ablation",
            "sway-scatter",
        )
        for div_id in panel_ids:
            id_attr = f'id="{div_id}"'
            assert id_attr in rendered, f"panel div {div_id!r} missing"

    def test_plotly_js_inlined_once(self) -> None:
        """Plotly is embedded in the page rather than fetched remotely.

        Guard: there must be no ``<script src="http..."`` tags, so the
        report keeps working offline. The bundle body itself mentions
        ``cdn.plot.ly`` as an internal default for mapbox config; that is
        plain data, not a fetch, which is why only ``<script src=...>``
        tags are inspected.
        """
        suite, score = _fixture_suite_and_score()
        rendered = report_html.to_html(suite, score)
        remote_src_re = re.compile(r'<script\s+[^>]*src\s*=\s*["\'](https?:[^"\']+)["\']')
        remote_sources = remote_src_re.findall(rendered)
        assert not remote_sources, (
            f"HTML pulls in external scripts (should all be inlined): {remote_sources}"
        )
        # Sanity floor: the inlined JS bundle is ~3-5 MB, so a 1 MB
        # threshold leaves headroom if Plotly slims down between releases.
        size = len(rendered)
        assert size > 1_000_000, f"HTML output suspiciously small: {size} bytes"

    def test_no_sis_panel_when_probe_absent(self) -> None:
        """Dropping the section_internalization probe removes the SIS panel.

        The remaining panels are expected to stay; the ablation and
        scatter divs are the ones asserted here.
        """
        full_suite, score = _fixture_suite_and_score()
        remaining = tuple(
            filter(lambda probe: probe.kind != "section_internalization", full_suite.probes)
        )
        # Rebuild the suite field by field with the reduced probe tuple.
        pruned_suite = SuiteResult(
            spec_path=full_suite.spec_path,
            started_at=full_suite.started_at,
            finished_at=full_suite.finished_at,
            base_model_id=full_suite.base_model_id,
            adapter_id=full_suite.adapter_id,
            sway_version=full_suite.sway_version,
            probes=remaining,
        )
        rendered = report_html.to_html(pruned_suite, score)
        assert 'id="sway-sis"' not in rendered
        assert 'id="sway-ablation"' in rendered
        assert 'id="sway-scatter"' in rendered

    def test_zero_probe_suite_still_renders(self) -> None:
        """A suite with no probes renders without crashing.

        The output must still parse, contain the gauge div, and carry the
        "no probes ran" text.
        """
        moment = datetime(2026, 1, 1, 12, 0, 0, tzinfo=UTC)
        empty_suite = SuiteResult(
            spec_path="empty.yaml",
            started_at=moment,
            finished_at=moment,
            base_model_id="base",
            adapter_id="",
            sway_version="0.1.0",
            probes=(),
        )
        empty_score = SwayScore(overall=0.0, components={}, band="noise")
        rendered = report_html.to_html(empty_suite, empty_score)
        _parse_ok(rendered)
        assert 'id="sway-gauge"' in rendered
        assert "no probes ran" in rendered

    def test_raises_when_plotly_missing(self, monkeypatch: pytest.MonkeyPatch) -> None:
        """A simulated plotly ImportError becomes a RuntimeError with the install hint."""
        import builtins

        original_import = builtins.__import__

        def import_without_plotly(name, *args, **kwargs):  # type: ignore[no-untyped-def]
            # Delegate everything except plotly (and its submodules).
            if not name.startswith("plotly"):
                return original_import(name, *args, **kwargs)
            raise ImportError("simulated missing plotly")

        monkeypatch.setattr(builtins, "__import__", import_without_plotly)
        suite, score = _fixture_suite_and_score()
        with pytest.raises(RuntimeError, match=r"plotly.*\[viz\]"):
            report_html.to_html(suite, score)
217
218
class TestWrapperSnapshot:
    """Golden-file check of the Sway-owned HTML wrapper.

    The inlined Plotly JS bundle is cut out before comparing so the
    snapshot doesn't churn on Plotly point releases.
    """

    #: Matches the single ``<script>...plotly_bundle...</script>`` emitted
    #: in ``<head>``. Plotly's per-figure scripts sit in the body and hold
    #: the stable chart data — those are meant to stay in the snapshot.
    _HEAD_SCRIPT_RE = re.compile(
        r'<script type="text/javascript">\s*/\*\*.*?</script>',
        flags=re.DOTALL,
    )

    def test_snapshot(self) -> None:
        """Compare the normalised report against ``report.html``.

        Run
        ``SWAY_UPDATE_SNAPSHOTS=1 uv run pytest tests/unit/test_report_html.py``
        to regenerate after an intentional wrapper change. Bumping the
        Plotly JS bundle should NOT cause drift, because the bundle is
        stripped before the comparison.
        """
        suite, score = _fixture_suite_and_score()
        rendered = report_html.to_html(suite, score)

        # Swap the first head-script match for a short placeholder, then
        # make sure the substitution really changed something.
        placeholder = (
            '<script type="text/javascript">/* plotly bundle — stripped for snapshot */</script>'
        )
        normalised = self._HEAD_SCRIPT_RE.sub(placeholder, rendered, count=1)
        assert normalised != rendered, (
            "failed to strip the Plotly JS bundle from the head — regex didn't match"
        )
        # Blank out every `"uid": "..."` value so those identifiers cannot
        # destabilise the snapshot across minor Plotly versions.
        normalised = re.sub(r'"uid": ?"[^"]*"', '"uid": "<stripped>"', normalised)

        snapshot_file = SNAPSHOT_DIR / "report.html"
        regenerate = os.environ.get("SWAY_UPDATE_SNAPSHOTS") == "1"
        if regenerate or not snapshot_file.exists():
            # (Re)write the snapshot and skip rather than pass, so a fresh
            # write is never mistaken for a verified comparison.
            snapshot_file.parent.mkdir(parents=True, exist_ok=True)
            snapshot_file.write_text(normalised, encoding="utf-8")
            pytest.skip(
                "snapshot report.html written — re-run without SWAY_UPDATE_SNAPSHOTS to verify"
            )
        recorded = snapshot_file.read_text(encoding="utf-8")
        assert normalised == recorded, (
            "report.html drifted from snapshot.\n"
            "To accept the new output intentionally, run:\n"
            "  SWAY_UPDATE_SNAPSHOTS=1 uv run pytest tests/unit/test_report_html.py\n"
            "and commit the updated file.\n"
        )
268 )