@@ -0,0 +1,268 @@ |
| 1 | +"""Tests for :mod:`dlm_sway.suite.report_html` (S12 / F6).""" |
| 2 | + |
| 3 | +from __future__ import annotations |
| 4 | + |
| 5 | +import os |
| 6 | +import re |
| 7 | +from datetime import UTC, datetime |
| 8 | +from html.parser import HTMLParser |
| 9 | +from pathlib import Path |
| 10 | + |
| 11 | +import pytest |
| 12 | + |
| 13 | +from dlm_sway.core.result import ( |
| 14 | + ProbeResult, |
| 15 | + SuiteResult, |
| 16 | + SwayScore, |
| 17 | + Verdict, |
| 18 | +) |
| 19 | +from dlm_sway.suite import report_html |
| 20 | + |
# Directory holding the golden files compared against in the snapshot test:
# a ``snapshots`` folder that is a sibling of this file's own directory.
SNAPSHOT_DIR = Path(__file__).parent.parent / "snapshots"

# Plotly is shipped via the optional [viz] extra. Skip the whole module
# when it's not importable — the install hint path is covered by the
# CLI test.
# NOTE: this runs at import time, so no test in this module is collected
# without Plotly installed.
pytest.importorskip("plotly")
| 27 | + |
| 28 | + |
def _fixture_suite_and_score() -> tuple[SuiteResult, SwayScore]:
    """Build the deterministic suite/score pair shared by the HTML tests.

    The suite carries four probes: a passing ``delta_kl``, a passing
    ``section_internalization`` (with per-section evidence for the SIS
    bars), a passing ``adapter_ablation`` (with the lambda/divergence
    series for the response curve) and a skipped ``leakage`` probe.
    Timestamps are fixed so the rendered output is reproducible.
    """
    delta_kl_probe = ProbeResult(
        name="dk",
        kind="delta_kl",
        verdict=Verdict.PASS,
        score=0.87,
        raw=0.456,
        z_score=5.12,
        evidence={},
        message="mean js=0.4560, z=+5.12σ vs null",
        duration_s=0.1,
    )

    # (section_id, effective_sis, passed) — three of the four sections pass.
    section_rows = (
        ("sec01", 0.18, True),
        ("sec02", 0.21, True),
        ("sec03", 0.03, False),
        ("sec04", 0.10, True),
    )
    sis_probe = ProbeResult(
        name="sis",
        kind="section_internalization",
        verdict=Verdict.PASS,
        score=0.70,
        raw=0.14,
        z_score=3.8,
        evidence={
            "per_section": [
                {"section_id": section_id, "effective_sis": sis, "passed": passed}
                for section_id, sis, passed in section_rows
            ],
            "num_sections": 4,
            "passing_frac": 0.75,
        },
        message="3/4 sections cleared",
        duration_s=0.3,
    )

    ablation_probe = ProbeResult(
        name="abl",
        kind="adapter_ablation",
        verdict=Verdict.PASS,
        score=0.75,
        raw=0.92,
        z_score=3.5,
        evidence={
            "lambdas": [0.0, 0.25, 0.5, 0.75, 1.0, 1.25],
            "mean_divergence_per_lambda": [0.0, 0.05, 0.11, 0.16, 0.19, 0.20],
            "linearity": 0.92,
            "saturation_lambda": 0.75,
            "saturation_reason": "found",
            "overshoot": 1.05,
        },
        message="R²=0.92, sat_λ=0.75 (in band), overshoot=1.05",
        duration_s=0.5,
    )

    # Skipped probe: no score, and raw / z_score / evidence are left unset.
    leakage_probe = ProbeResult(
        name="lk",
        kind="leakage",
        verdict=Verdict.SKIP,
        score=None,
        message="no PROSE sections to test for leakage",
        duration_s=0.0,
    )

    fixture_suite = SuiteResult(
        spec_path="fixture.yaml",
        started_at=datetime(2026, 1, 1, 12, 0, 0, tzinfo=UTC),
        finished_at=datetime(2026, 1, 1, 12, 0, 5, tzinfo=UTC),
        base_model_id="HuggingFaceTB/SmolLM2-135M",
        adapter_id="adapters/test/v1",
        sway_version="0.1.0",
        probes=(delta_kl_probe, sis_probe, ablation_probe, leakage_probe),
    )
    fixture_score = SwayScore(
        overall=0.77,
        components={"adherence": 0.87, "attribution": 0.70, "calibration": 0.0, "ablation": 0.75},
        weights={"adherence": 0.30, "attribution": 0.35, "calibration": 0.20, "ablation": 0.15},
        band="healthy",
    )
    return fixture_suite, fixture_score
| 109 | + |
| 110 | + |
| 111 | +class _WellFormednessChecker(HTMLParser): |
| 112 | + """Trivial subclass: we only use HTMLParser to *not raise*. |
| 113 | + |
| 114 | + The stdlib parser is tolerant; the test is 'it doesn't blow up.' |
| 115 | + Strict XHTML well-formedness isn't what the browser enforces. |
| 116 | + """ |
| 117 | + |
| 118 | + def error(self, message: str) -> None: # pragma: no cover — never called with HTMLParser |
| 119 | + raise AssertionError(f"HTMLParser rejected the output: {message}") |
| 120 | + |
| 121 | + |
def _parse_ok(html_text: str) -> None:
    """Feed *html_text* through the tolerant checker and flush it.

    Returns nothing; the check passes when neither ``feed`` nor ``close``
    raises.
    """
    checker = _WellFormednessChecker(convert_charrefs=True)
    checker.feed(html_text)
    # close() forces any buffered trailing data to be processed as well.
    checker.close()
| 126 | + |
| 127 | + |
class TestToHtml:
    """Checks on the string returned by ``report_html.to_html``."""

    def test_parses_as_html(self) -> None:
        """The full-fixture report goes through the stdlib parser without raising."""
        suite, score = _fixture_suite_and_score()
        out = report_html.to_html(suite, score)
        _parse_ok(out)

    def test_contains_all_probe_names(self) -> None:
        """Every probe name in the fixture suite appears in the output."""
        suite, score = _fixture_suite_and_score()
        out = report_html.to_html(suite, score)
        # Names come from the fixture itself so this list can't drift from it.
        # NOTE(review): the output embeds a multi-MB minified JS bundle, so
        # short names like "dk" very likely match inside it regardless of the
        # wrapper — consider asserting against the bundle-stripped markup.
        for probe in suite.probes:
            assert probe.name in out, f"probe {probe.name!r} not in HTML"

    def test_contains_all_five_panel_divs(self) -> None:
        """With the full fixture, all five panel container ids are present."""
        suite, score = _fixture_suite_and_score()
        out = report_html.to_html(suite, score)
        for div_id in ("sway-gauge", "sway-category", "sway-sis", "sway-ablation", "sway-scatter"):
            assert f'id="{div_id}"' in out, f"panel div {div_id!r} missing"

    def test_plotly_js_inlined_once(self) -> None:
        """The ~3 MB Plotly bundle is embedded, not linked externally.

        Guard: no ``<script src="http..."`` tags exist — everything
        loads from the inline bundle so the page works offline.
        Plotly's bundle body *does* carry the string ``cdn.plot.ly`` as
        an internal default for mapbox config; that's data, not a fetch,
        so we only care about ``<script src=...>`` tags.
        """
        suite, score = _fixture_suite_and_score()
        out = report_html.to_html(suite, score)
        external_scripts = re.findall(r'<script\s+[^>]*src\s*=\s*["\'](https?:[^"\']+)["\']', out)
        assert external_scripts == [], (
            f"HTML pulls in external scripts (should all be inlined): {external_scripts}"
        )
        # Sanity: output is >1 MB (JS bundle is ~3-5 MB — gives us room
        # if Plotly slims down a bit between releases).
        assert len(out) > 1_000_000, f"HTML output suspiciously small: {len(out)} bytes"

    def test_no_sis_panel_when_probe_absent(self) -> None:
        """A suite without section_internalization skips the SIS panel but
        still renders the other four."""
        suite, score = _fixture_suite_and_score()
        pruned_probes = tuple(p for p in suite.probes if p.kind != "section_internalization")
        # Rebuild the suite field by field with only the probes tuple changed.
        suite = SuiteResult(
            spec_path=suite.spec_path,
            started_at=suite.started_at,
            finished_at=suite.finished_at,
            base_model_id=suite.base_model_id,
            adapter_id=suite.adapter_id,
            sway_version=suite.sway_version,
            probes=pruned_probes,
        )
        out = report_html.to_html(suite, score)
        assert 'id="sway-sis"' not in out
        # Check all four remaining panels, as the docstring promises — the
        # gauge and category panels were previously left unverified here.
        for div_id in ("sway-gauge", "sway-category", "sway-ablation", "sway-scatter"):
            assert f'id="{div_id}"' in out, f"panel div {div_id!r} missing"

    def test_zero_probe_suite_still_renders(self) -> None:
        """Empty probes — no crash; the gauge div and the 'no probes ran'
        text are both present in the output."""
        started = datetime(2026, 1, 1, 12, 0, 0, tzinfo=UTC)
        suite = SuiteResult(
            spec_path="empty.yaml",
            started_at=started,
            finished_at=started,
            base_model_id="base",
            adapter_id="",
            sway_version="0.1.0",
            probes=(),
        )
        score = SwayScore(overall=0.0, components={}, band="noise")
        out = report_html.to_html(suite, score)
        _parse_ok(out)
        assert 'id="sway-gauge"' in out
        assert "no probes ran" in out

    def test_raises_when_plotly_missing(self, monkeypatch: pytest.MonkeyPatch) -> None:
        """Simulated ImportError surfaces the install hint."""
        import builtins

        real_import = builtins.__import__

        def fake_import(name, *args, **kwargs):  # type: ignore[no-untyped-def]
            # Fail only for plotly (and its submodules); delegate the rest.
            if name.startswith("plotly"):
                raise ImportError("simulated missing plotly")
            return real_import(name, *args, **kwargs)

        # monkeypatch restores the real __import__ when the test finishes.
        monkeypatch.setattr(builtins, "__import__", fake_import)
        suite, score = _fixture_suite_and_score()
        with pytest.raises(RuntimeError, match=r"plotly.*\[viz\]"):
            report_html.to_html(suite, score)
| 218 | + |
class TestWrapperSnapshot:
    """Golden-file comparison of the Sway-owned HTML wrapper.

    The inlined Plotly bundle is swapped for a placeholder before the
    comparison so Plotly point releases don't churn the snapshot.
    """

    #: Matches the single ``<script>...plotly_bundle...</script>`` we emit
    #: in ``<head>``. Plotly's per-figure scripts live in the body and
    #: carry the stable chart data — those we *do* want in the snapshot.
    _HEAD_SCRIPT_RE = re.compile(
        r'<script type="text/javascript">\s*/\*\*.*?</script>',
        re.DOTALL,
    )

    def test_snapshot(self) -> None:
        """Compare the normalised report against ``report.html`` on disk.

        Run
        ``SWAY_UPDATE_SNAPSHOTS=1 uv run pytest tests/unit/test_report_html.py``
        to regenerate after an intentional wrapper change. Plotly JS
        bundle bumps should NOT drift this — the bundle is stripped before
        the comparison. When the snapshot file is missing, or the env var
        is set to ``1``, the file is (re)written and the test is skipped.
        """
        suite, score = _fixture_suite_and_score()
        rendered = report_html.to_html(suite, score)

        # Swap the first head-bundle script for a short placeholder.
        placeholder = (
            '<script type="text/javascript">'
            "/* plotly bundle — stripped for snapshot */</script>"
        )
        normalized = self._HEAD_SCRIPT_RE.sub(placeholder, rendered, count=1)
        # If nothing changed, the regex never matched the bundle.
        assert normalized != rendered, (
            "failed to strip the Plotly JS bundle from the head — regex didn't match"
        )
        # Mask `"uid": "..."` values (Plotly sprinkles them in some
        # payloads) to keep the snapshot stable across minor Plotly versions.
        normalized = re.sub(r'"uid": ?"[^"]*"', '"uid": "<stripped>"', normalized)

        snapshot_path = SNAPSHOT_DIR / "report.html"
        regenerate = (
            os.environ.get("SWAY_UPDATE_SNAPSHOTS") == "1" or not snapshot_path.exists()
        )
        if not regenerate:
            expected = snapshot_path.read_text(encoding="utf-8")
            assert normalized == expected, (
                "report.html drifted from snapshot.\n"
                "To accept the new output intentionally, run:\n"
                "  SWAY_UPDATE_SNAPSHOTS=1 uv run pytest tests/unit/test_report_html.py\n"
                "and commit the updated file.\n"
            )
            return

        # (Re)write the golden file, then skip rather than pass so the run
        # visibly did not verify anything.
        snapshot_path.parent.mkdir(parents=True, exist_ok=True)
        snapshot_path.write_text(normalized, encoding="utf-8")
        pytest.skip(
            "snapshot report.html written — re-run without SWAY_UPDATE_SNAPSHOTS to verify"
        )