Python · 15852 bytes Raw Blame History
1 """Interactive single-file HTML report (S12 / F6).
2
3 The terminal renderer is right for CI logs; markdown is right for
4 checked-in PR artifacts. Neither supports *exploration* — clicking
5 through per-section SIS bars, hovering over the ablation curve to
6 read exact λ values, zooming into the probe scatter. This module
7 produces a self-contained HTML page with four interactive Plotly
8 panels for the research / write-up case:
9
10 1. Composite score gauge + per-category breakdown.
11 2. Per-section SIS bar chart (when ``section_internalization`` ran).
12 3. Adapter-ablation response curve (when ``adapter_ablation`` ran).
13 4. All-probe score + z-score scatter with hover tooltips.
14
15 Plotly's JS bundle is inlined once in ``<head>``; each panel's
16 ``<div>`` gets a stable id so snapshot tests don't churn on every
17 render. The output is typically ~3.6 MB (Plotly is ~3 MB of that)
18 and loads with zero network calls.
19
20 ``plotly`` is an *optional* dependency shipped via the ``[viz]`` extra.
21 When it's not importable, :func:`to_html` raises ``RuntimeError``
22 with an install hint the CLI can surface.
23 """
24
25 from __future__ import annotations
26
27 import html
28 from typing import Any
29
30 from dlm_sway.core.result import ProbeResult, SuiteResult, SwayScore, Verdict
31
#: Palette used across panels, keyed by probe verdict. Matches the
#: terminal verdict colors so a user mapping the HTML report back to
#: the `sway run` output sees the same color grammar.
_VERDICT_COLOR: dict[Verdict, str] = {
    Verdict.PASS: "#28a745",
    Verdict.FAIL: "#dc3545",
    Verdict.WARN: "#ffc107",
    Verdict.SKIP: "#6c757d",
    Verdict.ERROR: "#9c27b0",
}
42
#: Division colors for the category bars — same per-component palette
#: we publish in the README. The ``"ablation"`` entry also colors the
#: adapter-ablation response curve; categories missing from this map
#: are drawn in a neutral grey by the category chart.
_CATEGORY_COLOR: dict[str, str] = {
    "adherence": "#0d6efd",
    "attribution": "#198754",
    "calibration": "#fd7e14",
    "ablation": "#6f42c1",
    "baseline": "#adb5bd",
}
52
#: Stable div IDs so the emitted HTML is byte-identical for the same
#: inputs. Plotly defaults to a random UUID per figure otherwise.
#: Each id is passed to Plotly as the figure's ``div_id`` and is also
#: used (prefixed with ``panel-``) as the id of the wrapping section.
_DIV_GAUGE = "sway-gauge"
_DIV_CATEGORY = "sway-category"
_DIV_SIS = "sway-sis"
_DIV_ABLATION = "sway-ablation"
_DIV_SCATTER = "sway-scatter"
60
61
62 # ----------------------------------------------------------------------
63 # Entry point
64 # ----------------------------------------------------------------------
65
66
def to_html(suite: SuiteResult, score: SwayScore) -> str:
    """Build the standalone interactive HTML report for one suite run.

    Figures are built in display order (gauge, category bars,
    per-section bars, ablation curve, probe scatter). The per-section
    and ablation builders return ``None`` when they have nothing to
    plot, and those panels are left out of the page. Plotly's JS bundle
    is fetched once and handed to the page template.

    Raises
    ------
    RuntimeError
        When ``plotly`` is not importable. The message carries the
        install hint (``pip install 'dlm-sway[viz]'``).
    """
    try:
        import plotly.graph_objects as go
        import plotly.io as pio
        from plotly.offline import get_plotlyjs
    except ImportError as exc:
        raise RuntimeError(
            "plotly is required for --format html. Install with: pip install 'dlm-sway[viz]'"
        ) from exc

    # (heading, div id, figure) in page order; every figure is built
    # before any of them is serialized.
    candidates = (
        ("Composite score", _DIV_GAUGE, _gauge_figure(go, score)),
        ("Category breakdown", _DIV_CATEGORY, _category_figure(go, score)),
        ("Per-section internalization", _DIV_SIS, _sis_figure(go, suite)),
        ("Adapter-ablation response", _DIV_ABLATION, _ablation_figure(go, suite)),
        ("Per-probe score vs. z-score", _DIV_SCATTER, _scatter_figure(go, suite)),
    )
    panels: list[tuple[str, str, str]] = [
        (heading, div_id, _fig_to_div(pio, figure, div_id))
        for heading, div_id, figure in candidates
        if figure is not None
    ]

    return _assemble(suite, score, panels, plotly_js=get_plotlyjs())
112
113
114 # ----------------------------------------------------------------------
115 # Figures
116 # ----------------------------------------------------------------------
117
118
def _gauge_figure(go: Any, score: SwayScore) -> Any:
    """Build the composite-score dial: a 0..1 gauge over four shaded bands.

    The needle bar is colored by ``score.band`` and the band name is the
    gauge title (``unscored`` when the band is empty). A missing overall
    score is drawn as ``0.0``.
    """
    needle = 0.0 if score.overall is None else float(score.overall)
    band_steps = [
        {"range": [0.00, 0.30], "color": "#f8d7da"},  # noise
        {"range": [0.30, 0.60], "color": "#fff3cd"},  # partial
        {"range": [0.60, 0.85], "color": "#d1e7dd"},  # healthy
        {"range": [0.85, 1.00], "color": "#e2e3ff"},  # suspicious
    ]
    dial = go.Indicator(
        mode="gauge+number",
        value=needle,
        number={"valueformat": ".2f", "font": {"size": 48}},
        gauge={
            "axis": {"range": [0.0, 1.0]},
            "bar": {"color": _band_color(score.band)},
            "steps": band_steps,
        },
        title={"text": f"<b>{score.band or 'unscored'}</b>"},
    )
    frame = go.Layout(height=320, margin={"l": 20, "r": 20, "t": 60, "b": 20})
    return go.Figure(dial, layout=frame)
141
142
def _category_figure(go: Any, score: SwayScore) -> Any:
    """Build the horizontal bar chart of ``score.components``.

    Bars are ordered by category name so the layout is stable between
    renders; categories without a palette entry are drawn in grey.
    """
    by_name = sorted(score.components.items(), key=lambda entry: entry[0])
    names = [name for name, _ in by_name]
    scores = [float(value) for _, value in by_name]
    bar_colors = [_CATEGORY_COLOR.get(name, "#888888") for name in names]

    bars = go.Bar(
        x=scores,
        y=names,
        orientation="h",
        marker={"color": bar_colors},
        hovertemplate="%{y}: %{x:.3f}<extra></extra>",
    )
    frame = go.Layout(
        xaxis={"range": [0.0, 1.0], "title": "component score"},
        yaxis={"title": ""},
        height=260,
        margin={"l": 100, "r": 20, "t": 30, "b": 40},
    )
    return go.Figure(bars, layout=frame)
165
166
def _sis_figure(go: Any, suite: SuiteResult) -> Any | None:
    """Per-section internalization bar chart. ``None`` if no data.

    Reads the ``per_section`` evidence list of the first scored
    ``section_internalization`` probe. Each row contributes one bar:
    labelled by ``section_id`` (falling back to ``tag``, then ``"?"``),
    sized by ``effective_sis`` and colored PASS/FAIL from ``passed``.

    Returns ``None`` when the probe is absent or ``per_section`` is
    missing, empty, or not a list.
    """
    probe = _first_probe_of_kind(suite, "section_internalization")
    if probe is None:
        return None
    per_section = probe.evidence.get("per_section")
    if not isinstance(per_section, list) or not per_section:
        return None
    labels = [str(row.get("section_id") or row.get("tag") or "?") for row in per_section]
    # ``dict.get``'s default only covers a *missing* key; a row that
    # carries ``effective_sis: None`` would otherwise crash ``float``.
    # Treat it like the missing case and draw a zero-height bar.
    values = [float(row.get("effective_sis") or 0.0) for row in per_section]
    colors = [
        _VERDICT_COLOR[Verdict.PASS if row.get("passed") else Verdict.FAIL]
        for row in per_section
    ]
    return go.Figure(
        go.Bar(
            x=labels,
            y=values,
            marker={"color": colors},
            hovertemplate="<b>%{x}</b><br>effective_sis=%{y:.3f}<extra></extra>",
        ),
        layout=go.Layout(
            xaxis={"title": "section"},
            yaxis={"title": "effective_sis (own - leak)"},
            height=320,
            margin={"l": 60, "r": 20, "t": 30, "b": 80},
        ),
    )
193
194
def _ablation_figure(go: Any, suite: SuiteResult) -> Any | None:
    """Build the λ-vs-divergence line chart for the ablation probe.

    Uses the ``lambdas`` and ``mean_divergence_per_lambda`` evidence of
    the first scored ``adapter_ablation`` probe, and marks
    ``saturation_lambda`` with a dashed vertical line when it is present.

    Returns ``None`` when the probe is absent or either series is
    missing or empty.
    """
    probe = _first_probe_of_kind(suite, "adapter_ablation")
    if probe is None:
        return None

    evidence = probe.evidence
    lambdas = evidence.get("lambdas")
    divergences = evidence.get("mean_divergence_per_lambda")
    if not (lambdas and divergences):
        return None

    accent = _CATEGORY_COLOR["ablation"]
    curve = go.Scatter(
        x=list(lambdas),
        y=list(divergences),
        mode="lines+markers",
        marker={"color": accent, "size": 10},
        line={"color": accent, "width": 2},
        hovertemplate="λ=%{x}<br>div=%{y:.4f}<extra></extra>",
        name="divergence",
    )
    frame = go.Layout(
        xaxis={"title": "lambda"},
        yaxis={"title": "mean divergence"},
        height=320,
        margin={"l": 60, "r": 20, "t": 30, "b": 50},
    )
    fig = go.Figure(curve, layout=frame)

    saturation = evidence.get("saturation_lambda")
    if saturation is None:
        return fig

    sat_value = float(saturation)
    fig.add_vline(
        x=sat_value,
        line_dash="dash",
        line_color="#6c757d",
        annotation_text=f"sat_λ={sat_value:.2f}",
        annotation_position="top",
    )
    return fig
231
232
def _scatter_figure(go: Any, suite: SuiteResult) -> Any:
    """Build the per-probe scatter: score on x, z-score on y, verdict color.

    Probes without a numeric score are left off the plot rather than
    piled up at (0, 0). A scored probe with no z-score is placed at
    ``y = 0.0`` and its hover text shows an em dash for z.
    """
    scored = [probe for probe in suite.probes if probe.score is not None]

    def hover_label(probe: ProbeResult) -> str:
        # Name and kind are escaped before being embedded in hover markup.
        z_text = "—" if probe.z_score is None else f"{probe.z_score:+.2f}σ"
        return (
            f"<b>{html.escape(probe.name)}</b><br>"
            f"kind: {html.escape(probe.kind)}<br>"
            f"verdict: {probe.verdict.value}<br>"
            f"score: {probe.score:.3f}<br>"
            f"z: {z_text}"
        )

    points = go.Scatter(
        x=[float(probe.score) for probe in scored],
        y=[0.0 if probe.z_score is None else float(probe.z_score) for probe in scored],
        mode="markers",
        marker={
            "size": 14,
            "color": [_VERDICT_COLOR.get(probe.verdict, "#888888") for probe in scored],
            "line": {"color": "#333", "width": 1},
        },
        text=[hover_label(probe) for probe in scored],
        hovertemplate="%{text}<extra></extra>",
    )
    frame = go.Layout(
        xaxis={"title": "score", "range": [0.0, 1.0]},
        yaxis={"title": "z-score (σ)", "zeroline": True},
        height=360,
        margin={"l": 60, "r": 20, "t": 30, "b": 50},
    )
    return go.Figure(points, layout=frame)
271
272
273 # ----------------------------------------------------------------------
274 # Assembly
275 # ----------------------------------------------------------------------
276
277
def _fig_to_div(pio: Any, fig: Any, div_id: str) -> str:
    """Serialize one figure to an HTML fragment with a fixed div id.

    The Plotly bundle is deliberately left out of the fragment because
    the page template embeds it once in ``<head>``. Whatever ``pio``
    returns is coerced to ``str``.
    """
    viewer_config = {"displaylogo": False, "responsive": True}
    fragment = pio.to_html(
        fig,
        include_plotlyjs=False,
        full_html=False,
        div_id=div_id,
        config=viewer_config,
    )
    return str(fragment)
289
290
def _assemble(
    suite: SuiteResult,
    score: SwayScore,
    panels: list[tuple[str, str, str]],
    *,
    plotly_js: str,
) -> str:
    """Stitch the page together: header card, panels, probe table.

    Parameters
    ----------
    suite
        Supplies the model/adapter ids, sway version, wall time and the
        probes summarized in the header and listed in the table.
    score
        Supplies the overall score and band shown in the summary line.
    panels
        ``(heading, div_id, div_html)`` triples in display order.
        ``div_html`` is inserted verbatim; heading and id are escaped.
    plotly_js
        Plotly's JS source, inserted verbatim into the ``<script>`` tag
        in ``<head>``.

    Returns
    -------
    str
        The complete HTML document.
    """
    title = html.escape(f"sway report — {suite.adapter_id or suite.base_model_id}")
    verdict_summary = _verdict_summary(suite)
    # An unscored run has ``overall is None`` (the gauge handles the same
    # case); applying ".2f" to None raises TypeError, so show the same
    # em-dash placeholder used for other missing values.
    overall_text = "—" if score.overall is None else f"{score.overall:.2f}"
    header = (
        f"<h1>{title}</h1>"
        f"<p class='meta'>"
        f"base: <code>{html.escape(suite.base_model_id)}</code> · "
        f"adapter: <code>{html.escape(suite.adapter_id or '—')}</code> · "
        f"sway {html.escape(suite.sway_version)} · "
        f"wall: {suite.wall_seconds:.2f}s"
        f"</p>"
        f"<p class='summary'><b>overall</b>: {overall_text} "
        f"({html.escape(score.band or '—')})"
        f" · {verdict_summary}</p>"
    )

    panel_html = "\n".join(
        f"<section class='panel' id='panel-{html.escape(div_id)}'>"
        f"<h2>{html.escape(panel_title)}</h2>"
        f"{div_html}"
        f"</section>"
        for panel_title, div_id, div_html in panels
    )

    probe_table = _probe_table_html(suite)

    # ``title`` is already escaped above and is reused for <title>.
    return _TEMPLATE.format(
        title=title,
        plotly_js=plotly_js,
        header=header,
        panels=panel_html,
        probe_table=probe_table,
    )
332
333
def _verdict_summary(suite: SuiteResult) -> str:
    """Summarize how many probes landed on each verdict.

    Verdicts that occurred are rendered as ``<span class='v-<verdict>'>``
    elements in fixed PASS, FAIL, WARN, SKIP, ERROR order, joined with
    `` · ``. Returns ``"no probes ran"`` when nothing was counted.
    """
    tally: dict[Verdict, int] = {}
    for probe in suite.probes:
        tally[probe.verdict] = tally.get(probe.verdict, 0) + 1

    display_order = (Verdict.PASS, Verdict.FAIL, Verdict.WARN, Verdict.SKIP, Verdict.ERROR)
    spans = [
        f"<span class='v-{verdict.value}'>{tally[verdict]} {html.escape(verdict.value)}</span>"
        for verdict in display_order
        if verdict in tally
    ]
    if not spans:
        return "no probes ran"
    return " · ".join(spans)
343
344
def _probe_table_html(suite: SuiteResult) -> str:
    """Render every probe as one row of the textual table under the charts.

    Columns are name, kind, verdict, score, raw, z and note. Missing
    score/raw/z values render as an em dash; name, kind, verdict text
    and note are HTML-escaped.
    """
    missing = "—"
    body_rows: list[str] = []
    for probe in suite.probes:
        verdict_text = probe.verdict.value
        score_cell = missing if probe.score is None else f"{probe.score:.2f}"
        raw_cell = missing if probe.raw is None else f"{probe.raw:,.3f}"
        z_cell = missing if probe.z_score is None else f"{probe.z_score:+.2f}σ"
        cells = (
            f"<td>{html.escape(probe.name)}</td>",
            f"<td><code>{html.escape(probe.kind)}</code></td>",
            f"<td class='v-{verdict_text}'>{html.escape(verdict_text)}</td>",
            f"<td>{score_cell}</td>",
            f"<td>{raw_cell}</td>",
            f"<td>{z_cell}</td>",
            f"<td class='note'>{html.escape(probe.message or '')}</td>",
        )
        body_rows.append("<tr>" + "".join(cells) + "</tr>")

    opening = (
        "<section class='probe-table'>"
        "<h2>Probes</h2>"
        "<table>"
        "<thead><tr><th>name</th><th>kind</th><th>verdict</th>"
        "<th>score</th><th>raw</th><th>z</th><th>note</th></tr></thead>"
        "<tbody>"
    )
    closing = "</tbody></table></section>"
    return opening + "".join(body_rows) + closing
368
369
def _first_probe_of_kind(suite: SuiteResult, kind: str) -> ProbeResult | None:
    """Return the first probe of ``kind`` that has a score, else ``None``.

    Probes of the right kind whose ``score`` is ``None`` are passed over.
    """
    scored_matches = (
        probe
        for probe in suite.probes
        if probe.kind == kind and probe.score is not None
    )
    return next(scored_matches, None)
375
376
def _band_color(band: str) -> str:
    """Map a score band name to its gauge bar color.

    Any value that is not one of the four known bands gets neutral grey.
    """
    palette = {
        "noise": "#dc3545",
        "partial": "#ffc107",
        "healthy": "#28a745",
        "suspicious": "#9c27b0",
    }
    if band in palette:
        return palette[band]
    return "#6c757d"
384
385
# ----------------------------------------------------------------------
# Static template. Kept inline (no separate template file) because a
# single page has one consumer and a two-file split would be more
# ceremony than it's worth at this scale.
#
# Filled via ``str.format`` with the fields ``title``, ``plotly_js``,
# ``header``, ``panels`` and ``probe_table``. Because of that, literal
# CSS braces are doubled (``{{`` / ``}}``). The ``.v-*`` classes style
# the verdict cells and summary spans.
# ----------------------------------------------------------------------

_TEMPLATE = """<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="generator" content="sway report html">
<title>{title}</title>
<style>
body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
margin: 0; padding: 2rem; color: #222; background: #fafafa; max-width: 1100px;
margin-left: auto; margin-right: auto; }}
h1 {{ margin-bottom: 0.25rem; }}
p.meta {{ color: #666; margin-top: 0; }}
p.summary {{ font-size: 1.1rem; margin-top: 0.5rem; }}
section.panel {{ background: #fff; border: 1px solid #e0e0e0; border-radius: 6px;
padding: 1rem; margin-top: 1rem; }}
section.panel h2 {{ margin-top: 0; font-size: 1.1rem; color: #333; }}
section.probe-table {{ background: #fff; border: 1px solid #e0e0e0; border-radius: 6px;
padding: 1rem; margin-top: 1rem; }}
section.probe-table table {{ border-collapse: collapse; width: 100%; font-size: 0.9rem; }}
section.probe-table th, section.probe-table td {{ padding: 0.4rem 0.6rem;
border-bottom: 1px solid #eee;
text-align: left; vertical-align: top; }}
section.probe-table td.note {{ color: #555; }}
.v-pass {{ color: #28a745; font-weight: bold; }}
.v-fail {{ color: #dc3545; font-weight: bold; }}
.v-warn {{ color: #c98a00; font-weight: bold; }}
.v-skip {{ color: #6c757d; }}
.v-error {{ color: #9c27b0; font-weight: bold; }}
code {{ font-family: 'Menlo', 'Consolas', monospace; font-size: 0.9em;
background: #f0f0f0; padding: 0.05em 0.3em; border-radius: 3px; }}
</style>
<script type="text/javascript">{plotly_js}</script>
</head>
<body>
{header}
{panels}
{probe_table}
</body>
</html>
"""


# Public API: only the entry point is exported.
__all__ = ["to_html"]
434 __all__ = ["to_html"]