| 1 | """Interactive single-file HTML report (S12 / F6). |
| 2 | |
| 3 | The terminal renderer is right for CI logs; markdown is right for |
| 4 | checked-in PR artifacts. Neither supports *exploration* — clicking |
| 5 | through per-section SIS bars, hovering over the ablation curve to |
| 6 | read exact λ values, zooming into the probe scatter. This module |
| 7 | produces a self-contained HTML page with four interactive Plotly |
| 8 | panels for the research / write-up case: |
| 9 | |
| 10 | 1. Composite score gauge + per-category breakdown. |
| 11 | 2. Per-section SIS bar chart (when ``section_internalization`` ran). |
| 12 | 3. Adapter-ablation response curve (when ``adapter_ablation`` ran). |
| 13 | 4. All-probe score + z-score scatter with hover tooltips. |
| 14 | |
| 15 | Plotly's JS bundle is inlined once in ``<head>``; each panel's |
| 16 | ``<div>`` gets a stable id so snapshot tests don't churn on every |
| 17 | render. The output is typically ~3.6 MB (Plotly is ~3 MB of that) |
| 18 | and loads with zero network calls. |
| 19 | |
| 20 | ``plotly`` is an *optional* dependency shipped via the ``[viz]`` extra. |
| 21 | When it's not importable, :func:`to_html` raises ``RuntimeError`` |
| 22 | with an install hint the CLI can surface. |
| 23 | """ |
| 24 | |
| 25 | from __future__ import annotations |
| 26 | |
| 27 | import html |
| 28 | from typing import Any |
| 29 | |
| 30 | from dlm_sway.core.result import ProbeResult, SuiteResult, SwayScore, Verdict |
| 31 | |
#: Palette used across panels. Matches the terminal verdict colors so a
#: user mapping the HTML report back to the `sway run` output sees the
#: same color grammar. Used for the per-section bars (PASS/FAIL only)
#: and for the scatter markers. The ``.v-*`` CSS rules in ``_TEMPLATE``
#: repeat these hex values by hand, except ``.v-warn`` which uses the
#: darker ``#c98a00``.
_VERDICT_COLOR: dict[Verdict, str] = {
    Verdict.PASS: "#28a745",
    Verdict.FAIL: "#dc3545",
    Verdict.WARN: "#ffc107",
    Verdict.SKIP: "#6c757d",
    Verdict.ERROR: "#9c27b0",
}
| 42 | |
#: Division colors for the category bars — same per-component palette
#: we publish in the README. Categories missing from this map are drawn
#: in the neutral ``#888888`` by the category chart; the ``"ablation"``
#: entry also colors the adapter-ablation response curve.
_CATEGORY_COLOR: dict[str, str] = {
    "adherence": "#0d6efd",
    "attribution": "#198754",
    "calibration": "#fd7e14",
    "ablation": "#6f42c1",
    "baseline": "#adb5bd",
}
| 52 | |
#: Stable div IDs so the emitted HTML is byte-identical for the same
#: inputs. Plotly defaults to a random UUID per figure otherwise.
#: Each id is handed to ``pio.to_html(div_id=...)`` and is also used,
#: prefixed with ``panel-``, as the id of the wrapping ``<section>``.
_DIV_GAUGE = "sway-gauge"
_DIV_CATEGORY = "sway-category"
_DIV_SIS = "sway-sis"
_DIV_ABLATION = "sway-ablation"
_DIV_SCATTER = "sway-scatter"
| 60 | |
| 61 | |
| 62 | # ---------------------------------------------------------------------- |
| 63 | # Entry point |
| 64 | # ---------------------------------------------------------------------- |
| 65 | |
| 66 | |
def to_html(suite: SuiteResult, score: SwayScore) -> str:
    """Render a ``SuiteResult``/``SwayScore`` pair as one self-contained HTML page.

    Parameters
    ----------
    suite
        Probe results; feeds the per-section, ablation and scatter
        panels as well as the page header and probe table.
    score
        Composite score; feeds the gauge and the category breakdown.

    Returns
    -------
    str
        The full HTML document with the Plotly bundle inlined.

    Raises
    ------
    RuntimeError
        When ``plotly`` is not importable. The message carries the
        install hint (``pip install 'dlm-sway[viz]'``) for the CLI to
        surface.
    """
    try:
        import plotly.graph_objects as go
        import plotly.io as pio
        from plotly.offline import get_plotlyjs
    except ImportError as exc:
        raise RuntimeError(
            "plotly is required for --format html. Install with: pip install 'dlm-sway[viz]'"
        ) from exc

    # Build every figure up front, in display order. The two optional
    # builders return ``None`` when their probe did not produce data.
    candidates: list[tuple[str, str, Any]] = [
        ("Composite score", _DIV_GAUGE, _gauge_figure(go, score)),
        ("Category breakdown", _DIV_CATEGORY, _category_figure(go, score)),
        ("Per-section internalization", _DIV_SIS, _sis_figure(go, suite)),
        ("Adapter-ablation response", _DIV_ABLATION, _ablation_figure(go, suite)),
        ("Per-probe score vs. z-score", _DIV_SCATTER, _scatter_figure(go, suite)),
    ]

    # Only figures that exist become panels; each is rendered to a bare
    # <div> that relies on the single Plotly bundle embedded by _assemble.
    panels: list[tuple[str, str, str]] = [
        (heading, div_id, _fig_to_div(pio, figure, div_id))
        for heading, div_id, figure in candidates
        if figure is not None
    ]

    return _assemble(suite, score, panels, plotly_js=get_plotlyjs())
| 112 | |
| 113 | |
| 114 | # ---------------------------------------------------------------------- |
| 115 | # Figures |
| 116 | # ---------------------------------------------------------------------- |
| 117 | |
| 118 | |
def _gauge_figure(go: Any, score: SwayScore) -> Any:
    """Build the composite-score gauge (axis 0..1, four shaded bands).

    ``go`` is the ``plotly.graph_objects`` module. A missing overall
    score is drawn as 0.0 and a missing band is titled ``unscored``.
    """
    needle = 0.0 if score.overall is None else float(score.overall)

    # (low, high, fill) for each background band on the dial.
    band_ranges = (
        (0.00, 0.30, "#f8d7da"),  # noise
        (0.30, 0.60, "#fff3cd"),  # partial
        (0.60, 0.85, "#d1e7dd"),  # healthy
        (0.85, 1.00, "#e2e3ff"),  # suspicious
    )
    steps = [{"range": [low, high], "color": fill} for low, high, fill in band_ranges]

    indicator = go.Indicator(
        mode="gauge+number",
        value=needle,
        number={"valueformat": ".2f", "font": {"size": 48}},
        gauge={
            "axis": {"range": [0.0, 1.0]},
            "bar": {"color": _band_color(score.band)},
            "steps": steps,
        },
        title={"text": f"<b>{score.band or 'unscored'}</b>"},
    )
    layout = go.Layout(height=320, margin={"l": 20, "r": 20, "t": 60, "b": 20})
    return go.Figure(indicator, layout=layout)
| 141 | |
| 142 | |
def _category_figure(go: Any, score: SwayScore) -> Any:
    """Build the horizontal bar chart of per-category component scores.

    Bars are ordered alphabetically by category name so the output is
    stable; categories without a palette entry are drawn in grey.
    """
    pairs = sorted(
        [(name, float(value)) for name, value in score.components.items()],
        key=lambda pair: pair[0],
    )

    names: list[str] = []
    magnitudes: list[float] = []
    fills: list[str] = []
    for name, magnitude in pairs:
        names.append(name)
        magnitudes.append(magnitude)
        fills.append(_CATEGORY_COLOR.get(name, "#888888"))

    bars = go.Bar(
        x=magnitudes,
        y=names,
        orientation="h",
        marker={"color": fills},
        hovertemplate="%{y}: %{x:.3f}<extra></extra>",
    )
    layout = go.Layout(
        xaxis={"range": [0.0, 1.0], "title": "component score"},
        yaxis={"title": ""},
        height=260,
        margin={"l": 100, "r": 20, "t": 30, "b": 40},
    )
    return go.Figure(bars, layout=layout)
| 165 | |
| 166 | |
def _sis_figure(go: Any, suite: SuiteResult) -> Any | None:
    """Build the per-section internalization bar chart.

    Returns ``None`` when no scored ``section_internalization`` probe
    exists, or when its ``per_section`` evidence is empty or not a list.
    """
    probe = _first_probe_of_kind(suite, "section_internalization")
    rows = None if probe is None else probe.evidence.get("per_section")
    if not (rows and isinstance(rows, list)):
        return None

    # A section is labelled by its id, then its tag, then "?".
    section_names = [str(row.get("section_id") or row.get("tag") or "?") for row in rows]
    sis_values = [float(row.get("effective_sis", 0.0)) for row in rows]

    # Each bar is green or red depending on the row's own pass flag.
    pass_fill = _VERDICT_COLOR[Verdict.PASS]
    fail_fill = _VERDICT_COLOR[Verdict.FAIL]
    bar_fills = [pass_fill if row.get("passed") else fail_fill for row in rows]

    bars = go.Bar(
        x=section_names,
        y=sis_values,
        marker={"color": bar_fills},
        hovertemplate="<b>%{x}</b><br>effective_sis=%{y:.3f}<extra></extra>",
    )
    layout = go.Layout(
        xaxis={"title": "section"},
        yaxis={"title": "effective_sis (own - leak)"},
        height=320,
        margin={"l": 60, "r": 20, "t": 30, "b": 80},
    )
    return go.Figure(bars, layout=layout)
| 193 | |
| 194 | |
def _ablation_figure(go: Any, suite: SuiteResult) -> Any | None:
    """Build the λ vs. mean-divergence response curve.

    Returns ``None`` when no scored ``adapter_ablation`` probe exists or
    when either the ``lambdas`` or ``mean_divergence_per_lambda``
    evidence is missing or empty. If the evidence carries a
    ``saturation_lambda``, it is marked with a dashed vertical line.
    """
    probe = _first_probe_of_kind(suite, "adapter_ablation")
    if probe is None:
        return None

    evidence = probe.evidence
    lambdas = evidence.get("lambdas")
    divergences = evidence.get("mean_divergence_per_lambda")
    if not (lambdas and divergences):
        return None
    saturation = evidence.get("saturation_lambda")

    curve_color = _CATEGORY_COLOR["ablation"]
    curve = go.Scatter(
        x=list(lambdas),
        y=list(divergences),
        mode="lines+markers",
        marker={"color": curve_color, "size": 10},
        line={"color": curve_color, "width": 2},
        hovertemplate="λ=%{x}<br>div=%{y:.4f}<extra></extra>",
        name="divergence",
    )
    layout = go.Layout(
        xaxis={"title": "lambda"},
        yaxis={"title": "mean divergence"},
        height=320,
        margin={"l": 60, "r": 20, "t": 30, "b": 50},
    )
    figure = go.Figure(curve, layout=layout)

    if saturation is None:
        return figure

    saturation_x = float(saturation)
    figure.add_vline(
        x=saturation_x,
        line_dash="dash",
        line_color="#6c757d",
        annotation_text=f"sat_λ={saturation_x:.2f}",
        annotation_position="top",
    )
    return figure
| 231 | |
| 232 | |
def _scatter_figure(go: Any, suite: SuiteResult) -> Any:
    """Build the score vs. z-score scatter, one marker per scored probe.

    Markers are colored by verdict and carry a hover card with the
    probe's name, kind, verdict, score and z-score.
    """
    # Probes without a numeric score are left out of the scatter (they
    # still get a row in the probe table); a scored probe with no
    # z-score is placed at y=0 and its hover card shows a dash.
    points = [
        (
            float(probe.score),
            0.0 if probe.z_score is None else float(probe.z_score),
            "<br>".join(
                (
                    f"<b>{html.escape(probe.name)}</b>",
                    f"kind: {html.escape(probe.kind)}",
                    f"verdict: {probe.verdict.value}",
                    f"score: {probe.score:.3f}",
                    "z: " + ("—" if probe.z_score is None else f"{probe.z_score:+.2f}σ"),
                )
            ),
            _VERDICT_COLOR.get(probe.verdict, "#888888"),
        )
        for probe in suite.probes
        if probe.score is not None
    ]
    score_axis: list[float] = [point[0] for point in points]
    z_axis: list[float] = [point[1] for point in points]
    hover_cards: list[str] = [point[2] for point in points]
    marker_fills: list[str] = [point[3] for point in points]

    markers = go.Scatter(
        x=score_axis,
        y=z_axis,
        mode="markers",
        marker={"size": 14, "color": marker_fills, "line": {"color": "#333", "width": 1}},
        text=hover_cards,
        hovertemplate="%{text}<extra></extra>",
    )
    layout = go.Layout(
        xaxis={"title": "score", "range": [0.0, 1.0]},
        yaxis={"title": "z-score (σ)", "zeroline": True},
        height=360,
        margin={"l": 60, "r": 20, "t": 30, "b": 50},
    )
    return go.Figure(markers, layout=layout)
| 271 | |
| 272 | |
| 273 | # ---------------------------------------------------------------------- |
| 274 | # Assembly |
| 275 | # ---------------------------------------------------------------------- |
| 276 | |
| 277 | |
| 278 | def _fig_to_div(pio: Any, fig: Any, div_id: str) -> str: |
| 279 | """Render one figure as a div, reusing the JS we embed once in <head>.""" |
| 280 | return str( |
| 281 | pio.to_html( |
| 282 | fig, |
| 283 | include_plotlyjs=False, |
| 284 | full_html=False, |
| 285 | div_id=div_id, |
| 286 | config={"displaylogo": False, "responsive": True}, |
| 287 | ) |
| 288 | ) |
| 289 | |
| 290 | |
def _assemble(
    suite: SuiteResult,
    score: SwayScore,
    panels: list[tuple[str, str, str]],
    *,
    plotly_js: str,
) -> str:
    """Stitch the page together: header card, panels, probe table.

    Parameters
    ----------
    suite
        Supplies the model/adapter ids, sway version, wall time and the
        probes summarized in the header and table.
    score
        Supplies the overall value and band shown in the summary line.
    panels
        ``(heading, div_id, div_html)`` triples, rendered in order. The
        heading and id are HTML-escaped here; ``div_html`` is inserted
        verbatim.
    plotly_js
        Plotly bundle source, inlined into the page ``<head>``.

    Returns
    -------
    str
        The complete HTML document.
    """
    title = html.escape(f"sway report — {suite.adapter_id or suite.base_model_id}")
    verdict_summary = _verdict_summary(suite)

    # ``_gauge_figure`` already tolerates ``score.overall is None``.
    # Formatting ``None`` with ``:.2f`` raises TypeError, so an unscored
    # suite shows a dash here instead of aborting the whole report.
    overall_text = "—" if score.overall is None else f"{score.overall:.2f}"

    header = (
        f"<h1>{title}</h1>"
        f"<p class='meta'>"
        f"base: <code>{html.escape(suite.base_model_id)}</code> · "
        f"adapter: <code>{html.escape(suite.adapter_id or '—')}</code> · "
        f"sway {html.escape(suite.sway_version)} · "
        f"wall: {suite.wall_seconds:.2f}s"
        f"</p>"
        f"<p class='summary'><b>overall</b>: {overall_text} "
        f"({html.escape(score.band or '—')})"
        f" · {verdict_summary}</p>"
    )

    # Each panel is wrapped in a <section> whose id is derived from the
    # panel's own div id.
    panel_html_parts = [
        f"<section class='panel' id='panel-{html.escape(div_id)}'>"
        f"<h2>{html.escape(title_)}</h2>"
        f"{div_html}"
        f"</section>"
        for title_, div_id, div_html in panels
    ]

    probe_table = _probe_table_html(suite)

    return _TEMPLATE.format(
        title=title,
        plotly_js=plotly_js,
        header=header,
        panels="\n".join(panel_html_parts),
        probe_table=probe_table,
    )
| 332 | |
| 333 | |
def _verdict_summary(suite: SuiteResult) -> str:
    """Return the per-verdict probe counts as ``·``-separated HTML spans.

    Verdicts are listed in a fixed order (pass, fail, warn, skip, error)
    and only those that occurred are shown. A suite with no probes
    yields the plain text ``no probes ran``.
    """
    tally: dict[Verdict, int] = {}
    for probe in suite.probes:
        tally.setdefault(probe.verdict, 0)
        tally[probe.verdict] += 1

    display_order = (Verdict.PASS, Verdict.FAIL, Verdict.WARN, Verdict.SKIP, Verdict.ERROR)
    chips = [
        f"<span class='v-{verdict.value}'>{tally[verdict]} {html.escape(verdict.value)}</span>"
        for verdict in display_order
        if verdict in tally
    ]
    if not chips:
        return "no probes ran"
    return " · ".join(chips)
| 343 | |
| 344 | |
| 345 | def _probe_table_html(suite: SuiteResult) -> str: |
| 346 | """Textual per-probe table under the charts — same columns as markdown.""" |
| 347 | rows: list[str] = [] |
| 348 | for p in suite.probes: |
| 349 | rows.append( |
| 350 | "<tr>" |
| 351 | f"<td>{html.escape(p.name)}</td>" |
| 352 | f"<td><code>{html.escape(p.kind)}</code></td>" |
| 353 | f"<td class='v-{p.verdict.value}'>{html.escape(p.verdict.value)}</td>" |
| 354 | f"<td>{'—' if p.score is None else f'{p.score:.2f}'}</td>" |
| 355 | f"<td>{'—' if p.raw is None else f'{p.raw:,.3f}'}</td>" |
| 356 | f"<td>{'—' if p.z_score is None else f'{p.z_score:+.2f}σ'}</td>" |
| 357 | f"<td class='note'>{html.escape(p.message or '')}</td>" |
| 358 | "</tr>" |
| 359 | ) |
| 360 | return ( |
| 361 | "<section class='probe-table'>" |
| 362 | "<h2>Probes</h2>" |
| 363 | "<table>" |
| 364 | "<thead><tr><th>name</th><th>kind</th><th>verdict</th>" |
| 365 | "<th>score</th><th>raw</th><th>z</th><th>note</th></tr></thead>" |
| 366 | "<tbody>" + "".join(rows) + "</tbody></table></section>" |
| 367 | ) |
| 368 | |
| 369 | |
| 370 | def _first_probe_of_kind(suite: SuiteResult, kind: str) -> ProbeResult | None: |
| 371 | for p in suite.probes: |
| 372 | if p.kind == kind and p.score is not None: |
| 373 | return p |
| 374 | return None |
| 375 | |
| 376 | |
| 377 | def _band_color(band: str) -> str: |
| 378 | return { |
| 379 | "noise": "#dc3545", |
| 380 | "partial": "#ffc107", |
| 381 | "healthy": "#28a745", |
| 382 | "suspicious": "#9c27b0", |
| 383 | }.get(band, "#6c757d") |
| 384 | |
| 385 | |
| 386 | # ---------------------------------------------------------------------- |
| 387 | # Static template. Kept inline (no separate template file) because a |
| 388 | # single page has one consumer and a two-file split would be more |
| 389 | # ceremony than it's worth at this scale. |
| 390 | # ---------------------------------------------------------------------- |
| 391 | |
# Page skeleton rendered with ``str.format`` by ``_assemble``. The named
# fields are ``title``, ``plotly_js``, ``header``, ``panels`` and
# ``probe_table``; literal CSS braces are doubled (``{{`` / ``}}``) so
# ``format`` emits them as single braces.
_TEMPLATE = """<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="generator" content="sway report html">
<title>{title}</title>
<style>
body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
margin: 0; padding: 2rem; color: #222; background: #fafafa; max-width: 1100px;
margin-left: auto; margin-right: auto; }}
h1 {{ margin-bottom: 0.25rem; }}
p.meta {{ color: #666; margin-top: 0; }}
p.summary {{ font-size: 1.1rem; margin-top: 0.5rem; }}
section.panel {{ background: #fff; border: 1px solid #e0e0e0; border-radius: 6px;
padding: 1rem; margin-top: 1rem; }}
section.panel h2 {{ margin-top: 0; font-size: 1.1rem; color: #333; }}
section.probe-table {{ background: #fff; border: 1px solid #e0e0e0; border-radius: 6px;
padding: 1rem; margin-top: 1rem; }}
section.probe-table table {{ border-collapse: collapse; width: 100%; font-size: 0.9rem; }}
section.probe-table th, section.probe-table td {{ padding: 0.4rem 0.6rem;
border-bottom: 1px solid #eee;
text-align: left; vertical-align: top; }}
section.probe-table td.note {{ color: #555; }}
.v-pass {{ color: #28a745; font-weight: bold; }}
.v-fail {{ color: #dc3545; font-weight: bold; }}
.v-warn {{ color: #c98a00; font-weight: bold; }}
.v-skip {{ color: #6c757d; }}
.v-error {{ color: #9c27b0; font-weight: bold; }}
code {{ font-family: 'Menlo', 'Consolas', monospace; font-size: 0.9em;
background: #f0f0f0; padding: 0.05em 0.3em; border-radius: 3px; }}
</style>
<script type="text/javascript">{plotly_js}</script>
</head>
<body>
{header}
{panels}
{probe_table}
</body>
</html>
"""
| 432 | |
| 433 | |
# Public API: only the entry point is exported; every other name in this
# module is underscore-prefixed.
__all__ = ["to_html"]