"""Interactive single-file HTML report (S12 / F6). The terminal renderer is right for CI logs; markdown is right for checked-in PR artifacts. Neither supports *exploration* — clicking through per-section SIS bars, hovering over the ablation curve to read exact λ values, zooming into the probe scatter. This module produces a self-contained HTML page with four interactive Plotly panels for the research / write-up case: 1. Composite score gauge + per-category breakdown. 2. Per-section SIS bar chart (when ``section_internalization`` ran). 3. Adapter-ablation response curve (when ``adapter_ablation`` ran). 4. All-probe score + z-score scatter with hover tooltips. Plotly's JS bundle is inlined once in ````; each panel's ``
`` gets a stable id so snapshot tests don't churn on every render. The output is typically ~3.6 MB (Plotly is ~3 MB of that) and loads with zero network calls. ``plotly`` is an *optional* dependency shipped via the ``[viz]`` extra. When it's not importable, :func:`to_html` raises ``RuntimeError`` with an install hint the CLI can surface. """ from __future__ import annotations import html from typing import Any from dlm_sway.core.result import ProbeResult, SuiteResult, SwayScore, Verdict #: Palette used across panels. Matches the terminal verdict colors so a #: user scanning the HTML mapping back to the `sway run` output sees #: the same color grammar. _VERDICT_COLOR: dict[Verdict, str] = { Verdict.PASS: "#28a745", Verdict.FAIL: "#dc3545", Verdict.WARN: "#ffc107", Verdict.SKIP: "#6c757d", Verdict.ERROR: "#9c27b0", } #: Division colors for the category bars — same per-component palette #: we publish in the README. _CATEGORY_COLOR: dict[str, str] = { "adherence": "#0d6efd", "attribution": "#198754", "calibration": "#fd7e14", "ablation": "#6f42c1", "baseline": "#adb5bd", } #: Stable div IDs so the emitted HTML is byte-identical for the same #: inputs. Plotly defaults to a random UUID per figure otherwise. _DIV_GAUGE = "sway-gauge" _DIV_CATEGORY = "sway-category" _DIV_SIS = "sway-sis" _DIV_ABLATION = "sway-ablation" _DIV_SCATTER = "sway-scatter" # ---------------------------------------------------------------------- # Entry point # ---------------------------------------------------------------------- def to_html(suite: SuiteResult, score: SwayScore) -> str: """Render a ``SuiteResult``/``SwayScore`` pair as a self-contained HTML page. Raises ------ RuntimeError When ``plotly`` is not importable. The CLI catches this and surfaces an install hint (``pip install 'dlm-sway[viz]'``). """ try: import plotly.graph_objects as go import plotly.io as pio from plotly.offline import get_plotlyjs except ImportError as exc: raise RuntimeError( "plotly is required for --format html. Install with: pip install 'dlm-sway[viz]'" ) from exc gauge_fig = _gauge_figure(go, score) category_fig = _category_figure(go, score) sis_fig = _sis_figure(go, suite) ablation_fig = _ablation_figure(go, suite) scatter_fig = _scatter_figure(go, suite) panels: list[tuple[str, str, str]] = [ ("Composite score", _DIV_GAUGE, _fig_to_div(pio, gauge_fig, _DIV_GAUGE)), ("Category breakdown", _DIV_CATEGORY, _fig_to_div(pio, category_fig, _DIV_CATEGORY)), ] if sis_fig is not None: panels.append( ("Per-section internalization", _DIV_SIS, _fig_to_div(pio, sis_fig, _DIV_SIS)) ) if ablation_fig is not None: panels.append( ( "Adapter-ablation response", _DIV_ABLATION, _fig_to_div(pio, ablation_fig, _DIV_ABLATION), ) ) panels.append( ("Per-probe score vs. z-score", _DIV_SCATTER, _fig_to_div(pio, scatter_fig, _DIV_SCATTER)) ) return _assemble(suite, score, panels, plotly_js=get_plotlyjs()) # ---------------------------------------------------------------------- # Figures # ---------------------------------------------------------------------- def _gauge_figure(go: Any, score: SwayScore) -> Any: """Indicator gauge for the composite score, 0..1 with banded thresholds.""" overall = float(score.overall) if score.overall is not None else 0.0 return go.Figure( go.Indicator( mode="gauge+number", value=overall, number={"valueformat": ".2f", "font": {"size": 48}}, gauge={ "axis": {"range": [0.0, 1.0]}, "bar": {"color": _band_color(score.band)}, "steps": [ {"range": [0.00, 0.30], "color": "#f8d7da"}, # noise {"range": [0.30, 0.60], "color": "#fff3cd"}, # partial {"range": [0.60, 0.85], "color": "#d1e7dd"}, # healthy {"range": [0.85, 1.00], "color": "#e2e3ff"}, # suspicious ], }, title={"text": f"{score.band or 'unscored'}"}, ), layout=go.Layout(height=320, margin={"l": 20, "r": 20, "t": 60, "b": 20}), ) def _category_figure(go: Any, score: SwayScore) -> Any: """Horizontal bar chart of per-category contributions.""" items = [(cat, float(v)) for cat, v in score.components.items()] items.sort(key=lambda pair: pair[0]) labels = [cat for cat, _ in items] values = [v for _, v in items] colors = [_CATEGORY_COLOR.get(cat, "#888888") for cat in labels] return go.Figure( go.Bar( x=values, y=labels, orientation="h", marker={"color": colors}, hovertemplate="%{y}: %{x:.3f}", ), layout=go.Layout( xaxis={"range": [0.0, 1.0], "title": "component score"}, yaxis={"title": ""}, height=260, margin={"l": 100, "r": 20, "t": 30, "b": 40}, ), ) def _sis_figure(go: Any, suite: SuiteResult) -> Any | None: """Per-section internalization bar chart. ``None`` if no data.""" probe = _first_probe_of_kind(suite, "section_internalization") if probe is None: return None per_section = probe.evidence.get("per_section") if not per_section or not isinstance(per_section, list): return None labels = [str(row.get("section_id") or row.get("tag") or "?") for row in per_section] values = [float(row.get("effective_sis", 0.0)) for row in per_section] passed = [bool(row.get("passed")) for row in per_section] colors = [_VERDICT_COLOR[Verdict.PASS if p else Verdict.FAIL] for p in passed] return go.Figure( go.Bar( x=labels, y=values, marker={"color": colors}, hovertemplate="%{x}
effective_sis=%{y:.3f}", ), layout=go.Layout( xaxis={"title": "section"}, yaxis={"title": "effective_sis (own - leak)"}, height=320, margin={"l": 60, "r": 20, "t": 30, "b": 80}, ), ) def _ablation_figure(go: Any, suite: SuiteResult) -> Any | None: """λ vs. divergence response curve. ``None`` if ablation didn't run.""" probe = _first_probe_of_kind(suite, "adapter_ablation") if probe is None: return None lambdas = probe.evidence.get("lambdas") divs = probe.evidence.get("mean_divergence_per_lambda") if not lambdas or not divs: return None sat = probe.evidence.get("saturation_lambda") fig = go.Figure( go.Scatter( x=list(lambdas), y=list(divs), mode="lines+markers", marker={"color": _CATEGORY_COLOR["ablation"], "size": 10}, line={"color": _CATEGORY_COLOR["ablation"], "width": 2}, hovertemplate="λ=%{x}
div=%{y:.4f}", name="divergence", ), layout=go.Layout( xaxis={"title": "lambda"}, yaxis={"title": "mean divergence"}, height=320, margin={"l": 60, "r": 20, "t": 30, "b": 50}, ), ) if sat is not None: fig.add_vline( x=float(sat), line_dash="dash", line_color="#6c757d", annotation_text=f"sat_λ={float(sat):.2f}", annotation_position="top", ) return fig def _scatter_figure(go: Any, suite: SuiteResult) -> Any: """Score vs. z-score scatter across every probe, colored by verdict.""" xs: list[float] = [] ys: list[float] = [] texts: list[str] = [] colors: list[str] = [] for p in suite.probes: # Plot only probes with a numeric score; SKIP / ERROR probes # without a score are summarized in the per-row annotation instead # of cluttering the scatter at (0,0). if p.score is None: continue xs.append(float(p.score)) ys.append(float(p.z_score) if p.z_score is not None else 0.0) texts.append( f"{html.escape(p.name)}
" f"kind: {html.escape(p.kind)}
" f"verdict: {p.verdict.value}
" f"score: {p.score:.3f}
" f"z: {'—' if p.z_score is None else f'{p.z_score:+.2f}σ'}" ) colors.append(_VERDICT_COLOR.get(p.verdict, "#888888")) return go.Figure( go.Scatter( x=xs, y=ys, mode="markers", marker={"size": 14, "color": colors, "line": {"color": "#333", "width": 1}}, text=texts, hovertemplate="%{text}", ), layout=go.Layout( xaxis={"title": "score", "range": [0.0, 1.0]}, yaxis={"title": "z-score (σ)", "zeroline": True}, height=360, margin={"l": 60, "r": 20, "t": 30, "b": 50}, ), ) # ---------------------------------------------------------------------- # Assembly # ---------------------------------------------------------------------- def _fig_to_div(pio: Any, fig: Any, div_id: str) -> str: """Render one figure as a div, reusing the JS we embed once in .""" return str( pio.to_html( fig, include_plotlyjs=False, full_html=False, div_id=div_id, config={"displaylogo": False, "responsive": True}, ) ) def _assemble( suite: SuiteResult, score: SwayScore, panels: list[tuple[str, str, str]], *, plotly_js: str, ) -> str: """Stitch the page together: header card, panels, probe table.""" title = html.escape(f"sway report — {suite.adapter_id or suite.base_model_id}") verdict_summary = _verdict_summary(suite) header = ( f"

{title}

" f"

" f"base: {html.escape(suite.base_model_id)} · " f"adapter: {html.escape(suite.adapter_id or '—')} · " f"sway {html.escape(suite.sway_version)} · " f"wall: {suite.wall_seconds:.2f}s" f"

" f"

overall: {score.overall:.2f} " f"({html.escape(score.band or '—')})" f" · {verdict_summary}

" ) panel_html_parts: list[str] = [] for title_, div_id, div_html in panels: panel_html_parts.append( f"
" f"

{html.escape(title_)}

" f"{div_html}" f"
" ) probe_table = _probe_table_html(suite) return _TEMPLATE.format( title=title, plotly_js=plotly_js, header=header, panels="\n".join(panel_html_parts), probe_table=probe_table, ) def _verdict_summary(suite: SuiteResult) -> str: counts: dict[Verdict, int] = {} for p in suite.probes: counts[p.verdict] = counts.get(p.verdict, 0) + 1 parts = [] for v in (Verdict.PASS, Verdict.FAIL, Verdict.WARN, Verdict.SKIP, Verdict.ERROR): if v in counts: parts.append(f"{counts[v]} {html.escape(v.value)}") return " · ".join(parts) or "no probes ran" def _probe_table_html(suite: SuiteResult) -> str: """Textual per-probe table under the charts — same columns as markdown.""" rows: list[str] = [] for p in suite.probes: rows.append( "" f"{html.escape(p.name)}" f"{html.escape(p.kind)}" f"{html.escape(p.verdict.value)}" f"{'—' if p.score is None else f'{p.score:.2f}'}" f"{'—' if p.raw is None else f'{p.raw:,.3f}'}" f"{'—' if p.z_score is None else f'{p.z_score:+.2f}σ'}" f"{html.escape(p.message or '')}" "" ) return ( "
" "

Probes

" "" "" "" "" + "".join(rows) + "
namekindverdictscorerawznote
" ) def _first_probe_of_kind(suite: SuiteResult, kind: str) -> ProbeResult | None: for p in suite.probes: if p.kind == kind and p.score is not None: return p return None def _band_color(band: str) -> str: return { "noise": "#dc3545", "partial": "#ffc107", "healthy": "#28a745", "suspicious": "#9c27b0", }.get(band, "#6c757d") # ---------------------------------------------------------------------- # Static template. Kept inline (no separate template file) because a # single page has one consumer and a two-file split would be more # ceremony than it's worth at this scale. # ---------------------------------------------------------------------- _TEMPLATE = """ {title} {header} {panels} {probe_table} """ __all__ = ["to_html"]