sway Public

Watch 0 Fork 0 Star 0

Python · 15852 bytes Raw Blame History

  
        1
        """Interactive single-file HTML report (S12 / F6).
      
        2
        
        3
        The terminal renderer is right for CI logs; markdown is right for
      
        4
        checked-in PR artifacts. Neither supports *exploration* — clicking
      
        5
        through per-section SIS bars, hovering over the ablation curve to
      
        6
        read exact λ values, zooming into the probe scatter. This module
      
        7
        produces a self-contained HTML page with four interactive Plotly
      
        8
        panels for the research / write-up case:
      
        9
        
        10
        1. Composite score gauge + per-category breakdown.
      
        11
        2. Per-section SIS bar chart (when ``section_internalization`` ran).
      
        12
        3. Adapter-ablation response curve (when ``adapter_ablation`` ran).
      
        13
        4. All-probe score + z-score scatter with hover tooltips.
      
        14
        
        15
        Plotly's JS bundle is inlined once in ``<head>``; each panel's
      
        16
        ``<div>`` gets a stable id so snapshot tests don't churn on every
      
        17
        render. The output is typically ~3.6 MB (Plotly is ~3 MB of that)
      
        18
        and loads with zero network calls.
      
        19
        
        20
        ``plotly`` is an *optional* dependency shipped via the ``[viz]`` extra.
      
        21
        When it's not importable, :func:`to_html` raises ``RuntimeError``
      
        22
        with an install hint the CLI can surface.
      
        23
        """
      
        24
        
        25
        from __future__ import annotations
      
        26
        
        27
        import html
      
        28
        from typing import Any
      
        29
        
        30
        from dlm_sway.core.result import ProbeResult, SuiteResult, SwayScore, Verdict
      
        31
        
        32
        #: Palette used across panels. Matches the terminal verdict colors so a
      
        33
        #: user mapping the HTML report back to the `sway run` output sees
      
        34
        #: the same color grammar.
      
        35
        _VERDICT_COLOR: dict[Verdict, str] = {
      
        36
            Verdict.PASS: "#28a745",
      
        37
            Verdict.FAIL: "#dc3545",
      
        38
            Verdict.WARN: "#ffc107",
      
        39
            Verdict.SKIP: "#6c757d",
      
        40
            Verdict.ERROR: "#9c27b0",
      
        41
        }
      
        42
        
        43
        #: Division colors for the category bars — same per-component palette
      
        44
        #: we publish in the README.
      
        45
        _CATEGORY_COLOR: dict[str, str] = {
      
        46
            "adherence": "#0d6efd",
      
        47
            "attribution": "#198754",
      
        48
            "calibration": "#fd7e14",
      
        49
            "ablation": "#6f42c1",
      
        50
            "baseline": "#adb5bd",
      
        51
        }
      
        52
        
        53
        #: Stable div IDs so the emitted HTML is byte-identical for the same
      
        54
        #: inputs. Plotly defaults to a random UUID per figure otherwise.
      
        55
        _DIV_GAUGE = "sway-gauge"
      
        56
        _DIV_CATEGORY = "sway-category"
      
        57
        _DIV_SIS = "sway-sis"
      
        58
        _DIV_ABLATION = "sway-ablation"
      
        59
        _DIV_SCATTER = "sway-scatter"
      
        60
        
        61
        
        62
        # ----------------------------------------------------------------------
      
        63
        # Entry point
      
        64
        # ----------------------------------------------------------------------
      
        65
        
        66
        
        67
        def to_html(suite: SuiteResult, score: SwayScore) -> str:
            """Build the standalone interactive HTML report for one suite run.

            All figures are created first; each one that is present is then
            serialized into a ``<div>`` with a fixed id, and the page is assembled
            around the inlined Plotly bundle.

            Raises
            ------
            RuntimeError
                When ``plotly`` is not importable. The message carries the
                install hint (``pip install 'dlm-sway[viz]'``) for the CLI to
                surface.
            """
            try:
                import plotly.graph_objects as go
                import plotly.io as pio
                from plotly.offline import get_plotlyjs
            except ImportError as exc:
                raise RuntimeError(
                    "plotly is required for --format html. Install with: pip install 'dlm-sway[viz]'"
                ) from exc

            # (heading, div id, figure, always shown) in page order. Only the
            # SIS and ablation panels are optional: their builders return None
            # when the corresponding probe produced no usable evidence.
            specs: list[tuple[str, str, Any, bool]] = [
                ("Composite score", _DIV_GAUGE, _gauge_figure(go, score), True),
                ("Category breakdown", _DIV_CATEGORY, _category_figure(go, score), True),
                ("Per-section internalization", _DIV_SIS, _sis_figure(go, suite), False),
                ("Adapter-ablation response", _DIV_ABLATION, _ablation_figure(go, suite), False),
                ("Per-probe score vs. z-score", _DIV_SCATTER, _scatter_figure(go, suite), True),
            ]

            panels: list[tuple[str, str, str]] = [
                (heading, div_id, _fig_to_div(pio, figure, div_id))
                for heading, div_id, figure, always in specs
                if always or figure is not None
            ]

            return _assemble(suite, score, panels, plotly_js=get_plotlyjs())
      
        112
        
        113
        
        114
        # ----------------------------------------------------------------------
      
        115
        # Figures
      
        116
        # ----------------------------------------------------------------------
      
        117
        
        118
        
        119
        def _gauge_figure(go: Any, score: SwayScore) -> Any:
            """Build the composite-score gauge on a fixed 0..1 axis.

            A missing ``score.overall`` is drawn as 0.0. The title shows the band
            name in bold, or ``unscored`` when the band is empty.
            """
            needle = 0.0 if score.overall is None else float(score.overall)

            # Pale background bands behind the needle, one per score band.
            bands = [
                {"range": [0.00, 0.30], "color": "#f8d7da"},  # noise
                {"range": [0.30, 0.60], "color": "#fff3cd"},  # partial
                {"range": [0.60, 0.85], "color": "#d1e7dd"},  # healthy
                {"range": [0.85, 1.00], "color": "#e2e3ff"},  # suspicious
            ]
            dial = {
                "axis": {"range": [0.0, 1.0]},
                "bar": {"color": _band_color(score.band)},
                "steps": bands,
            }
            indicator = go.Indicator(
                mode="gauge+number",
                value=needle,
                number={"valueformat": ".2f", "font": {"size": 48}},
                gauge=dial,
                title={"text": f"<b>{score.band or 'unscored'}</b>"},
            )
            frame = go.Layout(height=320, margin={"l": 20, "r": 20, "t": 60, "b": 20})
            return go.Figure(indicator, layout=frame)
      
        141
        
        142
        
        143
        def _category_figure(go: Any, score: SwayScore) -> Any:
            """Build the horizontal per-category bar chart, categories sorted by name."""
            ordered = sorted(
                ((name, float(value)) for name, value in score.components.items()),
                key=lambda entry: entry[0],
            )
            names = [name for name, _ in ordered]
            amounts = [amount for _, amount in ordered]
            # Categories missing from the palette fall back to neutral grey.
            fills = [_CATEGORY_COLOR.get(name, "#888888") for name in names]

            bars = go.Bar(
                x=amounts,
                y=names,
                orientation="h",
                marker={"color": fills},
                hovertemplate="%{y}: %{x:.3f}<extra></extra>",
            )
            frame = go.Layout(
                xaxis={"range": [0.0, 1.0], "title": "component score"},
                yaxis={"title": ""},
                height=260,
                margin={"l": 100, "r": 20, "t": 30, "b": 40},
            )
            return go.Figure(bars, layout=frame)
      
        165
        
        166
        
        167
        def _sis_figure(go: Any, suite: SuiteResult) -> Any | None:
            """Build the per-section internalization bar chart.

            Returns ``None`` when there is no scored ``section_internalization``
            probe, or when its ``per_section`` evidence is empty or not a list.
            Each bar is colored with the PASS or FAIL verdict color according to
            the row's ``passed`` flag.
            """
            probe = _first_probe_of_kind(suite, "section_internalization")
            rows = None if probe is None else probe.evidence.get("per_section")
            if not rows or not isinstance(rows, list):
                return None

            # A row is labelled by its section id, then its tag, then "?".
            names = [str(row.get("section_id") or row.get("tag") or "?") for row in rows]
            heights = [float(row.get("effective_sis", 0.0)) for row in rows]
            outcomes = [bool(row.get("passed")) for row in rows]
            fills = [_VERDICT_COLOR[Verdict.PASS if ok else Verdict.FAIL] for ok in outcomes]

            bars = go.Bar(
                x=names,
                y=heights,
                marker={"color": fills},
                hovertemplate="<b>%{x}</b><br>effective_sis=%{y:.3f}<extra></extra>",
            )
            frame = go.Layout(
                xaxis={"title": "section"},
                yaxis={"title": "effective_sis (own - leak)"},
                height=320,
                margin={"l": 60, "r": 20, "t": 30, "b": 80},
            )
            return go.Figure(bars, layout=frame)
      
        193
        
        194
        
        195
        def _ablation_figure(go: Any, suite: SuiteResult) -> Any | None:
            """Build the λ-vs-divergence response curve, or ``None`` without data.

            Returns ``None`` when there is no scored ``adapter_ablation`` probe or
            when its evidence lacks ``lambdas`` / ``mean_divergence_per_lambda``.
            A dashed vertical marker is drawn at ``saturation_lambda`` when the
            evidence provides one.
            """
            probe = _first_probe_of_kind(suite, "adapter_ablation")
            if probe is None:
                return None

            evidence = probe.evidence
            lambdas = evidence.get("lambdas")
            divergences = evidence.get("mean_divergence_per_lambda")
            if not (lambdas and divergences):
                return None
            saturation = evidence.get("saturation_lambda")

            curve = go.Scatter(
                x=list(lambdas),
                y=list(divergences),
                mode="lines+markers",
                marker={"color": _CATEGORY_COLOR["ablation"], "size": 10},
                line={"color": _CATEGORY_COLOR["ablation"], "width": 2},
                hovertemplate="λ=%{x}<br>div=%{y:.4f}<extra></extra>",
                name="divergence",
            )
            frame = go.Layout(
                xaxis={"title": "lambda"},
                yaxis={"title": "mean divergence"},
                height=320,
                margin={"l": 60, "r": 20, "t": 30, "b": 50},
            )
            fig = go.Figure(curve, layout=frame)

            if saturation is None:
                return fig

            fig.add_vline(
                x=float(saturation),
                line_dash="dash",
                line_color="#6c757d",
                annotation_text=f"sat_λ={float(saturation):.2f}",
                annotation_position="top",
            )
            return fig
      
        231
        
        232
        
        233
        def _scatter_figure(go: Any, suite: SuiteResult) -> Any:
            """Build the score vs. z-score scatter, one marker per scored probe.

            Probes without a numeric score are left out of the plot. A probe with
            a score but no z-score is plotted at z = 0 and its hover text shows an
            em dash for z. Markers are colored by verdict, grey for a verdict
            missing from the palette.
            """
            # One (x, y, hover text, color) tuple per plotted probe.
            points: list[tuple[float, float, str, str]] = []
            for probe in suite.probes:
                if probe.score is None:
                    # Unscored (e.g. SKIP / ERROR) probes would only pile up at
                    # the origin, so they are not plotted.
                    continue
                x = float(probe.score)
                y = 0.0 if probe.z_score is None else float(probe.z_score)
                hover = "<br>".join(
                    [
                        f"<b>{html.escape(probe.name)}</b>",
                        f"kind: {html.escape(probe.kind)}",
                        f"verdict: {probe.verdict.value}",
                        f"score: {probe.score:.3f}",
                        f"z: {'—' if probe.z_score is None else f'{probe.z_score:+.2f}σ'}",
                    ]
                )
                points.append((x, y, hover, _VERDICT_COLOR.get(probe.verdict, "#888888")))

            markers = go.Scatter(
                x=[point[0] for point in points],
                y=[point[1] for point in points],
                mode="markers",
                marker={
                    "size": 14,
                    "color": [point[3] for point in points],
                    "line": {"color": "#333", "width": 1},
                },
                text=[point[2] for point in points],
                hovertemplate="%{text}<extra></extra>",
            )
            frame = go.Layout(
                xaxis={"title": "score", "range": [0.0, 1.0]},
                yaxis={"title": "z-score (σ)", "zeroline": True},
                height=360,
                margin={"l": 60, "r": 20, "t": 30, "b": 50},
            )
            return go.Figure(markers, layout=frame)
      
        271
        
        272
        
        273
        # ----------------------------------------------------------------------
      
        274
        # Assembly
      
        275
        # ----------------------------------------------------------------------
      
        276
        
        277
        
        278
        def _fig_to_div(pio: Any, fig: Any, div_id: str) -> str:
            """Serialize one figure to an HTML ``<div>`` fragment with a fixed id.

            The Plotly bundle is not included in the fragment, because the page
            embeds it once in ``<head>``.
            """
            options = {
                "include_plotlyjs": False,
                "full_html": False,
                "div_id": div_id,
                "config": {"displaylogo": False, "responsive": True},
            }
            fragment = pio.to_html(fig, **options)
            return str(fragment)
      
        289
        
        290
        
        291
        def _assemble(
            suite: SuiteResult,
            score: SwayScore,
            panels: list[tuple[str, str, str]],
            *,
            plotly_js: str,
        ) -> str:
            """Stitch the page together: header card, panels, probe table.

            Parameters
            ----------
            suite
                Run metadata and probe results. Supplies the ids, version and
                wall time shown in the header, and the rows of the probe table.
            score
                Composite score. ``overall`` may be ``None`` (unscored run), in
                which case the summary shows an em dash instead of a number.
            panels
                ``(heading, div_id, div_html)`` triples in display order.
                ``div_html`` is inserted verbatim; heading and id are escaped.
            plotly_js
                Plotly bundle source, inlined into the template's ``<script>``.

            Returns
            -------
            str
                The complete HTML document.
            """
            title = html.escape(f"sway report — {suite.adapter_id or suite.base_model_id}")
            verdict_summary = _verdict_summary(suite)
            # ``score.overall`` is optional (the gauge draws a missing value as
            # 0.0). Formatting ``None`` with ``.2f`` would raise TypeError, so
            # render a dash instead.
            overall_text = "—" if score.overall is None else f"{score.overall:.2f}"
            header = (
                f"<h1>{title}</h1>"
                f"<p class='meta'>"
                f"base: <code>{html.escape(suite.base_model_id)}</code> · "
                f"adapter: <code>{html.escape(suite.adapter_id or '—')}</code> · "
                f"sway {html.escape(suite.sway_version)} · "
                f"wall: {suite.wall_seconds:.2f}s"
                f"</p>"
                f"<p class='summary'><b>overall</b>: {overall_text} "
                f"({html.escape(score.band or '—')})"
                f" · {verdict_summary}</p>"
            )

            # Wrap each pre-rendered Plotly div in a titled card.
            panel_html_parts = [
                f"<section class='panel' id='panel-{html.escape(div_id)}'>"
                f"<h2>{html.escape(title_)}</h2>"
                f"{div_html}"
                f"</section>"
                for title_, div_id, div_html in panels
            ]

            probe_table = _probe_table_html(suite)

            return _TEMPLATE.format(
                title=title,
                plotly_js=plotly_js,
                header=header,
                panels="\n".join(panel_html_parts),
                probe_table=probe_table,
            )
      
        332
        
        333
        
        334
        def _verdict_summary(suite: SuiteResult) -> str:
            """Return per-verdict probe counts as ``<span>`` chips joined by `` · ``.

            Verdicts are listed in the fixed order PASS, FAIL, WARN, SKIP, ERROR
            and only when at least one probe has that verdict. With no probes the
            result is the plain text ``no probes ran``.
            """
            tally: dict[Verdict, int] = {}
            for probe in suite.probes:
                tally.setdefault(probe.verdict, 0)
                tally[probe.verdict] += 1

            display_order = (Verdict.PASS, Verdict.FAIL, Verdict.WARN, Verdict.SKIP, Verdict.ERROR)
            chips = [
                f"<span class='v-{verdict.value}'>{tally[verdict]} {html.escape(verdict.value)}</span>"
                for verdict in display_order
                if verdict in tally
            ]
            if not chips:
                return "no probes ran"
            return " · ".join(chips)
      
        343
        
        344
        
        345
        def _probe_table_html(suite: SuiteResult) -> str:
            """Render the textual per-probe table shown under the charts.

            Columns are name, kind, verdict, score, raw, z and note. Missing
            numeric values are shown as an em dash; name, kind, verdict text and
            the note are HTML-escaped.
            """

            def _number(value: Any, spec: str, suffix: str = "") -> str:
                # Format an optional number, or show a dash when it is absent.
                return "—" if value is None else format(value, spec) + suffix

            body: list[str] = []
            for probe in suite.probes:
                cells = [
                    f"<td>{html.escape(probe.name)}</td>",
                    f"<td><code>{html.escape(probe.kind)}</code></td>",
                    f"<td class='v-{probe.verdict.value}'>{html.escape(probe.verdict.value)}</td>",
                    f"<td>{_number(probe.score, '.2f')}</td>",
                    f"<td>{_number(probe.raw, ',.3f')}</td>",
                    f"<td>{_number(probe.z_score, '+.2f', 'σ')}</td>",
                    f"<td class='note'>{html.escape(probe.message or '')}</td>",
                ]
                body.append("<tr>" + "".join(cells) + "</tr>")

            head = (
                "<section class='probe-table'>"
                "<h2>Probes</h2>"
                "<table>"
                "<thead><tr><th>name</th><th>kind</th><th>verdict</th>"
                "<th>score</th><th>raw</th><th>z</th><th>note</th></tr></thead>"
                "<tbody>"
            )
            tail = "</tbody></table></section>"
            return head + "".join(body) + tail
      
        368
        
        369
        
        370
        def _first_probe_of_kind(suite: SuiteResult, kind: str) -> ProbeResult | None:
            """Return the first probe of ``kind`` that has a score, else ``None``."""
            scored_matches = (
                probe for probe in suite.probes if probe.kind == kind and probe.score is not None
            )
            return next(scored_matches, None)
      
        375
        
        376
        
        377
        def _band_color(band: str) -> str:
            """Return the gauge bar color for a score band, grey for anything unknown."""
            palette = {
                "noise": "#dc3545",
                "partial": "#ffc107",
                "healthy": "#28a745",
                "suspicious": "#9c27b0",
            }
            if band in palette:
                return palette[band]
            return "#6c757d"
      
        384
        
        385
        
        386
        # ----------------------------------------------------------------------
      
        387
        # Static template. Kept inline (no separate template file) because a
      
        388
        # single page has one consumer and a two-file split would be more
      
        389
        # ceremony than it's worth at this scale.
      
        390
        # ----------------------------------------------------------------------
      
        391
        
        392
        _TEMPLATE = """<!doctype html>
      
        393
        <html lang="en">
      
        394
        <head>
      
        395
          <meta charset="utf-8">
      
        396
          <meta name="generator" content="sway report html">
      
        397
          <title>{title}</title>
      
        398
          <style>
      
        399
            body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
      
        400
                   margin: 0; padding: 2rem; color: #222; background: #fafafa; max-width: 1100px;
      
        401
                   margin-left: auto; margin-right: auto; }}
      
        402
            h1 {{ margin-bottom: 0.25rem; }}
      
        403
            p.meta {{ color: #666; margin-top: 0; }}
      
        404
            p.summary {{ font-size: 1.1rem; margin-top: 0.5rem; }}
      
        405
            section.panel {{ background: #fff; border: 1px solid #e0e0e0; border-radius: 6px;
      
        406
                             padding: 1rem; margin-top: 1rem; }}
      
        407
            section.panel h2 {{ margin-top: 0; font-size: 1.1rem; color: #333; }}
      
        408
            section.probe-table {{ background: #fff; border: 1px solid #e0e0e0; border-radius: 6px;
      
        409
                                   padding: 1rem; margin-top: 1rem; }}
      
        410
            section.probe-table table {{ border-collapse: collapse; width: 100%; font-size: 0.9rem; }}
      
        411
            section.probe-table th, section.probe-table td {{ padding: 0.4rem 0.6rem;
      
        412
                                                               border-bottom: 1px solid #eee;
      
        413
                                                               text-align: left; vertical-align: top; }}
      
        414
            section.probe-table td.note {{ color: #555; }}
      
        415
            .v-pass {{ color: #28a745; font-weight: bold; }}
      
        416
            .v-fail {{ color: #dc3545; font-weight: bold; }}
      
        417
            .v-warn {{ color: #c98a00; font-weight: bold; }}
      
        418
            .v-skip {{ color: #6c757d; }}
      
        419
            .v-error {{ color: #9c27b0; font-weight: bold; }}
      
        420
            code {{ font-family: 'Menlo', 'Consolas', monospace; font-size: 0.9em;
      
        421
                    background: #f0f0f0; padding: 0.05em 0.3em; border-radius: 3px; }}
      
        422
          </style>
      
        423
          <script type="text/javascript">{plotly_js}</script>
      
        424
        </head>
      
        425
        <body>
      
        426
          {header}
      
        427
          {panels}
      
        428
          {probe_table}
      
        429
        </body>
      
        430
        </html>
      
        431
        """
      
        432
        
        433
        
        434
        __all__ = ["to_html"]

1	"""Interactive single-file HTML report (S12 / F6).
2
3	The terminal renderer is right for CI logs; markdown is right for
4	checked-in PR artifacts. Neither supports exploration — clicking
5	through per-section SIS bars, hovering over the ablation curve to
6	read exact λ values, zooming into the probe scatter. This module
7	produces a self-contained HTML page with four interactive Plotly
8	panels for the research / write-up case:
9
10	1. Composite score gauge + per-category breakdown.
11	2. Per-section SIS bar chart (when ``section_internalization`` ran).
12	3. Adapter-ablation response curve (when ``adapter_ablation`` ran).
13	4. All-probe score + z-score scatter with hover tooltips.
14
15	Plotly's JS bundle is inlined once in ``<head>``; each panel's
16	``<div>`` gets a stable id so snapshot tests don't churn on every
17	render. The output is typically ~3.6 MB (Plotly is ~3 MB of that)
18	and loads with zero network calls.
19
20	``plotly`` is an optional dependency shipped via the ``[viz]`` extra.
21	When it's not importable, :func:`to_html` raises ``RuntimeError``
22	with an install hint the CLI can surface.
23	"""
24
25	from __future__ import annotations
26
27	import html
28	from typing import Any
29
30	from dlm_sway.core.result import ProbeResult, SuiteResult, SwayScore, Verdict
31
32	#: Palette used across panels. Matches the terminal verdict colors so a
33	#: user scanning the HTML mapping back to the `sway run` output sees
34	#: the same color grammar.
35	_VERDICT_COLOR: dict[Verdict, str] = {
36	Verdict.PASS: "#28a745",
37	Verdict.FAIL: "#dc3545",
38	Verdict.WARN: "#ffc107",
39	Verdict.SKIP: "#6c757d",
40	Verdict.ERROR: "#9c27b0",
41	}
42
43	#: Division colors for the category bars — same per-component palette
44	#: we publish in the README.
45	_CATEGORY_COLOR: dict[str, str] = {
46	"adherence": "#0d6efd",
47	"attribution": "#198754",
48	"calibration": "#fd7e14",
49	"ablation": "#6f42c1",
50	"baseline": "#adb5bd",
51	}
52
53	#: Stable div IDs so the emitted HTML is byte-identical for the same
54	#: inputs. Plotly defaults to a random UUID per figure otherwise.
55	_DIV_GAUGE = "sway-gauge"
56	_DIV_CATEGORY = "sway-category"
57	_DIV_SIS = "sway-sis"
58	_DIV_ABLATION = "sway-ablation"
59	_DIV_SCATTER = "sway-scatter"
60
61
62	# ----------------------------------------------------------------------
63	# Entry point
64	# ----------------------------------------------------------------------
65
66
67	def to_html(suite: SuiteResult, score: SwayScore) -> str:
68	"""Render a ``SuiteResult``/``SwayScore`` pair as a self-contained HTML page.
69
70	Raises
71	------
72	RuntimeError
73	When ``plotly`` is not importable. The CLI catches this and
74	surfaces an install hint (``pip install 'dlm-sway[viz]'``).
75	"""
76	try:
77	import plotly.graph_objects as go
78	import plotly.io as pio
79	from plotly.offline import get_plotlyjs
80	except ImportError as exc:
81	raise RuntimeError(
82	"plotly is required for --format html. Install with: pip install 'dlm-sway[viz]'"
83	) from exc
84
85	gauge_fig = _gauge_figure(go, score)
86	category_fig = _category_figure(go, score)
87	sis_fig = _sis_figure(go, suite)
88	ablation_fig = _ablation_figure(go, suite)
89	scatter_fig = _scatter_figure(go, suite)
90
91	panels: list[tuple[str, str, str]] = [
92	("Composite score", _DIV_GAUGE, _fig_to_div(pio, gauge_fig, _DIV_GAUGE)),
93	("Category breakdown", _DIV_CATEGORY, _fig_to_div(pio, category_fig, _DIV_CATEGORY)),
94	]
95	if sis_fig is not None:
96	panels.append(
97	("Per-section internalization", _DIV_SIS, _fig_to_div(pio, sis_fig, _DIV_SIS))
98	)
99	if ablation_fig is not None:
100	panels.append(
101	(
102	"Adapter-ablation response",
103	_DIV_ABLATION,
104	_fig_to_div(pio, ablation_fig, _DIV_ABLATION),
105	)
106	)
107	panels.append(
108	("Per-probe score vs. z-score", _DIV_SCATTER, _fig_to_div(pio, scatter_fig, _DIV_SCATTER))
109	)
110
111	return _assemble(suite, score, panels, plotly_js=get_plotlyjs())
112
113
114	# ----------------------------------------------------------------------
115	# Figures
116	# ----------------------------------------------------------------------
117
118
def _gauge_figure(go: Any, score: SwayScore) -> Any:
    """Build the composite-score gauge panel.

    The dial spans 0..1. An unset ``score.overall`` is drawn as 0.0, and
    an empty band is titled ``unscored``. The bar takes whatever colour
    ``_band_color`` returns for ``score.band``; the background is split
    into four shaded steps.
    """
    needle = 0.0 if score.overall is None else float(score.overall)
    heading = score.band or "unscored"

    # (low, high, shade) for each background step; the trailing comment
    # names the band that the step shades.
    step_bounds = (
        (0.00, 0.30, "#f8d7da"),  # noise
        (0.30, 0.60, "#fff3cd"),  # partial
        (0.60, 0.85, "#d1e7dd"),  # healthy
        (0.85, 1.00, "#e2e3ff"),  # suspicious
    )
    dial = {
        "axis": {"range": [0.0, 1.0]},
        "bar": {"color": _band_color(score.band)},
        "steps": [
            {"range": [low, high], "color": shade}
            for low, high, shade in step_bounds
        ],
    }

    indicator = go.Indicator(
        mode="gauge+number",
        value=needle,
        number={"valueformat": ".2f", "font": {"size": 48}},
        gauge=dial,
        title={"text": f"<b>{heading}</b>"},
    )
    frame = go.Layout(height=320, margin={"l": 20, "r": 20, "t": 60, "b": 20})
    return go.Figure(indicator, layout=frame)
141
142
def _category_figure(go: Any, score: SwayScore) -> Any:
    """Build the horizontal bar chart of per-category component scores.

    Bars are ordered by category key. Each bar is coloured from
    ``_CATEGORY_COLOR``; categories missing from that table fall back
    to a neutral grey.
    """
    ordered = sorted(score.components.items(), key=lambda entry: entry[0])
    names = [name for name, _ in ordered]
    scores = [float(value) for _, value in ordered]
    palette = [_CATEGORY_COLOR.get(name, "#888888") for name in names]

    bars = go.Bar(
        x=scores,
        y=names,
        orientation="h",
        marker={"color": palette},
        hovertemplate="%{y}: %{x:.3f}<extra></extra>",
    )
    frame = go.Layout(
        xaxis={"range": [0.0, 1.0], "title": "component score"},
        yaxis={"title": ""},
        height=260,
        margin={"l": 100, "r": 20, "t": 30, "b": 40},
    )
    return go.Figure(bars, layout=frame)
165
166
def _sis_figure(go: Any, suite: SuiteResult) -> Any | None:
    """Build the per-section internalization bar chart.

    Returns ``None`` when no scored ``section_internalization`` probe is
    present, or when its ``per_section`` evidence is empty or not a list.
    Each bar is one section, coloured with the PASS or FAIL verdict
    colour according to the row's ``passed`` flag.
    """
    probe = _first_probe_of_kind(suite, "section_internalization")
    if probe is None:
        return None

    rows = probe.evidence.get("per_section")
    if not (rows and isinstance(rows, list)):
        return None

    section_names: list[str] = []
    sis_values: list[float] = []
    bar_colors: list[str] = []
    for row in rows:
        # Prefer the section id, then the tag, then a "?" placeholder.
        section_names.append(str(row.get("section_id") or row.get("tag") or "?"))
        sis_values.append(float(row.get("effective_sis", 0.0)))
        outcome = Verdict.PASS if bool(row.get("passed")) else Verdict.FAIL
        bar_colors.append(_VERDICT_COLOR[outcome])

    bars = go.Bar(
        x=section_names,
        y=sis_values,
        marker={"color": bar_colors},
        hovertemplate="<b>%{x}</b><br>effective_sis=%{y:.3f}<extra></extra>",
    )
    frame = go.Layout(
        xaxis={"title": "section"},
        yaxis={"title": "effective_sis (own - leak)"},
        height=320,
        margin={"l": 60, "r": 20, "t": 30, "b": 80},
    )
    return go.Figure(bars, layout=frame)
193
194
def _ablation_figure(go: Any, suite: SuiteResult) -> Any | None:
    """Build the λ vs. mean-divergence response curve.

    Returns ``None`` when no scored ``adapter_ablation`` probe is present
    or when either the ``lambdas`` or the ``mean_divergence_per_lambda``
    evidence is missing or empty. When the evidence carries a
    ``saturation_lambda``, a dashed vertical marker is drawn at that λ.
    """
    probe = _first_probe_of_kind(suite, "adapter_ablation")
    if probe is None:
        return None

    evidence = probe.evidence
    lambdas = evidence.get("lambdas")
    divs = evidence.get("mean_divergence_per_lambda")
    if not (lambdas and divs):
        return None

    tint = _CATEGORY_COLOR["ablation"]
    curve = go.Scatter(
        x=list(lambdas),
        y=list(divs),
        mode="lines+markers",
        marker={"color": tint, "size": 10},
        line={"color": tint, "width": 2},
        hovertemplate="λ=%{x}<br>div=%{y:.4f}<extra></extra>",
        name="divergence",
    )
    frame = go.Layout(
        xaxis={"title": "lambda"},
        yaxis={"title": "mean divergence"},
        height=320,
        margin={"l": 60, "r": 20, "t": 30, "b": 50},
    )
    fig = go.Figure(curve, layout=frame)

    sat = evidence.get("saturation_lambda")
    if sat is None:
        return fig

    sat_x = float(sat)
    fig.add_vline(
        x=sat_x,
        line_dash="dash",
        line_color="#6c757d",
        annotation_text=f"sat_λ={sat_x:.2f}",
        annotation_position="top",
    )
    return fig
231
232
def _scatter_figure(go: Any, suite: SuiteResult) -> Any:
    """Build the score vs. z-score scatter, one marker per scored probe.

    Probes whose ``score`` is ``None`` are left off the plot so they do
    not pile up at the origin. A probe with a score but no z-score is
    placed at y = 0.0, and its hover text shows a dash for z. Markers
    are coloured by verdict, with grey for verdicts that have no entry
    in ``_VERDICT_COLOR``.
    """
    plotted = [probe for probe in suite.probes if probe.score is not None]

    def hover_label(probe: Any) -> str:
        # Name and kind are escaped because they end up in hover markup.
        z_text = "—" if probe.z_score is None else f"{probe.z_score:+.2f}σ"
        return (
            f"<b>{html.escape(probe.name)}</b><br>"
            f"kind: {html.escape(probe.kind)}<br>"
            f"verdict: {probe.verdict.value}<br>"
            f"score: {probe.score:.3f}<br>"
            f"z: {z_text}"
        )

    score_axis = [float(probe.score) for probe in plotted]
    z_axis = [
        0.0 if probe.z_score is None else float(probe.z_score)
        for probe in plotted
    ]
    hover_texts = [hover_label(probe) for probe in plotted]
    marker_colors = [_VERDICT_COLOR.get(probe.verdict, "#888888") for probe in plotted]

    points = go.Scatter(
        x=score_axis,
        y=z_axis,
        mode="markers",
        marker={
            "size": 14,
            "color": marker_colors,
            "line": {"color": "#333", "width": 1},
        },
        text=hover_texts,
        hovertemplate="%{text}<extra></extra>",
    )
    frame = go.Layout(
        xaxis={"title": "score", "range": [0.0, 1.0]},
        yaxis={"title": "z-score (σ)", "zeroline": True},
        height=360,
        margin={"l": 60, "r": 20, "t": 30, "b": 50},
    )
    return go.Figure(points, layout=frame)
271
272
273	# ----------------------------------------------------------------------
274	# Assembly
275	# ----------------------------------------------------------------------
276
277
def _fig_to_div(pio: Any, fig: Any, div_id: str) -> str:
    """Render ``fig`` as a bare ``<div>`` fragment with the given id.

    The Plotly bundle is deliberately left out of the fragment
    (``include_plotlyjs=False``) because the page embeds it once in
    ``<head>``. The result of ``pio.to_html`` is coerced to ``str``.
    """
    render_options = {
        "include_plotlyjs": False,
        "full_html": False,
        "div_id": div_id,
        "config": {"displaylogo": False, "responsive": True},
    }
    fragment = pio.to_html(fig, **render_options)
    return str(fragment)
289
290
def _assemble(
    suite: SuiteResult,
    score: SwayScore,
    panels: list[tuple[str, str, str]],
    *,
    plotly_js: str,
) -> str:
    """Stitch the page together: header card, panels, probe table.

    Args:
        suite: Run result; supplies the model/adapter ids, sway version,
            wall time and the probes behind the summary and the table.
        score: Composite score shown in the summary line. ``overall``
            and ``band`` may be unset; both then render as a dash.
        panels: ``(heading, div_id, div_html)`` triples in display
            order. ``heading`` and ``div_id`` are HTML-escaped here;
            ``div_html`` is inserted verbatim and must already be markup.
        plotly_js: Plotly bundle source, inlined once via ``_TEMPLATE``.

    Returns:
        The complete HTML document as a single string.
    """
    title = html.escape(f"sway report — {suite.adapter_id or suite.base_model_id}")
    verdict_summary = _verdict_summary(suite)

    # ``score.overall`` can be ``None`` (the gauge panel guards for the
    # same case). Formatting ``None`` with ``:.2f`` raises TypeError, so
    # fall back to a dash instead of failing the whole report.
    overall_text = "—" if score.overall is None else f"{score.overall:.2f}"

    header = (
        f"<h1>{title}</h1>"
        f"<p class='meta'>"
        f"base: <code>{html.escape(suite.base_model_id)}</code> · "
        f"adapter: <code>{html.escape(suite.adapter_id or '—')}</code> · "
        f"sway {html.escape(suite.sway_version)} · "
        f"wall: {suite.wall_seconds:.2f}s"
        f"</p>"
        f"<p class='summary'><b>overall</b>: {overall_text} "
        f"({html.escape(score.band or '—')})"
        f" · {verdict_summary}</p>"
    )

    # One <section> per panel; the wrapper id is derived from the div id.
    panel_html_parts = [
        f"<section class='panel' id='panel-{html.escape(div_id)}'>"
        f"<h2>{html.escape(title_)}</h2>"
        f"{div_html}"
        f"</section>"
        for title_, div_id, div_html in panels
    ]

    probe_table = _probe_table_html(suite)

    return _TEMPLATE.format(
        title=title,
        plotly_js=plotly_js,
        header=header,
        panels="\n".join(panel_html_parts),
        probe_table=probe_table,
    )
332
333
def _verdict_summary(suite: SuiteResult) -> str:
    """Summarise how many probes ended in each verdict as HTML spans.

    Verdicts are listed in a fixed order (pass, fail, warn, skip, error)
    and those with no probes are omitted. Each count is wrapped in a
    ``<span>`` carrying the matching ``v-<verdict>`` class. Returns
    ``"no probes ran"`` when nothing is listed.
    """
    tally: dict[Verdict, int] = {}
    for probe in suite.probes:
        tally.setdefault(probe.verdict, 0)
        tally[probe.verdict] += 1

    display_order = (
        Verdict.PASS,
        Verdict.FAIL,
        Verdict.WARN,
        Verdict.SKIP,
        Verdict.ERROR,
    )
    chips = [
        f"<span class='v-{verdict.value}'>{tally[verdict]} {html.escape(verdict.value)}</span>"
        for verdict in display_order
        if verdict in tally
    ]
    if not chips:
        return "no probes ran"
    return " · ".join(chips)
343
344
def _probe_table_html(suite: SuiteResult) -> str:
    """Render the per-probe table shown beneath the charts.

    One row per probe, in suite order, with the columns name, kind,
    verdict, score, raw, z and note. Missing numeric values are shown
    as a dash; name, kind, verdict text and message are HTML-escaped.
    """
    missing = "—"
    body_rows: list[str] = []
    for probe in suite.probes:
        verdict_text = probe.verdict.value
        score_cell = missing if probe.score is None else f"{probe.score:.2f}"
        raw_cell = missing if probe.raw is None else f"{probe.raw:,.3f}"
        z_cell = missing if probe.z_score is None else f"{probe.z_score:+.2f}σ"
        cells = (
            f"<td>{html.escape(probe.name)}</td>",
            f"<td><code>{html.escape(probe.kind)}</code></td>",
            f"<td class='v-{verdict_text}'>{html.escape(verdict_text)}</td>",
            f"<td>{score_cell}</td>",
            f"<td>{raw_cell}</td>",
            f"<td>{z_cell}</td>",
            f"<td class='note'>{html.escape(probe.message or '')}</td>",
        )
        body_rows.append("<tr>" + "".join(cells) + "</tr>")

    table_head = (
        "<thead><tr><th>name</th><th>kind</th><th>verdict</th>"
        "<th>score</th><th>raw</th><th>z</th><th>note</th></tr></thead>"
    )
    table_body = "<tbody>" + "".join(body_rows) + "</tbody>"
    return (
        "<section class='probe-table'>"
        "<h2>Probes</h2>"
        "<table>" + table_head + table_body + "</table></section>"
    )
368
369
def _first_probe_of_kind(suite: SuiteResult, kind: str) -> ProbeResult | None:
    """Return the first probe of ``kind`` that has a score, else ``None``.

    Probes of the right kind whose ``score`` is ``None`` are passed over.
    """
    scored_matches = (
        candidate
        for candidate in suite.probes
        if candidate.kind == kind and candidate.score is not None
    )
    return next(scored_matches, None)
375
376
def _band_color(band: str) -> str:
    """Map a score band name to its gauge bar colour.

    Any band not in the table falls back to a neutral grey.
    """
    palette = {
        "noise": "#dc3545",
        "partial": "#ffc107",
        "healthy": "#28a745",
        "suspicious": "#9c27b0",
    }
    if band in palette:
        return palette[band]
    return "#6c757d"
384
385
# ----------------------------------------------------------------------
# Static template. Kept inline (no separate template file) because a
# single page has one consumer and a two-file split would be more
# ceremony than it's worth at this scale.
#
# Filled in by ``_assemble`` through ``str.format`` with the fields
# ``title``, ``plotly_js``, ``header``, ``panels`` and ``probe_table``.
# Because of that, every literal CSS brace below is doubled
# (``{{`` / ``}}``) so ``format`` emits a single brace instead of
# treating it as a replacement field. The ``.v-*`` rules style the
# ``v-<verdict>`` classes emitted for the verdict summary and the
# probe table.
# ----------------------------------------------------------------------

_TEMPLATE = """<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="generator" content="sway report html">
<title>{title}</title>
<style>
body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
margin: 0; padding: 2rem; color: #222; background: #fafafa; max-width: 1100px;
margin-left: auto; margin-right: auto; }}
h1 {{ margin-bottom: 0.25rem; }}
p.meta {{ color: #666; margin-top: 0; }}
p.summary {{ font-size: 1.1rem; margin-top: 0.5rem; }}
section.panel {{ background: #fff; border: 1px solid #e0e0e0; border-radius: 6px;
padding: 1rem; margin-top: 1rem; }}
section.panel h2 {{ margin-top: 0; font-size: 1.1rem; color: #333; }}
section.probe-table {{ background: #fff; border: 1px solid #e0e0e0; border-radius: 6px;
padding: 1rem; margin-top: 1rem; }}
section.probe-table table {{ border-collapse: collapse; width: 100%; font-size: 0.9rem; }}
section.probe-table th, section.probe-table td {{ padding: 0.4rem 0.6rem;
border-bottom: 1px solid #eee;
text-align: left; vertical-align: top; }}
section.probe-table td.note {{ color: #555; }}
.v-pass {{ color: #28a745; font-weight: bold; }}
.v-fail {{ color: #dc3545; font-weight: bold; }}
.v-warn {{ color: #c98a00; font-weight: bold; }}
.v-skip {{ color: #6c757d; }}
.v-error {{ color: #9c27b0; font-weight: bold; }}
code {{ font-family: 'Menlo', 'Consolas', monospace; font-size: 0.9em;
background: #f0f0f0; padding: 0.05em 0.3em; border-radius: 3px; }}
</style>
<script type="text/javascript">{plotly_js}</script>
</head>
<body>
{header}
{panels}
{probe_table}
</body>
</html>
"""


# Only the renderer entry point is exported; everything else in this
# module is a private helper.
__all__ = ["to_html"]