`228d404`

sway(viz): matplotlib plots for SIS, adapter ablation, KL histogram (viz extra)

Authored by

espadonne 3 weeks ago

SHA: 228d4049a2215a71a26a361fe8db2926bb5a429f
Parents: 2e074c6
Tree: fe511b1

3 changed files

Status	File	+
M	`pyproject.toml`	2
A	`src/dlm_sway/visualize.py`	137
A	`tests/unit/test_visualize.py`	202

pyproject.toml (modified)

      "spacy.*",
      "textstat.*",
      "nlpaug.*",
 +    "matplotlib",
 +    "matplotlib.*",
      "huggingface_hub.*",
      "dlm.*",
+ ]

src/dlm_sway/visualize.py (added)

 +"""Optional matplotlib-based visualizations.
++
 +Behind the ``viz`` extra. Three functions cover the three plots that
 +make the sway report come alive in a notebook or saved PNG:
++
 +- :func:`plot_section_sis`: per-section bar chart of effective SIS
 +  (the flagship attribution view).
 +- :func:`plot_adapter_ablation`: the λ-scaled divergence curve — the
 +  sway signature plot.
 +- :func:`plot_kl_histogram`: distribution of per-prompt KL divergences
 +  (the raw data behind A1 DeltaKL).
++
 +Each function raises :class:`~dlm_sway.core.errors.BackendNotAvailableError`
 +with a pip hint when matplotlib isn't installed. No function writes to
 +disk on your behalf — the caller decides (``fig.savefig(...)``).
 +"""
++
 +from __future__ import annotations
++
 +from typing import Any
++
 +from dlm_sway.core.errors import BackendNotAvailableError
 +from dlm_sway.core.result import SuiteResult
++
++
 +def _require_mpl() -> Any:
 +    try:
 +        import matplotlib.pyplot as plt
++
 +        return plt
 +    except ImportError as exc:
 +        raise BackendNotAvailableError(
 +            "visualize",
 +            extra="viz",
 +            hint="sway's visualization module needs matplotlib.",
 +        ) from exc
++
++
 +def plot_section_sis(suite: SuiteResult) -> Any:
 +    """Render a per-section ``effective_sis`` bar chart.
++
 +    Returns the matplotlib ``Figure``; the caller handles display / save.
 +    """
 +    plt = _require_mpl()
++
 +    probe = _find_probe(suite, "section_internalization")
 +    if probe is None or not probe.evidence.get("per_section"):
 +        raise ValueError("suite has no section_internalization evidence to plot")
++
 +    rows: list[dict[str, Any]] = list(probe.evidence["per_section"])
 +    labels = [f"{row['tag'] or row['section_id'][:8]}\n({row['kind']})" for row in rows]
 +    values = [float(row["effective_sis"]) for row in rows]
 +    colors = ["#2ca02c" if row["passed"] else "#d62728" for row in rows]
++
 +    fig, ax = plt.subplots(figsize=(max(6.0, 0.7 * len(rows)), 4.0))
 +    ax.bar(range(len(rows)), values, color=colors)
 +    ax.axhline(
 +        float(probe.evidence.get("per_section_threshold", 0.0)),
 +        color="gray",
 +        linestyle="--",
 +        linewidth=1,
 +        label="threshold",
 +    )
 +    ax.set_xticks(range(len(rows)))
 +    ax.set_xticklabels(labels, rotation=30, ha="right")
 +    ax.set_ylabel("effective SIS")
 +    ax.set_title("Section Internalization Score")
 +    ax.legend(loc="best")
 +    fig.tight_layout()
 +    return fig
++
++
 +def plot_adapter_ablation(suite: SuiteResult) -> Any:
 +    """Render the signature λ-scaled divergence curve."""
 +    plt = _require_mpl()
++
 +    probe = _find_probe(suite, "adapter_ablation")
 +    if probe is None or not probe.evidence.get("lambdas"):
 +        raise ValueError("suite has no adapter_ablation evidence to plot")
++
 +    lambdas = list(probe.evidence["lambdas"])
 +    divs = list(probe.evidence["mean_divergence_per_lambda"])
++
 +    fig, ax = plt.subplots(figsize=(7.0, 4.0))
 +    ax.plot(lambdas, divs, marker="o", linewidth=2, color="#1f77b4")
 +    ax.axvline(1.0, color="gray", linestyle=":", linewidth=1, label="λ=1 (trained)")
 +    sat = probe.evidence.get("saturation_lambda")
 +    if sat is not None:
 +        ax.axvline(
 +            float(sat),
 +            color="#2ca02c",
 +            linestyle="--",
 +            linewidth=1,
 +            label=f"sat λ={float(sat):.2f}",
 +        )
 +    ax.set_xlabel("λ (adapter scale)")
 +    ax.set_ylabel("mean JS divergence vs λ=0")
 +    ax.set_title(
 +        f"Adapter Ablation (R²={float(probe.evidence.get('linearity', 0.0)):.2f}, "
 +        f"overshoot={float(probe.evidence.get('overshoot', 0.0)):.2f})"
 +    )
 +    ax.legend(loc="best")
 +    fig.tight_layout()
 +    return fig
++
++
 +def plot_kl_histogram(suite: SuiteResult) -> Any:
 +    """Render the per-prompt KL distribution from a DeltaKL probe."""
 +    plt = _require_mpl()
++
 +    probe = _find_probe(suite, "delta_kl")
 +    if probe is None or not probe.evidence.get("per_prompt"):
 +        raise ValueError("suite has no delta_kl evidence to plot")
++
 +    values = list(probe.evidence["per_prompt"])
 +    fig, ax = plt.subplots(figsize=(7.0, 4.0))
 +    ax.hist(values, bins=max(5, min(20, len(values) // 2)), color="#ff7f0e", edgecolor="white")
 +    ax.axvline(
 +        float(probe.raw or 0.0),
 +        color="black",
 +        linestyle="--",
 +        linewidth=1,
 +        label=f"mean={float(probe.raw or 0.0):.3f}",
 +    )
 +    ax.set_xlabel(probe.evidence.get("divergence_kind", "divergence"))
 +    ax.set_ylabel("count")
 +    ax.set_title("DeltaKL — per-prompt distribution")
 +    ax.legend(loc="best")
 +    fig.tight_layout()
 +    return fig
++
++
 +def _find_probe(suite: SuiteResult, kind: str) -> Any:
 +    for p in suite.probes:
 +        if p.kind == kind:
 +            return p
 +    return None

tests/unit/test_visualize.py (added)

 +"""Tests for :mod:`dlm_sway.visualize`.
++
 +Exercises the error path (matplotlib missing) and the happy path when
 +the module is present by stubbing ``matplotlib.pyplot`` via sys.modules.
 +"""
++
 +from __future__ import annotations
++
 +import sys
 +import types
 +from datetime import timedelta
++
 +import pytest
++
 +from dlm_sway.core.errors import BackendNotAvailableError
 +from dlm_sway.core.result import ProbeResult, SuiteResult, Verdict, utcnow
++
++
 +def _suite_with(*probes: ProbeResult) -> SuiteResult:
 +    started = utcnow()
 +    return SuiteResult(
 +        spec_path="sway.yaml",
 +        started_at=started,
 +        finished_at=started + timedelta(seconds=1),
 +        base_model_id="b",
 +        adapter_id="a",
 +        sway_version="0.1.0.dev0",
 +        probes=probes,
 +    )
++
++
 +class _FakeFig:
 +    def tight_layout(self) -> None:  # pragma: no cover — trivial
 +        return None
++
++
 +class _FakeAx:
 +    def __init__(self) -> None:
 +        self.calls: list[str] = []
++
 +    def bar(self, *a, **k):  # type: ignore[no-untyped-def]
 +        self.calls.append("bar")
++
 +    def plot(self, *a, **k):  # type: ignore[no-untyped-def]
 +        self.calls.append("plot")
++
 +    def hist(self, *a, **k):  # type: ignore[no-untyped-def]
 +        self.calls.append("hist")
++
 +    def axhline(self, *a, **k):  # type: ignore[no-untyped-def]
 +        return None
++
 +    def axvline(self, *a, **k):  # type: ignore[no-untyped-def]
 +        return None
++
 +    def set_xticks(self, *a, **k):  # type: ignore[no-untyped-def]
 +        return None
++
 +    def set_xticklabels(self, *a, **k):  # type: ignore[no-untyped-def]
 +        return None
++
 +    def set_xlabel(self, *a, **k):  # type: ignore[no-untyped-def]
 +        return None
++
 +    def set_ylabel(self, *a, **k):  # type: ignore[no-untyped-def]
 +        return None
++
 +    def set_title(self, *a, **k):  # type: ignore[no-untyped-def]
 +        return None
++
 +    def legend(self, *a, **k):  # type: ignore[no-untyped-def]
 +        return None
++
++
 +@pytest.fixture
 +def fake_mpl(monkeypatch: pytest.MonkeyPatch) -> _FakeAx:
 +    ax = _FakeAx()
++
 +    def _subplots(*a, **k):  # type: ignore[no-untyped-def]
 +        return _FakeFig(), ax
++
 +    plt = types.ModuleType("matplotlib.pyplot")
 +    plt.subplots = _subplots  # type: ignore[attr-defined]
 +    mpl_pkg = types.ModuleType("matplotlib")
 +    monkeypatch.setitem(sys.modules, "matplotlib", mpl_pkg)
 +    monkeypatch.setitem(sys.modules, "matplotlib.pyplot", plt)
 +    return ax
++
++
 +def test_section_sis_plot_uses_per_section_evidence(fake_mpl: _FakeAx) -> None:
 +    from dlm_sway.visualize import plot_section_sis
++
 +    suite = _suite_with(
 +        ProbeResult(
 +            name="sis",
 +            kind="section_internalization",
 +            verdict=Verdict.PASS,
 +            score=0.75,
 +            raw=0.1,
 +            evidence={
 +                "per_section": [
 +                    {
 +                        "section_id": "a",
 +                        "kind": "prose",
 +                        "tag": None,
 +                        "base_nll": 3.0,
 +                        "ft_nll": 2.5,
 +                        "own_lift": 0.17,
 +                        "leak_lift": 0.02,
 +                        "effective_sis": 0.15,
 +                        "passed": True,
 +                    },
 +                    {
 +                        "section_id": "b",
 +                        "kind": "instruction",
 +                        "tag": "intro",
 +                        "base_nll": 4.0,
 +                        "ft_nll": 3.9,
 +                        "own_lift": 0.025,
 +                        "leak_lift": 0.03,
 +                        "effective_sis": -0.005,
 +                        "passed": False,
 +                    },
 +                ],
 +                "per_section_threshold": 0.05,
 +            },
 +        )
 +    )
 +    plot_section_sis(suite)
 +    assert "bar" in fake_mpl.calls
++
++
 +def test_adapter_ablation_plot(fake_mpl: _FakeAx) -> None:
 +    from dlm_sway.visualize import plot_adapter_ablation
++
 +    suite = _suite_with(
 +        ProbeResult(
 +            name="abl",
 +            kind="adapter_ablation",
 +            verdict=Verdict.PASS,
 +            score=0.8,
 +            raw=0.9,
 +            evidence={
 +                "lambdas": [0.0, 0.5, 1.0, 1.25],
 +                "mean_divergence_per_lambda": [0.0, 0.5, 1.0, 1.1],
 +                "linearity": 0.91,
 +                "saturation_lambda": 0.75,
 +                "overshoot": 1.1,
 +            },
 +        )
 +    )
 +    plot_adapter_ablation(suite)
 +    assert "plot" in fake_mpl.calls
++
++
 +def test_kl_histogram_plot(fake_mpl: _FakeAx) -> None:
 +    from dlm_sway.visualize import plot_kl_histogram
++
 +    suite = _suite_with(
 +        ProbeResult(
 +            name="dk",
 +            kind="delta_kl",
 +            verdict=Verdict.PASS,
 +            score=0.7,
 +            raw=0.1,
 +            evidence={"per_prompt": [0.05, 0.1, 0.12, 0.09, 0.15], "divergence_kind": "js"},
 +        )
 +    )
 +    plot_kl_histogram(suite)
 +    assert "hist" in fake_mpl.calls
++
++
 +def test_raises_when_matplotlib_missing(monkeypatch: pytest.MonkeyPatch) -> None:
 +    # Purge matplotlib modules and block imports.
 +    for mod in list(sys.modules):
 +        if mod == "matplotlib" or mod.startswith("matplotlib."):
 +            monkeypatch.delitem(sys.modules, mod, raising=False)
++
 +    import builtins
++
 +    real_import = builtins.__import__
++
 +    def fake_import(name: str, *a, **k):  # type: ignore[no-untyped-def]
 +        if name == "matplotlib" or name.startswith("matplotlib."):
 +            raise ImportError("matplotlib missing in this venv")
 +        return real_import(name, *a, **k)
++
 +    monkeypatch.setattr(builtins, "__import__", fake_import)
++
 +    from dlm_sway.visualize import plot_section_sis
++
 +    suite = _suite_with()
 +    with pytest.raises(BackendNotAvailableError):
 +        plot_section_sis(suite)
++
++
 +def test_raises_when_no_matching_probe(fake_mpl: _FakeAx) -> None:
 +    from dlm_sway.visualize import plot_section_sis
++
 +    suite = _suite_with()  # empty — no section_internalization probe
 +    with pytest.raises(ValueError, match="section_internalization"):
 +        plot_section_sis(suite)