tenseleyflow/sway / 3732e0b

Browse files

sway: scaffold standalone subproject (pyproject, LICENSE, README)

Authored by espadonne
SHA
3732e0b0b7a7c8764be3e80c92afacfb5be47391
Tree
bd74e8a

16 changed files

StatusFile+-
A LICENSE 21 0
A README.md 101 0
A pyproject.toml 203 0
A src/dlm_sway/backends/__init__.py 1 0
A src/dlm_sway/cli/__init__.py 1 0
A src/dlm_sway/core/__init__.py 1 0
A src/dlm_sway/integrations/__init__.py 1 0
A src/dlm_sway/integrations/dlm/__init__.py 1 0
A src/dlm_sway/probes/__init__.py 1 0
A src/dlm_sway/py.typed 0 0
A src/dlm_sway/suite/__init__.py 1 0
A tests/__init__.py 0 0
A tests/conftest.py 24 0
A tests/fixtures/__init__.py 0 0
A tests/integration/__init__.py 0 0
A tests/unit/__init__.py 0 0
LICENSEadded
@@ -0,0 +1,21 @@
1
+MIT License
2
+
3
+Copyright (c) 2026 Matt Wolffe
4
+
5
+Permission is hereby granted, free of charge, to any person obtaining a copy
6
+of this software and associated documentation files (the "Software"), to deal
7
+in the Software without restriction, including without limitation the rights
8
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+copies of the Software, and to permit persons to whom the Software is
10
+furnished to do so, subject to the following conditions:
11
+
12
+The above copyright notice and this permission notice shall be included in all
13
+copies or substantial portions of the Software.
14
+
15
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+SOFTWARE.
README.mdadded
@@ -0,0 +1,101 @@
1
+# dlm-sway
2
+
3
+Differential testing for fine-tuned causal language models.
4
+
5
+**One question:** *did LoRA/QLoRA training actually change model behavior
6
+in a meaningful way, or is the model just defaulting to the pretrained
7
+base?*
8
+
9
+`dlm-sway` gives you a trustworthy, reproducible answer with ten
10
+purpose-built primitives, each z-scored against a null-adapter baseline.
11
+No LLM judges. No external APIs. Deterministic on CPU where possible.
12
+
13
+## Install
14
+
15
+```bash
16
+pip install "dlm-sway[hf]"                # HuggingFace + PEFT backend
17
+pip install "dlm-sway[hf,style,semsim]"   # full primitive battery
18
+pip install "dlm-sway[all]"               # everything including optional viz
19
+pip install "dlm-sway[dlm]"               # auto-generate tests from a .dlm file
20
+```
21
+
22
+## 90-second smoke test
23
+
24
+```bash
25
+dlm-sway check path/to/adapter --base HuggingFaceTB/SmolLM2-135M-Instruct
26
+```
27
+
28
+Outputs a verdict in under a minute on CPU for small models: *your
29
+adapter is 4.2σ above noise* ✅ or *indistinguishable from a null
30
+adapter* ❌.
31
+
32
+## Full suite
33
+
34
+```yaml
35
+# sway.yaml
36
+version: 1
37
+models:
38
+  base: {kind: hf, base: "HuggingFaceTB/SmolLM2-135M-Instruct"}
39
+  ft:   {kind: hf, base: "HuggingFaceTB/SmolLM2-135M-Instruct",
40
+         adapter: "./runs/adapter/v0003"}
41
+suite:
42
+  - {name: knows_concept, kind: dir,
43
+     prompt: "The Dunning-Kruger effect describes",
44
+     target: " a cognitive bias where",
45
+     distractor: " a programming language"}
46
+  - {name: no_reversion, kind: adapter_revert, paraphrases: 4}
47
+  - {name: section_attribution, kind: section_internalization}
48
+```
49
+
50
+```bash
51
+dlm-sway run sway.yaml              # full report to terminal + JSON
52
+dlm-sway gate sway.yaml --junit     # CI-friendly; non-zero on fail
53
+```
54
+
55
+## Why it exists
56
+
57
+Standard benchmarks (MMLU, HellaSwag) ask *"how good is this model?"*
58
+That's the wrong question after a targeted LoRA fine-tune on a small
59
+user-authored document. The right question is *"did the adapter actually
60
+move the model toward what I wrote?"* — and existing tools answer this
61
+poorly.
62
+
63
+`dlm-sway` answers it directly via ten primitives across four
64
+categories:
65
+
66
+| Category      | Primitives                                            |
67
+|---------------|-------------------------------------------------------|
68
+| Adherence     | `delta_kl`, `adapter_revert`, `prompt_collapse`       |
69
+| Attribution   | `section_internalization`, `paraphrase_invariance`, `preference_flip` |
70
+| Calibration   | `style_fingerprint`, `calibration_drift`, `leakage`   |
71
+| Ablation      | `adapter_ablation` ← the signature primitive          |
72
+
73
+**The signature primitive.** `adapter_ablation` scales the LoRA additive
74
+term by λ ∈ {0, 0.25, 0.5, 0.75, 1.0, 1.25} and measures the divergence
75
+curve. A healthy fine-tune shows a smooth, monotonic, non-saturated
76
+response. A degenerate one shows a step function or an
77
+overshoot-then-crash. Nobody else does this because nobody else gets this close to the
78
+adapter math.
79
+
80
+## The `.dlm` integration
81
+
82
+If you trained your adapter via the [DocumentLanguageModel
83
+project](https://github.com/tenseleyFlow/DocumentLanguageModel), sway
84
+can auto-generate a test suite from your document's sections:
85
+
86
+```bash
87
+pip install "dlm-sway[hf,dlm]"
88
+dlm-sway autogen path/to/doc.dlm -o sway.yaml
89
+dlm-sway run sway.yaml
90
+```
91
+
92
+Per-section attribution tells you *which* parts of your document
93
+actually moved the model — a kind of signal no other tool provides.
94
+
95
+## Status
96
+
97
+Pre-alpha. API will break. Version `0.1.0` is the first tag.
98
+
99
+## License
100
+
101
+MIT
pyproject.tomladded
@@ -0,0 +1,203 @@
1
+[project]
2
+name = "dlm-sway"
3
+version = "0.1.0.dev0"
4
+description = "Differential testing for fine-tuned causal LMs: did LoRA/QLoRA training actually change behavior, or is the model defaulting to the pretrained base?"
5
+readme = "README.md"
6
+requires-python = ">=3.11"
7
+license = { text = "MIT" }
8
+authors = [{ name = "Matt Wolffe", email = "mfwolffe@outlook.com" }]
9
+keywords = [
10
+    "lora",
11
+    "qlora",
12
+    "peft",
13
+    "fine-tuning",
14
+    "evaluation",
15
+    "llm",
16
+    "differential-testing",
17
+]
18
+classifiers = [
19
+    "Development Status :: 3 - Alpha",
20
+    "Intended Audience :: Developers",
21
+    "Intended Audience :: Science/Research",
22
+    "License :: OSI Approved :: MIT License",
23
+    "Programming Language :: Python :: 3",
24
+    "Programming Language :: Python :: 3.11",
25
+    "Programming Language :: Python :: 3.12",
26
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
27
+]
28
+
29
+# Core deps: spec loading, orchestration, reporting. No torch — a user
30
+# who only defines specs or writes a custom backend shouldn't pull 3 GB
31
+# of CUDA wheels.
32
+dependencies = [
33
+    "pydantic>=2.9",
34
+    "pyyaml>=6.0",
35
+    "typer>=0.12",
36
+    "rich>=13.7",
37
+    "numpy>=1.26",
38
+    "packaging>=24.0",
39
+]
40
+
41
+[project.optional-dependencies]
42
+# HuggingFace + PEFT scoring backend. The canonical path.
43
+hf = [
44
+    "torch>=2.4",
45
+    "transformers>=4.45",
46
+    "peft>=0.13",
47
+    "safetensors>=0.4",
48
+]
49
+# Apple Silicon inference. Env markers keep `uv sync --extra mlx` a no-op
50
+# on non-Apple hosts so Linux/CUDA contributors' wheel resolution stays
51
+# sane.
52
+mlx = [
53
+    "mlx>=0.18; sys_platform == 'darwin' and platform_machine == 'arm64'",
54
+    "mlx-lm>=0.19; sys_platform == 'darwin' and platform_machine == 'arm64'",
55
+]
56
+# Stylistic fingerprinting (C1). spaCy models pull at runtime via
57
+# `python -m spacy download`.
58
+style = [
59
+    "spacy>=3.7",
60
+    "textstat>=0.7",
61
+    "nlpaug>=1.1",
62
+]
63
+# Semantic similarity (A2). MiniLM ~80 MB, CPU-friendly.
64
+semsim = [
65
+    "sentence-transformers>=3.0",
66
+]
67
+# Optional .dlm integration. Only imported inside dlm_sway.integrations.dlm.
68
+dlm = [
69
+    "dlm>=0.9",
70
+]
71
+# Visualization (P9).
72
+viz = [
73
+    "matplotlib>=3.8",
74
+]
75
+all = [
76
+    "torch>=2.4",
77
+    "transformers>=4.45",
78
+    "peft>=0.13",
79
+    "safetensors>=0.4",
80
+    "mlx>=0.18; sys_platform == 'darwin' and platform_machine == 'arm64'",
81
+    "mlx-lm>=0.19; sys_platform == 'darwin' and platform_machine == 'arm64'",
82
+    "spacy>=3.7",
83
+    "textstat>=0.7",
84
+    "nlpaug>=1.1",
85
+    "sentence-transformers>=3.0",
86
+    "matplotlib>=3.8",
87
+]
88
+
89
+[project.scripts]
90
+dlm-sway = "dlm_sway.cli.app:main"
91
+
92
+[project.urls]
93
+Homepage = "https://github.com/tenseleyFlow/DocumentLanguageModel"
94
+Issues = "https://github.com/tenseleyFlow/DocumentLanguageModel/issues"
95
+
96
+[dependency-groups]
97
+dev = [
98
+    "pytest>=8.0",
99
+    "pytest-cov>=5.0",
100
+    "mypy>=1.11",
101
+    "ruff>=0.6",
102
+    "types-pyyaml>=6.0",
103
+    "hypothesis>=6.152.1",
104
+]
105
+
106
+[build-system]
107
+requires = ["hatchling"]
108
+build-backend = "hatchling.build"
109
+
110
+[tool.hatch.build.targets.wheel]
111
+packages = ["src/dlm_sway"]
112
+
113
+# -------- ruff --------
114
+[tool.ruff]
115
+line-length = 100
116
+target-version = "py311"
117
+src = ["src", "tests"]
118
+
119
+[tool.ruff.lint]
120
+select = [
121
+    "E",    # pycodestyle errors
122
+    "F",    # pyflakes
123
+    "W",    # pycodestyle warnings
124
+    "I",    # isort
125
+    "UP",   # pyupgrade
126
+    "B",    # bugbear
127
+    "N",    # pep8-naming
128
+    "C4",   # comprehensions
129
+    "SIM",  # simplify
130
+    "PT",   # pytest
131
+    "RET",  # return
132
+    "ARG",  # unused args
133
+    "PTH",  # use pathlib
134
+    "TID",  # tidy imports
135
+]
136
+ignore = [
137
+    "E501",  # handled by formatter
138
+]
139
+
140
+[tool.ruff.lint.per-file-ignores]
141
+"tests/**/*.py" = ["ARG", "PT011", "SIM117"]
142
+
143
+[tool.ruff.lint.flake8-tidy-imports.banned-api]
144
+# Hard architectural boundary: the `dlm` package is only importable
145
+# from inside the optional integration shim. This keeps dlm-sway
146
+# usable for anyone with just a HuggingFace base + PEFT adapter.
147
+"dlm".msg = "Import `dlm` only from dlm_sway.integrations.dlm (the optional extra)."
148
+
149
+[tool.ruff.format]
150
+quote-style = "double"
151
+indent-style = "space"
152
+
153
+# -------- mypy --------
154
+[tool.mypy]
155
+strict = true
156
+python_version = "3.11"
157
+packages = ["dlm_sway"]
158
+mypy_path = "src"
159
+warn_return_any = true
160
+warn_unused_ignores = true
161
+warn_redundant_casts = true
162
+no_implicit_optional = true
163
+disallow_untyped_decorators = true
164
+plugins = ["pydantic.mypy"]
165
+
166
+[tool.pydantic-mypy]
167
+init_forbid_extra = true
168
+init_typed = true
169
+warn_required_dynamic_aliases = true
170
+
171
+# Stubless ML ecosystem packages. Narrow boundaries in backends/* import
172
+# them explicitly; the rest of the codebase stays strict.
173
+[[tool.mypy.overrides]]
174
+module = [
175
+    "torch",
176
+    "torch.*",
177
+    "transformers.*",
178
+    "peft.*",
179
+    "safetensors.*",
180
+    "mlx.*",
181
+    "mlx_lm.*",
182
+    "sentence_transformers.*",
183
+    "spacy.*",
184
+    "textstat.*",
185
+    "nlpaug.*",
186
+    "huggingface_hub.*",
187
+    "dlm.*",
188
+]
189
+ignore_missing_imports = true
190
+disable_error_code = ["no-untyped-call"]
191
+
192
+# -------- pytest --------
193
+[tool.pytest.ini_options]
194
+testpaths = ["tests"]
195
+addopts = [
196
+    "-ra",
197
+    "-m", "not slow and not gpu and not online",
198
+]
199
+markers = [
200
+    "slow: expensive; deselected by default",
201
+    "gpu: requires CUDA; skipped on CPU/MPS runners",
202
+    "online: touches the network; skipped in offline CI",
203
+]
src/dlm_sway/backends/__init__.pyadded
@@ -0,0 +1,1 @@
1
+"""Scoring backends: HuggingFace (``hf``), MLX (``mlx``), dummy, custom."""
src/dlm_sway/cli/__init__.pyadded
@@ -0,0 +1,1 @@
1
+"""Command-line interface (entry point: ``dlm-sway``)."""
src/dlm_sway/core/__init__.pyadded
@@ -0,0 +1,1 @@
1
+"""Core abstractions: protocols, results, errors, determinism."""
src/dlm_sway/integrations/__init__.pyadded
@@ -0,0 +1,1 @@
1
+"""Optional integrations with upstream fine-tuning tools."""
src/dlm_sway/integrations/dlm/__init__.pyadded
@@ -0,0 +1,1 @@
1
+"""DLM project integration. Imports the ``dlm`` package; requires ``dlm-sway[dlm]``."""
src/dlm_sway/probes/__init__.pyadded
@@ -0,0 +1,1 @@
1
+"""Probe primitives. Each module in this package implements one primitive."""
src/dlm_sway/py.typedadded
src/dlm_sway/suite/__init__.pyadded
@@ -0,0 +1,1 @@
1
+"""Suite plumbing: spec models, loader, runner, report, composite score."""
tests/__init__.pyadded
tests/conftest.pyadded
@@ -0,0 +1,24 @@
1
+"""Shared test fixtures.
2
+
3
+Keep the default fast-test environment offline and deterministic so unit
4
+tests stay below ~1 s per file. Integration tests override these via
5
+their own ``conftest`` when they need network access.
6
+"""
7
+
8
+from __future__ import annotations
9
+
10
+import pytest
11
+
12
+
13
+@pytest.fixture(autouse=True)
14
+def _offline_and_no_telemetry(monkeypatch: pytest.MonkeyPatch) -> None:
15
+    """Unit tests never touch the network.
16
+
17
+    Any backend test that needs HF should be marked ``@pytest.mark.online``
18
+    and clear these vars explicitly.
19
+    """
20
+    monkeypatch.setenv("HF_HUB_OFFLINE", "1")
21
+    monkeypatch.setenv("TRANSFORMERS_OFFLINE", "1")
22
+    monkeypatch.setenv("HF_DATASETS_OFFLINE", "1")
23
+    monkeypatch.setenv("HF_HUB_DISABLE_TELEMETRY", "1")
24
+    monkeypatch.setenv("DO_NOT_TRACK", "1")
tests/fixtures/__init__.pyadded
tests/integration/__init__.pyadded
tests/unit/__init__.pyadded