sway: scaffold standalone subproject (pyproject, LICENSE, README)
- SHA: 3732e0b0b7a7c8764be3e80c92afacfb5be47391
- Tree: bd74e8a

| Status | File | + | - |
|---|---|---|---|
| A |
LICENSE
|
21 | 0 |
| A |
README.md
|
101 | 0 |
| A |
pyproject.toml
|
203 | 0 |
| A |
src/dlm_sway/backends/__init__.py
|
1 | 0 |
| A |
src/dlm_sway/cli/__init__.py
|
1 | 0 |
| A |
src/dlm_sway/core/__init__.py
|
1 | 0 |
| A |
src/dlm_sway/integrations/__init__.py
|
1 | 0 |
| A |
src/dlm_sway/integrations/dlm/__init__.py
|
1 | 0 |
| A |
src/dlm_sway/probes/__init__.py
|
1 | 0 |
| A |
src/dlm_sway/py.typed
|
0 | 0 |
| A |
src/dlm_sway/suite/__init__.py
|
1 | 0 |
| A |
tests/__init__.py
|
0 | 0 |
| A |
tests/conftest.py
|
24 | 0 |
| A |
tests/fixtures/__init__.py
|
0 | 0 |
| A |
tests/integration/__init__.py
|
0 | 0 |
| A |
tests/unit/__init__.py
|
0 | 0 |
LICENSE (added) @@ -0,0 +1,21 @@
| 1 | +MIT License | |
| 2 | + | |
| 3 | +Copyright (c) 2026 Matt Wolffe | |
| 4 | + | |
| 5 | +Permission is hereby granted, free of charge, to any person obtaining a copy | |
| 6 | +of this software and associated documentation files (the "Software"), to deal | |
| 7 | +in the Software without restriction, including without limitation the rights | |
| 8 | +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
| 9 | +copies of the Software, and to permit persons to whom the Software is | |
| 10 | +furnished to do so, subject to the following conditions: | |
| 11 | + | |
| 12 | +The above copyright notice and this permission notice shall be included in all | |
| 13 | +copies or substantial portions of the Software. | |
| 14 | + | |
| 15 | +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
| 16 | +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
| 17 | +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
| 18 | +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
| 19 | +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
| 20 | +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
| 21 | +SOFTWARE. | |
README.md (added) @@ -0,0 +1,101 @@
| 1 | +# dlm-sway | |
| 2 | + | |
| 3 | +Differential testing for fine-tuned causal language models. | |
| 4 | + | |
| 5 | +**One question:** *did LoRA/QLoRA training actually change model behavior | |
| 6 | +in a meaningful way, or is the model just defaulting to the pretrained | |
| 7 | +base?* | |
| 8 | + | |
| 9 | +`dlm-sway` gives you a trustworthy, reproducible answer with ten | |
| 10 | +purpose-built primitives, each z-scored against a null-adapter baseline. | |
| 11 | +No LLM judges. No external APIs. Deterministic on CPU where possible. | |
| 12 | + | |
| 13 | +## Install | |
| 14 | + | |
| 15 | +```bash | |
| 16 | +pip install "dlm-sway[hf]" # HuggingFace + PEFT backend | |
| 17 | +pip install "dlm-sway[hf,style,semsim]" # full primitive battery | |
| 18 | +pip install "dlm-sway[all]" # everything including optional viz | |
| 19 | +pip install "dlm-sway[dlm]" # auto-generate tests from a .dlm file | |
| 20 | +``` | |
| 21 | + | |
| 22 | +## 90-second smoke test | |
| 23 | + | |
| 24 | +```bash | |
| 25 | +dlm-sway check path/to/adapter --base HuggingFaceTB/SmolLM2-135M-Instruct | |
| 26 | +``` | |
| 27 | + | |
| 28 | +Outputs a verdict in under a minute on CPU for small models: *your | |
| 29 | +adapter is 4.2σ above noise* ✅ or *indistinguishable from a null | |
| 30 | +adapter* ❌. | |
| 31 | + | |
| 32 | +## Full suite | |
| 33 | + | |
| 34 | +```yaml | |
| 35 | +# sway.yaml | |
| 36 | +version: 1 | |
| 37 | +models: | |
| 38 | + base: {kind: hf, base: "HuggingFaceTB/SmolLM2-135M-Instruct"} | |
| 39 | + ft: {kind: hf, base: "HuggingFaceTB/SmolLM2-135M-Instruct", | |
| 40 | + adapter: "./runs/adapter/v0003"} | |
| 41 | +suite: | |
| 42 | + - {name: knows_concept, kind: dir, | |
| 43 | + prompt: "The Dunning-Kruger effect describes", | |
| 44 | + target: " a cognitive bias where", | |
| 45 | + distractor: " a programming language"} | |
| 46 | + - {name: no_reversion, kind: adapter_revert, paraphrases: 4} | |
| 47 | + - {name: section_attribution, kind: section_internalization} | |
| 48 | +``` | |
| 49 | + | |
| 50 | +```bash | |
| 51 | +dlm-sway run sway.yaml # full report to terminal + JSON | |
| 52 | +dlm-sway gate sway.yaml --junit # CI-friendly; non-zero on fail | |
| 53 | +``` | |
| 54 | + | |
| 55 | +## Why it exists | |
| 56 | + | |
| 57 | +Standard benchmarks (MMLU, HellaSwag) ask *"how good is this model?"* | |
| 58 | +That's the wrong question after a targeted LoRA fine-tune on a small | |
| 59 | +user-authored document. The right question is *"did the adapter actually | |
| 60 | +move the model toward what I wrote?"* — and existing tools answer this | |
| 61 | +poorly. | |
| 62 | + | |
| 63 | +`dlm-sway` answers it directly via ten primitives across four | |
| 64 | +categories: | |
| 65 | + | |
| 66 | +| Category | Primitives | | |
| 67 | +|---------------|-------------------------------------------------------| | |
| 68 | +| Adherence | `delta_kl`, `adapter_revert`, `prompt_collapse` | | |
| 69 | +| Attribution | `section_internalization`, `paraphrase_invariance`, `preference_flip` | | |
| 70 | +| Calibration | `style_fingerprint`, `calibration_drift`, `leakage` | | |
| 71 | +| Ablation | `adapter_ablation` ← the signature primitive | | |
| 72 | + | |
| 73 | +**The signature primitive.** `adapter_ablation` scales the LoRA additive | |
| 74 | +term by λ ∈ {0, 0.25, 0.5, 0.75, 1.0, 1.25} and measures the divergence | |
| 75 | +curve. A healthy fine-tune shows a smooth, monotonic, non-saturated | |
| 76 | +response. A degenerate one shows a step function or an overshoot-then- | |
| 77 | +crash. Nobody else does this because nobody else gets this close to the | |
| 78 | +adapter math. | |
| 79 | + | |
| 80 | +## The `.dlm` integration | |
| 81 | + | |
| 82 | +If you trained your adapter via the [DocumentLanguageModel | |
| 83 | +project](https://github.com/tenseleyFlow/DocumentLanguageModel), sway | |
| 84 | +can auto-generate a test suite from your document's sections: | |
| 85 | + | |
| 86 | +```bash | |
| 87 | +pip install "dlm-sway[hf,dlm]" | |
| 88 | +dlm-sway autogen path/to/doc.dlm -o sway.yaml | |
| 89 | +dlm-sway run sway.yaml | |
| 90 | +``` | |
| 91 | + | |
| 92 | +Per-section attribution tells you *which* parts of your document | |
| 93 | +actually moved the model — a kind of signal no other tool provides. | |
| 94 | + | |
| 95 | +## Status | |
| 96 | + | |
| 97 | +Pre-alpha. API will break. Version `0.1.0` is the first tag. | |
| 98 | + | |
| 99 | +## License | |
| 100 | + | |
| 101 | +MIT | |
pyproject.toml (added) @@ -0,0 +1,203 @@
| 1 | +[project] | |
| 2 | +name = "dlm-sway" | |
| 3 | +version = "0.1.0.dev0" | |
| 4 | +description = "Differential testing for fine-tuned causal LMs: did LoRA/QLoRA training actually change behavior, or is the model defaulting to the pretrained base?" | |
| 5 | +readme = "README.md" | |
| 6 | +requires-python = ">=3.11" | |
| 7 | +license = { text = "MIT" } | |
| 8 | +authors = [{ name = "Matt Wolffe", email = "mfwolffe@outlook.com" }] | |
| 9 | +keywords = [ | |
| 10 | + "lora", | |
| 11 | + "qlora", | |
| 12 | + "peft", | |
| 13 | + "fine-tuning", | |
| 14 | + "evaluation", | |
| 15 | + "llm", | |
| 16 | + "differential-testing", | |
| 17 | +] | |
| 18 | +classifiers = [ | |
| 19 | + "Development Status :: 3 - Alpha", | |
| 20 | + "Intended Audience :: Developers", | |
| 21 | + "Intended Audience :: Science/Research", | |
| 22 | + "License :: OSI Approved :: MIT License", | |
| 23 | + "Programming Language :: Python :: 3", | |
| 24 | + "Programming Language :: Python :: 3.11", | |
| 25 | + "Programming Language :: Python :: 3.12", | |
| 26 | + "Topic :: Scientific/Engineering :: Artificial Intelligence", | |
| 27 | +] | |
| 28 | + | |
| 29 | +# Core deps: spec loading, orchestration, reporting. No torch — a user | |
| 30 | +# who only defines specs or writes a custom backend shouldn't pull 3 GB | |
| 31 | +# of CUDA wheels. | |
| 32 | +dependencies = [ | |
| 33 | + "pydantic>=2.9", | |
| 34 | + "pyyaml>=6.0", | |
| 35 | + "typer>=0.12", | |
| 36 | + "rich>=13.7", | |
| 37 | + "numpy>=1.26", | |
| 38 | + "packaging>=24.0", | |
| 39 | +] | |
| 40 | + | |
| 41 | +[project.optional-dependencies] | |
| 42 | +# HuggingFace + PEFT scoring backend. The canonical path. | |
| 43 | +hf = [ | |
| 44 | + "torch>=2.4", | |
| 45 | + "transformers>=4.45", | |
| 46 | + "peft>=0.13", | |
| 47 | + "safetensors>=0.4", | |
| 48 | +] | |
| 49 | +# Apple Silicon inference. Env markers keep `uv sync --extra mlx` a no-op | |
| 50 | +# on non-Apple hosts so Linux/CUDA contributors' wheel resolution stays | |
| 51 | +# sane. | |
| 52 | +mlx = [ | |
| 53 | + "mlx>=0.18; sys_platform == 'darwin' and platform_machine == 'arm64'", | |
| 54 | + "mlx-lm>=0.19; sys_platform == 'darwin' and platform_machine == 'arm64'", | |
| 55 | +] | |
| 56 | +# Stylistic fingerprinting (C1). spaCy models pull at runtime via | |
| 57 | +# `python -m spacy download`. | |
| 58 | +style = [ | |
| 59 | + "spacy>=3.7", | |
| 60 | + "textstat>=0.7", | |
| 61 | + "nlpaug>=1.1", | |
| 62 | +] | |
| 63 | +# Semantic similarity (A2). MiniLM ~80 MB, CPU-friendly. | |
| 64 | +semsim = [ | |
| 65 | + "sentence-transformers>=3.0", | |
| 66 | +] | |
| 67 | +# Optional .dlm integration. Only imported inside dlm_sway.integrations.dlm. | |
| 68 | +dlm = [ | |
| 69 | + "dlm>=0.9", | |
| 70 | +] | |
| 71 | +# Visualization (P9). | |
| 72 | +viz = [ | |
| 73 | + "matplotlib>=3.8", | |
| 74 | +] | |
| 75 | +all = [ | |
| 76 | + "torch>=2.4", | |
| 77 | + "transformers>=4.45", | |
| 78 | + "peft>=0.13", | |
| 79 | + "safetensors>=0.4", | |
| 80 | + "mlx>=0.18; sys_platform == 'darwin' and platform_machine == 'arm64'", | |
| 81 | + "mlx-lm>=0.19; sys_platform == 'darwin' and platform_machine == 'arm64'", | |
| 82 | + "spacy>=3.7", | |
| 83 | + "textstat>=0.7", | |
| 84 | + "nlpaug>=1.1", | |
| 85 | + "sentence-transformers>=3.0", | |
| 86 | + "matplotlib>=3.8", | |
| 87 | +] | |
| 88 | + | |
| 89 | +[project.scripts] | |
| 90 | +dlm-sway = "dlm_sway.cli.app:main" | |
| 91 | + | |
| 92 | +[project.urls] | |
| 93 | +Homepage = "https://github.com/tenseleyFlow/DocumentLanguageModel" | |
| 94 | +Issues = "https://github.com/tenseleyFlow/DocumentLanguageModel/issues" | |
| 95 | + | |
| 96 | +[dependency-groups] | |
| 97 | +dev = [ | |
| 98 | + "pytest>=8.0", | |
| 99 | + "pytest-cov>=5.0", | |
| 100 | + "mypy>=1.11", | |
| 101 | + "ruff>=0.6", | |
| 102 | + "types-pyyaml>=6.0", | |
| 103 | + "hypothesis>=6.152.1", | |
| 104 | +] | |
| 105 | + | |
| 106 | +[build-system] | |
| 107 | +requires = ["hatchling"] | |
| 108 | +build-backend = "hatchling.build" | |
| 109 | + | |
| 110 | +[tool.hatch.build.targets.wheel] | |
| 111 | +packages = ["src/dlm_sway"] | |
| 112 | + | |
| 113 | +# -------- ruff -------- | |
| 114 | +[tool.ruff] | |
| 115 | +line-length = 100 | |
| 116 | +target-version = "py311" | |
| 117 | +src = ["src", "tests"] | |
| 118 | + | |
| 119 | +[tool.ruff.lint] | |
| 120 | +select = [ | |
| 121 | + "E", # pycodestyle errors | |
| 122 | + "F", # pyflakes | |
| 123 | + "W", # pycodestyle warnings | |
| 124 | + "I", # isort | |
| 125 | + "UP", # pyupgrade | |
| 126 | + "B", # bugbear | |
| 127 | + "N", # pep8-naming | |
| 128 | + "C4", # comprehensions | |
| 129 | + "SIM", # simplify | |
| 130 | + "PT", # pytest | |
| 131 | + "RET", # return | |
| 132 | + "ARG", # unused args | |
| 133 | + "PTH", # use pathlib | |
| 134 | + "TID", # tidy imports | |
| 135 | +] | |
| 136 | +ignore = [ | |
| 137 | + "E501", # handled by formatter | |
| 138 | +] | |
| 139 | + | |
| 140 | +[tool.ruff.lint.per-file-ignores] | |
| 141 | +"tests/**/*.py" = ["ARG", "PT011", "SIM117"] | |
| 142 | + | |
| 143 | +[tool.ruff.lint.flake8-tidy-imports.banned-api] | |
| 144 | +# Hard architectural boundary: the `dlm` package is only importable | |
| 145 | +# from inside the optional integration shim. This keeps dlm-sway | |
| 146 | +# usable for anyone with just a HuggingFace base + PEFT adapter. | |
| 147 | +"dlm".msg = "Import `dlm` only from dlm_sway.integrations.dlm (the optional extra)." | |
| 148 | + | |
| 149 | +[tool.ruff.format] | |
| 150 | +quote-style = "double" | |
| 151 | +indent-style = "space" | |
| 152 | + | |
| 153 | +# -------- mypy -------- | |
| 154 | +[tool.mypy] | |
| 155 | +strict = true | |
| 156 | +python_version = "3.11" | |
| 157 | +packages = ["dlm_sway"] | |
| 158 | +mypy_path = "src" | |
| 159 | +warn_return_any = true | |
| 160 | +warn_unused_ignores = true | |
| 161 | +warn_redundant_casts = true | |
| 162 | +no_implicit_optional = true | |
| 163 | +disallow_untyped_decorators = true | |
| 164 | +plugins = ["pydantic.mypy"] | |
| 165 | + | |
| 166 | +[tool.pydantic-mypy] | |
| 167 | +init_forbid_extra = true | |
| 168 | +init_typed = true | |
| 169 | +warn_required_dynamic_aliases = true | |
| 170 | + | |
| 171 | +# Stubless ML ecosystem packages. Narrow boundaries in backends/* import | |
| 172 | +# them explicitly; the rest of the codebase stays strict. | |
| 173 | +[[tool.mypy.overrides]] | |
| 174 | +module = [ | |
| 175 | + "torch", | |
| 176 | + "torch.*", | |
| 177 | + "transformers.*", | |
| 178 | + "peft.*", | |
| 179 | + "safetensors.*", | |
| 180 | + "mlx.*", | |
| 181 | + "mlx_lm.*", | |
| 182 | + "sentence_transformers.*", | |
| 183 | + "spacy.*", | |
| 184 | + "textstat.*", | |
| 185 | + "nlpaug.*", | |
| 186 | + "huggingface_hub.*", | |
| 187 | + "dlm.*", | |
| 188 | +] | |
| 189 | +ignore_missing_imports = true | |
| 190 | +disable_error_code = ["no-untyped-call"] | |
| 191 | + | |
| 192 | +# -------- pytest -------- | |
| 193 | +[tool.pytest.ini_options] | |
| 194 | +testpaths = ["tests"] | |
| 195 | +addopts = [ | |
| 196 | + "-ra", | |
| 197 | + "-m", "not slow and not gpu and not online", | |
| 198 | +] | |
| 199 | +markers = [ | |
| 200 | + "slow: expensive; deselected by default", | |
| 201 | + "gpu: requires CUDA; skipped on CPU/MPS runners", | |
| 202 | + "online: touches the network; skipped in offline CI", | |
| 203 | +] | |
src/dlm_sway/backends/__init__.py (added) @@ -0,0 +1,1 @@
| 1 | +"""Scoring backends: HuggingFace (``hf``), MLX (``mlx``), dummy, custom.""" | |
src/dlm_sway/cli/__init__.py (added) @@ -0,0 +1,1 @@
| 1 | +"""Command-line interface (entry point: ``dlm-sway``).""" | |
src/dlm_sway/core/__init__.py (added) @@ -0,0 +1,1 @@
| 1 | +"""Core abstractions: protocols, results, errors, determinism.""" | |
src/dlm_sway/integrations/__init__.py (added) @@ -0,0 +1,1 @@
| 1 | +"""Optional integrations with upstream fine-tuning tools.""" | |
src/dlm_sway/integrations/dlm/__init__.py (added) @@ -0,0 +1,1 @@
| 1 | +"""DLM project integration. Imports the ``dlm`` package; requires ``dlm-sway[dlm]``.""" | |
src/dlm_sway/probes/__init__.py (added) @@ -0,0 +1,1 @@
| 1 | +"""Probe primitives. Each module in this package implements one primitive.""" | |
src/dlm_sway/py.typed (added, empty file)
src/dlm_sway/suite/__init__.py (added) @@ -0,0 +1,1 @@
| 1 | +"""Suite plumbing: spec models, loader, runner, report, composite score.""" | |
tests/__init__.py (added, empty file)
tests/conftest.py (added) @@ -0,0 +1,24 @@
| 1 | +"""Shared test fixtures. | |
| 2 | + | |
| 3 | +Keep the default fast-test environment offline and deterministic so unit | |
| 4 | +tests stay below ~1 s per file. Integration tests override these via | |
| 5 | +their own ``conftest`` when they need network access. | |
| 6 | +""" | |
| 7 | + | |
| 8 | +from __future__ import annotations | |
| 9 | + | |
| 10 | +import pytest | |
| 11 | + | |
| 12 | + | |
| 13 | +@pytest.fixture(autouse=True) | |
| 14 | +def _offline_and_no_telemetry(monkeypatch: pytest.MonkeyPatch) -> None: | |
| 15 | + """Unit tests never touch the network. | |
| 16 | + | |
| 17 | + Any backend test that needs HF should be marked ``@pytest.mark.online`` | |
| 18 | + and clear these vars explicitly. | |
| 19 | + """ | |
| 20 | + monkeypatch.setenv("HF_HUB_OFFLINE", "1") | |
| 21 | + monkeypatch.setenv("TRANSFORMERS_OFFLINE", "1") | |
| 22 | + monkeypatch.setenv("HF_DATASETS_OFFLINE", "1") | |
| 23 | + monkeypatch.setenv("HF_HUB_DISABLE_TELEMETRY", "1") | |
| 24 | + monkeypatch.setenv("DO_NOT_TRACK", "1") | |
tests/fixtures/__init__.py (added, empty file)
tests/integration/__init__.py (added, empty file)
tests/unit/__init__.py (added, empty file)