tenseleyflow/sway / 3732e0b

Browse files

sway: scaffold standalone subproject (pyproject, LICENSE, README)

Authored by espadonne
SHA
3732e0b0b7a7c8764be3e80c92afacfb5be47391
Tree
bd74e8a

16 changed files

StatusFile+-
A LICENSE 21 0
A README.md 101 0
A pyproject.toml 203 0
A src/dlm_sway/backends/__init__.py 1 0
A src/dlm_sway/cli/__init__.py 1 0
A src/dlm_sway/core/__init__.py 1 0
A src/dlm_sway/integrations/__init__.py 1 0
A src/dlm_sway/integrations/dlm/__init__.py 1 0
A src/dlm_sway/probes/__init__.py 1 0
A src/dlm_sway/py.typed 0 0
A src/dlm_sway/suite/__init__.py 1 0
A tests/__init__.py 0 0
A tests/conftest.py 24 0
A tests/fixtures/__init__.py 0 0
A tests/integration/__init__.py 0 0
A tests/unit/__init__.py 0 0
LICENSEadded
@@ -0,0 +1,21 @@
1
+MIT License
2
+
3
+Copyright (c) 2026 Matt Wolffe
4
+
5
+Permission is hereby granted, free of charge, to any person obtaining a copy
6
+of this software and associated documentation files (the "Software"), to deal
7
+in the Software without restriction, including without limitation the rights
8
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+copies of the Software, and to permit persons to whom the Software is
10
+furnished to do so, subject to the following conditions:
11
+
12
+The above copyright notice and this permission notice shall be included in all
13
+copies or substantial portions of the Software.
14
+
15
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+SOFTWARE.
README.mdadded
@@ -0,0 +1,101 @@
1
+# dlm-sway
2
+
3
+Differential testing for fine-tuned causal language models.
4
+
5
+**One question:** *did LoRA/QLoRA training actually change model behavior
6
+in a meaningful way, or is the model just defaulting to the pretrained
7
+base?*
8
+
9
+`dlm-sway` gives you a trustworthy, reproducible answer with ten
10
+purpose-built primitives, each z-scored against a null-adapter baseline.
11
+No LLM judges. No external APIs. Deterministic on CPU where possible.
12
+
13
+## Install
14
+
15
+```bash
16
+pip install "dlm-sway[hf]"                # HuggingFace + PEFT backend
17
+pip install "dlm-sway[hf,style,semsim]"   # full primitive battery
18
+pip install "dlm-sway[all]"               # everything including optional viz
19
+pip install "dlm-sway[dlm]"               # auto-generate tests from a .dlm file
20
+```
21
+
22
+## 90-second smoke test
23
+
24
+```bash
25
+dlm-sway check path/to/adapter --base HuggingFaceTB/SmolLM2-135M-Instruct
26
+```
27
+
28
+Outputs a verdict in under a minute on CPU for small models: *your
29
+adapter is 4.2σ above noise* ✅ or *indistinguishable from a null
30
+adapter* ❌.
31
+
32
+## Full suite
33
+
34
+```yaml
35
+# sway.yaml
36
+version: 1
37
+models:
38
+  base: {kind: hf, base: "HuggingFaceTB/SmolLM2-135M-Instruct"}
39
+  ft:   {kind: hf, base: "HuggingFaceTB/SmolLM2-135M-Instruct",
40
+         adapter: "./runs/adapter/v0003"}
41
+suite:
42
+  - {name: knows_concept, kind: dir,
43
+     prompt: "The Dunning-Kruger effect describes",
44
+     target: " a cognitive bias where",
45
+     distractor: " a programming language"}
46
+  - {name: no_reversion, kind: adapter_revert, paraphrases: 4}
47
+  - {name: section_attribution, kind: section_internalization}
48
+```
49
+
50
+```bash
51
+dlm-sway run sway.yaml              # full report to terminal + JSON
52
+dlm-sway gate sway.yaml --junit     # CI-friendly; non-zero on fail
53
+```
54
+
55
+## Why it exists
56
+
57
+Standard benchmarks (MMLU, HellaSwag) ask *"how good is this model?"*
58
+That's the wrong question after a targeted LoRA fine-tune on a small
59
+user-authored document. The right question is *"did the adapter actually
60
+move the model toward what I wrote?"* — and existing tools answer this
61
+poorly.
62
+
63
+`dlm-sway` answers it directly via ten primitives across four
64
+categories:
65
+
66
+| Category      | Primitives                                            |
67
+|---------------|-------------------------------------------------------|
68
+| Adherence     | `delta_kl`, `adapter_revert`, `prompt_collapse`       |
69
+| Attribution   | `section_internalization`, `paraphrase_invariance`, `preference_flip` |
70
+| Calibration   | `style_fingerprint`, `calibration_drift`, `leakage`   |
71
+| Ablation      | `adapter_ablation` ← the signature primitive          |
72
+
73
+**The signature primitive.** `adapter_ablation` scales the LoRA additive
74
+term by λ ∈ {0, 0.25, 0.5, 0.75, 1.0, 1.25} and measures the divergence
75
+curve. A healthy fine-tune shows a smooth, monotonic, non-saturated
76
+response. A degenerate one shows a step function or an
77
+overshoot-then-crash. Nobody else does this because nobody else gets this close to the
78
+adapter math.
79
+
80
+## The `.dlm` integration
81
+
82
+If you trained your adapter via the [DocumentLanguageModel
83
+project](https://github.com/tenseleyFlow/DocumentLanguageModel), sway
84
+can auto-generate a test suite from your document's sections:
85
+
86
+```bash
87
+pip install "dlm-sway[hf,dlm]"
88
+dlm-sway autogen path/to/doc.dlm -o sway.yaml
89
+dlm-sway run sway.yaml
90
+```
91
+
92
+Per-section attribution tells you *which* parts of your document
93
+actually moved the model — a kind of signal no other tool provides.
94
+
95
+## Status
96
+
97
+Pre-alpha. API will break. Version `0.1.0` is the first tag.
98
+
99
+## License
100
+
101
+MIT
pyproject.tomladded
@@ -0,0 +1,203 @@
1
+[project]
2
+name = "dlm-sway"
3
+version = "0.1.0.dev0"
4
+description = "Differential testing for fine-tuned causal LMs: did LoRA/QLoRA training actually change behavior, or is the model defaulting to the pretrained base?"
5
+readme = "README.md"
6
+requires-python = ">=3.11"
7
+license = { text = "MIT" }
8
+authors = [{ name = "Matt Wolffe", email = "mfwolffe@outlook.com" }]
9
+keywords = [
10
+    "lora",
11
+    "qlora",
12
+    "peft",
13
+    "fine-tuning",
14
+    "evaluation",
15
+    "llm",
16
+    "differential-testing",
17
+]
18
+classifiers = [
19
+    "Development Status :: 3 - Alpha",
20
+    "Intended Audience :: Developers",
21
+    "Intended Audience :: Science/Research",
22
+    "License :: OSI Approved :: MIT License",
23
+    "Programming Language :: Python :: 3",
24
+    "Programming Language :: Python :: 3.11",
25
+    "Programming Language :: Python :: 3.12",
26
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
27
+]
28
+
29
+# Core deps: spec loading, orchestration, reporting. No torch — a user
30
+# who only defines specs or writes a custom backend shouldn't pull 3 GB
31
+# of CUDA wheels.
32
+dependencies = [
33
+    "pydantic>=2.9",
34
+    "pyyaml>=6.0",
35
+    "typer>=0.12",
36
+    "rich>=13.7",
37
+    "numpy>=1.26",
38
+    "packaging>=24.0",
39
+]
40
+
41
+[project.optional-dependencies]
42
+# HuggingFace + PEFT scoring backend. The canonical path.
43
+hf = [
44
+    "torch>=2.4",
45
+    "transformers>=4.45",
46
+    "peft>=0.13",
47
+    "safetensors>=0.4",
48
+]
49
+# Apple Silicon inference. Env markers keep `uv sync --extra mlx` a no-op
50
+# on non-Apple hosts so Linux/CUDA contributors' wheel resolution stays
51
+# sane.
52
+mlx = [
53
+    "mlx>=0.18; sys_platform == 'darwin' and platform_machine == 'arm64'",
54
+    "mlx-lm>=0.19; sys_platform == 'darwin' and platform_machine == 'arm64'",
55
+]
56
+# Stylistic fingerprinting (C1). spaCy models pull at runtime via
57
+# `python -m spacy download`.
58
+style = [
59
+    "spacy>=3.7",
60
+    "textstat>=0.7",
61
+    "nlpaug>=1.1",
62
+]
63
+# Semantic similarity (A2). MiniLM ~80 MB, CPU-friendly.
64
+semsim = [
65
+    "sentence-transformers>=3.0",
66
+]
67
+# Optional .dlm integration. Only imported inside dlm_sway.integrations.dlm.
68
+dlm = [
69
+    "dlm>=0.9",
70
+]
71
+# Visualization (P9).
72
+viz = [
73
+    "matplotlib>=3.8",
74
+]
75
+all = [
76
+    "torch>=2.4",
77
+    "transformers>=4.45",
78
+    "peft>=0.13",
79
+    "safetensors>=0.4",
80
+    "mlx>=0.18; sys_platform == 'darwin' and platform_machine == 'arm64'",
81
+    "mlx-lm>=0.19; sys_platform == 'darwin' and platform_machine == 'arm64'",
82
+    "spacy>=3.7",
83
+    "textstat>=0.7",
84
+    "nlpaug>=1.1",
85
+    "sentence-transformers>=3.0",
86
+    "matplotlib>=3.8",
87
+]
88
+
89
+[project.scripts]
90
+dlm-sway = "dlm_sway.cli.app:main"
91
+
92
+[project.urls]
93
+Homepage = "https://github.com/tenseleyFlow/DocumentLanguageModel"
94
+Issues = "https://github.com/tenseleyFlow/DocumentLanguageModel/issues"
95
+
96
+[dependency-groups]
97
+dev = [
98
+    "pytest>=8.0",
99
+    "pytest-cov>=5.0",
100
+    "mypy>=1.11",
101
+    "ruff>=0.6",
102
+    "types-pyyaml>=6.0",
103
+    "hypothesis>=6.152.1",
104
+]
105
+
106
+[build-system]
107
+requires = ["hatchling"]
108
+build-backend = "hatchling.build"
109
+
110
+[tool.hatch.build.targets.wheel]
111
+packages = ["src/dlm_sway"]
112
+
113
+# -------- ruff --------
114
+[tool.ruff]
115
+line-length = 100
116
+target-version = "py311"
117
+src = ["src", "tests"]
118
+
119
+[tool.ruff.lint]
120
+select = [
121
+    "E",    # pycodestyle errors
122
+    "F",    # pyflakes
123
+    "W",    # pycodestyle warnings
124
+    "I",    # isort
125
+    "UP",   # pyupgrade
126
+    "B",    # bugbear
127
+    "N",    # pep8-naming
128
+    "C4",   # comprehensions
129
+    "SIM",  # simplify
130
+    "PT",   # pytest
131
+    "RET",  # return
132
+    "ARG",  # unused args
133
+    "PTH",  # use pathlib
134
+    "TID",  # tidy imports
135
+]
136
+ignore = [
137
+    "E501",  # handled by formatter
138
+]
139
+
140
+[tool.ruff.lint.per-file-ignores]
141
+"tests/**/*.py" = ["ARG", "PT011", "SIM117"]
142
+
143
+[tool.ruff.lint.flake8-tidy-imports.banned-api]
144
+# Hard architectural boundary: the `dlm` package is only importable
145
+# from inside the optional integration shim. This keeps dlm-sway
146
+# usable for anyone with just a HuggingFace base + PEFT adapter.
147
+"dlm".msg = "Import `dlm` only from dlm_sway.integrations.dlm (the optional extra)."
148
+
149
+[tool.ruff.format]
150
+quote-style = "double"
151
+indent-style = "space"
152
+
153
+# -------- mypy --------
154
+[tool.mypy]
155
+strict = true
156
+python_version = "3.11"
157
+packages = ["dlm_sway"]
158
+mypy_path = "src"
159
+warn_return_any = true
160
+warn_unused_ignores = true
161
+warn_redundant_casts = true
162
+no_implicit_optional = true
163
+disallow_untyped_decorators = true
164
+plugins = ["pydantic.mypy"]
165
+
166
+[tool.pydantic-mypy]
167
+init_forbid_extra = true
168
+init_typed = true
169
+warn_required_dynamic_aliases = true
170
+
171
+# Stubless ML ecosystem packages. Narrow boundaries in backends/* import
172
+# them explicitly; the rest of the codebase stays strict.
173
+[[tool.mypy.overrides]]
174
+module = [
175
+    "torch",
176
+    "torch.*",
177
+    "transformers.*",
178
+    "peft.*",
179
+    "safetensors.*",
180
+    "mlx.*",
181
+    "mlx_lm.*",
182
+    "sentence_transformers.*",
183
+    "spacy.*",
184
+    "textstat.*",
185
+    "nlpaug.*",
186
+    "huggingface_hub.*",
187
+    "dlm.*",
188
+]
189
+ignore_missing_imports = true
190
+disable_error_code = ["no-untyped-call"]
191
+
192
+# -------- pytest --------
193
+[tool.pytest.ini_options]
194
+testpaths = ["tests"]
195
+addopts = [
196
+    "-ra",
197
+    "-m", "not slow and not gpu and not online",
198
+]
199
+markers = [
200
+    "slow: expensive; deselected by default",
201
+    "gpu: requires CUDA; skipped on CPU/MPS runners",
202
+    "online: touches the network; skipped in offline CI",
203
+]
src/dlm_sway/backends/__init__.pyadded
@@ -0,0 +1,1 @@
1
+"""Scoring backends: HuggingFace (``hf``), MLX (``mlx``), dummy, custom."""
src/dlm_sway/cli/__init__.pyadded
@@ -0,0 +1,1 @@
1
+"""Command-line interface (entry point: ``dlm-sway``)."""
src/dlm_sway/core/__init__.pyadded
@@ -0,0 +1,1 @@
1
+"""Core abstractions: protocols, results, errors, determinism."""
src/dlm_sway/integrations/__init__.pyadded
@@ -0,0 +1,1 @@
1
+"""Optional integrations with upstream fine-tuning tools."""
src/dlm_sway/integrations/dlm/__init__.pyadded
@@ -0,0 +1,1 @@
1
+"""DLM project integration. Imports the ``dlm`` package; requires ``dlm-sway[dlm]``."""
src/dlm_sway/probes/__init__.pyadded
@@ -0,0 +1,1 @@
1
+"""Probe primitives. Each module in this package implements one primitive."""
src/dlm_sway/py.typedadded
src/dlm_sway/suite/__init__.pyadded
@@ -0,0 +1,1 @@
1
+"""Suite plumbing: spec models, loader, runner, report, composite score."""
tests/__init__.pyadded
tests/conftest.pyadded
@@ -0,0 +1,24 @@
1
+"""Shared test fixtures.
2
+
3
+Keep the default fast-test environment offline and deterministic so unit
4
+tests stay below ~1 s per file. Integration tests override these via
5
+their own ``conftest`` when they need network access.
6
+"""
7
+
8
+from __future__ import annotations
9
+
10
+import pytest
11
+
12
+
13
+@pytest.fixture(autouse=True)
14
+def _offline_and_no_telemetry(monkeypatch: pytest.MonkeyPatch) -> None:
15
+    """Unit tests never touch the network.
16
+
17
+    Any backend test that needs HF should be marked ``@pytest.mark.online``
18
+    and clear these vars explicitly.
19
+    """
20
+    monkeypatch.setenv("HF_HUB_OFFLINE", "1")
21
+    monkeypatch.setenv("TRANSFORMERS_OFFLINE", "1")
22
+    monkeypatch.setenv("HF_DATASETS_OFFLINE", "1")
23
+    monkeypatch.setenv("HF_HUB_DISABLE_TELEMETRY", "1")
24
+    monkeypatch.setenv("DO_NOT_TRACK", "1")
tests/fixtures/__init__.pyadded
tests/integration/__init__.pyadded
tests/unit/__init__.pyadded