sway Public

Watch 0 Fork 0 Star 0

TOML · 8545 bytes Raw Blame History

  
        1
        [project]
      
        2
        name = "dlm-sway"
      
        3
        version = "0.1.0"
      
        4
        description = "Differential testing for fine-tuned causal LMs: did LoRA/QLoRA training actually change behavior, or is the model defaulting to the pretrained base?"
      
        5
        readme = "README.md"
      
        6
        requires-python = ">=3.11"
      
        7
        license = { text = "MIT" }
      
        8
        authors = [{ name = "Matt Wolffe", email = "mfwolffe@outlook.com" }]
      
        9
        keywords = [
      
        10
            "lora",
      
        11
            "qlora",
      
        12
            "peft",
      
        13
            "fine-tuning",
      
        14
            "evaluation",
      
        15
            "llm",
      
        16
            "differential-testing",
      
        17
        ]
      
        18
        classifiers = [
      
        19
            "Development Status :: 3 - Alpha",
      
        20
            "Intended Audience :: Developers",
      
        21
            "Intended Audience :: Science/Research",
      
        22
            "License :: OSI Approved :: MIT License",
      
        23
            "Programming Language :: Python :: 3",
      
        24
            "Programming Language :: Python :: 3.11",
      
        25
            "Programming Language :: Python :: 3.12",
      
        26
            "Topic :: Scientific/Engineering :: Artificial Intelligence",
      
        27
        ]
      
        28
        
        29
        # Core deps: spec loading, orchestration, reporting. No torch — a user
      
        30
        # who only defines specs or writes a custom backend shouldn't pull 3 GB
      
        31
        # of CUDA wheels.
      
        32
        dependencies = [
      
        33
            "pydantic>=2.9",
      
        34
            "pyyaml>=6.0",
      
        35
            "typer>=0.12",
      
        36
            "rich>=13.7",
      
        37
            "numpy>=1.26",
      
        38
            "packaging>=24.0",
      
        39
        ]
      
        40
        
        41
        [project.optional-dependencies]
      
        42
        # HuggingFace + PEFT scoring backend. The canonical path.
      
        43
        hf = [
      
        44
            "torch>=2.4",
      
        45
            "transformers>=4.45",
      
        46
            "peft>=0.13",
      
        47
            "safetensors>=0.4",
      
        48
        ]
      
        49
        # Apple Silicon inference. Env markers keep `uv sync --extra mlx` a no-op
      
        50
        # on non-Apple hosts so Linux/CUDA contributors' wheel resolution stays
      
        51
        # sane.
      
        52
        mlx = [
      
        53
            "mlx>=0.18; sys_platform == 'darwin' and platform_machine == 'arm64'",
      
        54
            "mlx-lm>=0.19; sys_platform == 'darwin' and platform_machine == 'arm64'",
      
        55
        ]
      
        56
        # Stylistic fingerprinting (C1). spaCy models pull at runtime via
      
        57
        # `python -m spacy download`.
      
        58
        style = [
      
        59
            "spacy>=3.7",
      
        60
            "textstat>=0.7",
      
        61
            "nlpaug>=1.1",
      
        62
        ]
      
        63
        # Semantic similarity (A2) + cluster-coherent KL (S16 / F8). The
      
        64
        # SentenceTransformer and k-means clustering pair ride the same
      
        65
        # ~80 MB MiniLM load; putting scikit-learn in the same extra keeps
      
        66
        # users from hitting "wait, which extra?" friction.
      
        67
        semsim = [
      
        68
            "sentence-transformers>=3.0",
      
        69
            "scikit-learn>=1.4",
      
        70
        ]
      
        71
        # Optional .dlm integration. Only imported inside dlm_sway.integrations.dlm.
      
        72
        # Upper bound pins to the major pre-1.0 range the integration test
      
        73
        # (tests/integration/test_dlm_api_compat.py) has validated against —
      
        74
        # bump when dlm cuts v1.0 and the resolver's ``.hf_id`` contract is
      
        75
        # re-verified.
      
        76
        dlm = [
      
        77
            "dlm>=0.9,<1.0",
      
        78
        ]
      
        79
        # OpenAI-compatible HTTP scoring backend (S13 / F7). Unlocks hosted
      
        80
        # fine-tunes (OpenAI platform, vLLM serve, Ollama) without pulling
      
        81
        # torch. httpx + tenacity together are a few hundred KB of deps vs
      
        82
        # the 3 GB the [hf] extra costs.
      
        83
        api = [
      
        84
            "httpx>=0.27",
      
        85
            "tenacity>=9.0",
      
        86
        ]
      
        87
        # pytest integration (S15 / F10). The plugin is discovered via the
      
        88
        # `pytest11` entry point below — ``pip install 'dlm-sway[pytest]'``
      
        89
        # adds pytest if the user doesn't have it, then pytest auto-loads
      
        90
        # the plugin on next invocation.
      
        91
        pytest = [
      
        92
            "pytest>=8.0",
      
        93
        ]
      
        94
        # Long-running daemon mode (S36). FastAPI + uvicorn give us a warm-
      
        95
        # backend HTTP API that turns iterative ``sway run`` calls from
      
        96
        # 15-second cold-loads into 2-second warm dispatches. uvicorn[standard]
      
        97
        # pulls httptools + uvloop for production-quality serving on
      
        98
        # Linux/macOS.
      
        99
        serve = [
      
        100
            "fastapi>=0.110",
      
        101
            "uvicorn[standard]>=0.30",
      
        102
            "httpx>=0.27",
      
        103
        ]
      
        104
        # Visualization (P9 + S12 HTML report).
      
        105
        viz = [
      
        106
            "matplotlib>=3.8",
      
        107
            "plotly>=5.20",
      
        108
        ]
      
        109
        all = [
      
        110
            "torch>=2.4",
      
        111
            "transformers>=4.45",
      
        112
            "peft>=0.13",
      
        113
            "safetensors>=0.4",
      
        114
            "mlx>=0.18; sys_platform == 'darwin' and platform_machine == 'arm64'",
      
        115
            "mlx-lm>=0.19; sys_platform == 'darwin' and platform_machine == 'arm64'",
      
        116
            "spacy>=3.7",
      
        117
            "textstat>=0.7",
      
        118
            "nlpaug>=1.1",
      
        119
            "sentence-transformers>=3.0",
      
        120
            "scikit-learn>=1.4",
      
        121
            "httpx>=0.27",
      
        122
            "tenacity>=9.0",
      
        123
            "pytest>=8.0",
      
        124
            "matplotlib>=3.8",
      
        125
            "plotly>=5.20",
      
        126
            "fastapi>=0.110",
      
        127
            "uvicorn[standard]>=0.30",
      
        128
        ]
      
        129
        
        130
        [project.scripts]
      
        131
        sway = "dlm_sway.cli.app:main"
      
        132
        
        133
        # S15 / F10: pytest plugin discovered via the canonical pytest11
      
        134
        # entry-point. Gets auto-loaded the moment the wheel is installed,
      
        135
        # even without an ``@pytest.mark.sway`` import — consistent with how
      
        136
        # pytest-cov, pytest-xdist, etc. ship their plugins.
      
        137
        [project.entry-points.pytest11]
      
        138
        sway = "dlm_sway.pytest_plugin"
      
        139
        
        140
        [project.urls]
      
        141
        Homepage = "https://github.com/tenseleyFlow/sway"
      
        142
        Issues = "https://github.com/tenseleyFlow/sway/issues"
      
        143
        "Related project" = "https://github.com/tenseleyFlow/DocumentLanguageModel"
      
        144
        
        145
        [dependency-groups]
      
        146
        dev = [
      
        147
            "pytest>=8.0",
      
        148
            "pytest-cov>=5.0",
      
        149
            "mypy>=1.11",
      
        150
            "ruff>=0.6",
      
        151
            "types-pyyaml>=6.0",
      
        152
            "hypothesis>=6.152.1",
      
        153
            # Required by the tiny_model fixture (snapshot_download) used by every
      
        154
            # slow+online integration test. Not transitively guaranteed by hf
      
        155
            # optional-dep because contributors may want to run integration tests
      
        156
            # without the full [hf] extra installed.
      
        157
            "huggingface_hub>=0.25",
      
        158
            # S19: the pre-commit-hook integration test spawns ``pre-commit``
      
        159
            # as a subprocess. Keeps the tool out of the user's runtime deps.
      
        160
            "pre-commit>=3.8",
      
        161
            # S21 / F03: pytest-timeout wraps slow+online tests so a
      
        162
            # silent network hang surfaces as a failed test rather than a
      
        163
            # job-level timeout (macOS CI observed 20m hang on Sprint 19
      
        164
            # merge run 24747915467).
      
        165
            "pytest-timeout>=2.3",
      
        166
            # S21 / F03: tenacity's retry helpers wrap
      
        167
            # ``huggingface_hub.snapshot_download`` in the tiny_model
      
        168
            # fixture. Already in the ``[api]`` extra; duplicated here so
      
        169
            # slow-lane CI jobs don't need to install ``[api]`` just to
      
        170
            # get the fixture retry.
      
        171
            "tenacity>=9.0",
      
        172
        ]
      
        173
        
        174
        [build-system]
      
        175
        requires = ["hatchling"]
      
        176
        build-backend = "hatchling.build"
      
        177
        
        178
        [tool.hatch.build.targets.wheel]
      
        179
        packages = ["src/dlm_sway"]
      
        180
        
        181
        # -------- ruff --------
      
        182
        [tool.ruff]
      
        183
        line-length = 100
      
        184
        target-version = "py311"
      
        185
        src = ["src", "tests"]
      
        186
        
        187
        [tool.ruff.lint]
      
        188
        select = [
      
        189
            "E",    # pycodestyle errors
      
        190
            "F",    # pyflakes
      
        191
            "W",    # pycodestyle warnings
      
        192
            "I",    # isort
      
        193
            "UP",   # pyupgrade
      
        194
            "B",    # bugbear
      
        195
            "N",    # pep8-naming
      
        196
            "C4",   # comprehensions
      
        197
            "SIM",  # simplify
      
        198
            "PT",   # pytest
      
        199
            "RET",  # return
      
        200
            "ARG",  # unused args
      
        201
            "PTH",  # use pathlib
      
        202
            "TID",  # tidy imports
      
        203
        ]
      
        204
        ignore = [
      
        205
            "E501",  # handled by formatter
      
        206
        ]
      
        207
        
        208
        [tool.ruff.lint.per-file-ignores]
      
        209
        "tests/**/*.py" = ["ARG", "PT011", "SIM117"]
      
        210
        # PyTorch's canonical `import torch.nn.functional as F` is universally
      
        211
        # read, so we allow the naming exception in the HF backend only.
      
        212
        "src/dlm_sway/backends/hf.py" = ["N812"]
      
        213
        # The .dlm bridge is the one place allowed to import the ``dlm`` package.
      
        214
        "src/dlm_sway/integrations/dlm/*.py" = ["TID251"]
      
        215
        
        216
        [tool.ruff.lint.flake8-tidy-imports.banned-api]
      
        217
        # Hard architectural boundary: the `dlm` package is only importable
      
        218
        # from inside the optional integration shim. This keeps dlm-sway
      
        219
        # usable for anyone with just a HuggingFace base + PEFT adapter.
      
        220
        "dlm".msg = "Import `dlm` only from dlm_sway.integrations.dlm (the optional extra)."
      
        221
        
        222
        [tool.ruff.format]
      
        223
        quote-style = "double"
      
        224
        indent-style = "space"
      
        225
        
        226
        # -------- mypy --------
      
        227
        [tool.mypy]
      
        228
        strict = true
      
        229
        python_version = "3.11"
      
        230
        packages = ["dlm_sway"]
      
        231
        mypy_path = "src"
      
        232
        warn_return_any = true
      
        233
        warn_unused_ignores = true
      
        234
        warn_redundant_casts = true
      
        235
        no_implicit_optional = true
      
        236
        disallow_untyped_decorators = true
      
        237
        plugins = ["pydantic.mypy"]
      
        238
        
        239
        [tool.pydantic-mypy]
      
        240
        init_forbid_extra = true
      
        241
        init_typed = true
      
        242
        warn_required_dynamic_aliases = true
      
        243
        
        244
        # Stubless ML ecosystem packages. Narrow boundaries in backends/* import
      
        245
        # them explicitly; the rest of the codebase stays strict.
      
        246
        [[tool.mypy.overrides]]
      
        247
        module = [
      
        248
            "torch",
      
        249
            "torch.*",
      
        250
            "transformers.*",
      
        251
            "peft.*",
      
        252
            "safetensors.*",
      
        253
            "mlx.*",
      
        254
            "mlx_lm.*",
      
        255
            "sentence_transformers.*",
      
        256
            "sklearn",
      
        257
            "sklearn.*",
      
        258
            "spacy.*",
      
        259
            "textstat.*",
      
        260
            "nlpaug.*",
      
        261
            "matplotlib",
      
        262
            "matplotlib.*",
      
        263
            "plotly",
      
        264
            "plotly.*",
      
        265
            "tenacity",
      
        266
            "tenacity.*",
      
        267
            "httpx",
      
        268
            "httpx.*",
      
        269
            "huggingface_hub.*",
      
        270
            "dlm.*",
      
        271
        ]
      
        272
        ignore_missing_imports = true
      
        273
        disable_error_code = ["no-untyped-call"]
      
        274
        
        275
        # -------- pytest --------
      
        276
        [tool.pytest.ini_options]
      
        277
        testpaths = ["tests"]
      
        278
        addopts = [
      
        279
            "-ra",
      
        280
            "-m", "not slow and not gpu and not online",
      
        281
        ]
      
        282
        markers = [
      
        283
            "slow: expensive; deselected by default",
      
        284
            "gpu: requires CUDA; skipped on CPU/MPS runners",
      
        285
            "online: touches the network; skipped in offline CI",
      
        286
        ]

1	[project]
2	name = "dlm-sway"
3	version = "0.1.0"
4	description = "Differential testing for fine-tuned causal LMs: did LoRA/QLoRA training actually change behavior, or is the model defaulting to the pretrained base?"
5	readme = "README.md"
6	requires-python = ">=3.11"  # same floor as ruff target-version and mypy python_version
7	license = { text = "MIT" }
8	authors = [{ name = "Matt Wolffe", email = "mfwolffe@outlook.com" }]
9	keywords = [
10	"lora",
11	"qlora",
12	"peft",
13	"fine-tuning",
14	"evaluation",
15	"llm",
16	"differential-testing",
17	]
18	classifiers = [  # NOTE(review): only 3.11 and 3.12 are listed while requires-python is open-ended — confirm
19	"Development Status :: 3 - Alpha",
20	"Intended Audience :: Developers",
21	"Intended Audience :: Science/Research",
22	"License :: OSI Approved :: MIT License",
23	"Programming Language :: Python :: 3",
24	"Programming Language :: Python :: 3.11",
25	"Programming Language :: Python :: 3.12",
26	"Topic :: Scientific/Engineering :: Artificial Intelligence",
27	]
28
29	# Core deps: spec loading, orchestration, reporting. No torch — a user
30	# who only defines specs or writes a custom backend shouldn't pull 3 GB
31	# of CUDA wheels. Backend-specific packages are opt-in extras below.
32	dependencies = [
33	"pydantic>=2.9",
34	"pyyaml>=6.0",
35	"typer>=0.12",
36	"rich>=13.7",
37	"numpy>=1.26",
38	"packaging>=24.0",
39	]
40
41	[project.optional-dependencies]
42	# HuggingFace + PEFT scoring backend. The canonical path.
43	hf = [
44	"torch>=2.4",
45	"transformers>=4.45",
46	"peft>=0.13",
47	"safetensors>=0.4",
48	]
49	# Apple Silicon inference. Env markers keep `uv sync --extra mlx` a no-op
50	# on non-Apple hosts so Linux/CUDA contributors' wheel resolution stays
51	# sane.
52	mlx = [
53	"mlx>=0.18; sys_platform == 'darwin' and platform_machine == 'arm64'",
54	"mlx-lm>=0.19; sys_platform == 'darwin' and platform_machine == 'arm64'",
55	]
56	# Stylistic fingerprinting (C1). spaCy models pull at runtime via
57	# `python -m spacy download`.
58	style = [
59	"spacy>=3.7",
60	"textstat>=0.7",
61	"nlpaug>=1.1",
62	]
63	# Semantic similarity (A2) + cluster-coherent KL (S16 / F8). The
64	# SentenceTransformer and k-means clustering pair ride the same
65	# ~80 MB MiniLM load; putting scikit-learn in the same extra keeps
66	# users from hitting "wait, which extra?" friction.
67	semsim = [
68	"sentence-transformers>=3.0",
69	"scikit-learn>=1.4",
70	]
71	# Optional .dlm integration. Only imported inside dlm_sway.integrations.dlm.
72	# Upper bound pins to the major pre-1.0 range the integration test
73	# (tests/integration/test_dlm_api_compat.py) has validated against —
74	# bump when dlm cuts v1.0 and the resolver's ``.hf_id`` contract is
75	# re-verified.
76	dlm = [
77	"dlm>=0.9,<1.0",
78	]
79	# OpenAI-compatible HTTP scoring backend (S13 / F7). Unlocks hosted
80	# fine-tunes (OpenAI platform, vLLM serve, Ollama) without pulling
81	# torch. httpx + tenacity together are a few hundred KB of deps vs
82	# the 3 GB the [hf] extra costs.
83	api = [
84	"httpx>=0.27",
85	"tenacity>=9.0",
86	]
87	# pytest integration (S15 / F10). The plugin is discovered via the
88	# `pytest11` entry point below — ``pip install 'dlm-sway[pytest]'``
89	# adds pytest if the user doesn't have it, then pytest auto-loads
90	# the plugin on next invocation.
91	pytest = [
92	"pytest>=8.0",
93	]
94	# Long-running daemon mode (S36). FastAPI + uvicorn give us a warm-
95	# backend HTTP API that turns iterative ``sway run`` calls from
96	# 15-second cold-loads into 2-second warm dispatches. uvicorn[standard]
97	# pulls httptools + uvloop for production-quality serving on
98	# Linux/macOS.
99	serve = [
100	"fastapi>=0.110",
101	"uvicorn[standard]>=0.30",
102	"httpx>=0.27",  # same floor as the [api] extra
103	]
104	# Visualization (P9 + S12 HTML report).
105	viz = [
106	"matplotlib>=3.8",
107	"plotly>=5.20",
108	]
109	all = [  # union of every extra above except [dlm], repeated by hand — keep in sync when an extra changes
110	"torch>=2.4",
111	"transformers>=4.45",
112	"peft>=0.13",
113	"safetensors>=0.4",
114	"mlx>=0.18; sys_platform == 'darwin' and platform_machine == 'arm64'",
115	"mlx-lm>=0.19; sys_platform == 'darwin' and platform_machine == 'arm64'",
116	"spacy>=3.7",
117	"textstat>=0.7",
118	"nlpaug>=1.1",
119	"sentence-transformers>=3.0",
120	"scikit-learn>=1.4",
121	"httpx>=0.27",
122	"tenacity>=9.0",
123	"pytest>=8.0",
124	"matplotlib>=3.8",
125	"plotly>=5.20",
126	"fastapi>=0.110",
127	"uvicorn[standard]>=0.30",
128	]  # NOTE(review): dlm is absent from `all` — confirm that is intentional
129
130	[project.scripts]
131	sway = "dlm_sway.cli.app:main"  # `sway` console command -> main in dlm_sway.cli.app
132
133	# S15 / F10: pytest plugin discovered via the canonical pytest11
134	# entry point. pytest auto-loads it as soon as the wheel is installed;
135	# no test has to import anything or use ``@pytest.mark.sway`` first —
136	# consistent with how pytest-cov, pytest-xdist, etc. ship their plugins.
137	[project.entry-points.pytest11]
138	sway = "dlm_sway.pytest_plugin"
139
140	[project.urls]
141	Homepage = "https://github.com/tenseleyFlow/sway"
142	Issues = "https://github.com/tenseleyFlow/sway/issues"
143	"Related project" = "https://github.com/tenseleyFlow/DocumentLanguageModel"  # key is quoted because it contains a space
144
145	[dependency-groups]
146	dev = [  # contributor tooling; a dependency group, not one of the project's extras
147	"pytest>=8.0",
148	"pytest-cov>=5.0",
149	"mypy>=1.11",
150	"ruff>=0.6",
151	"types-pyyaml>=6.0",
152	"hypothesis>=6.152.1",
153	# Required by the tiny_model fixture (snapshot_download) used by every
154	# slow+online integration test. Listed explicitly rather than relying
155	# on the [hf] extra, because contributors may want to run integration
156	# tests without the full [hf] extra installed.
157	"huggingface_hub>=0.25",
158	# S19: the pre-commit-hook integration test spawns ``pre-commit``
159	# as a subprocess. Keeps the tool out of the user's runtime deps.
160	"pre-commit>=3.8",
161	# S21 / F03: pytest-timeout wraps slow+online tests so a
162	# silent network hang surfaces as a failed test rather than a
163	# job-level timeout (macOS CI observed 20m hang on Sprint 19
164	# merge run 24747915467).
165	"pytest-timeout>=2.3",
166	# S21 / F03: tenacity's retry helpers wrap
167	# ``huggingface_hub.snapshot_download`` in the tiny_model
168	# fixture. Already in the ``[api]`` extra; duplicated here so
169	# slow-lane CI jobs don't need to install ``[api]`` just to
170	# get the fixture retry.
171	"tenacity>=9.0",
172	]
173
174	[build-system]
175	requires = ["hatchling"]  # no version bound: any hatchling release is accepted
176	build-backend = "hatchling.build"
177
178	[tool.hatch.build.targets.wheel]
179	packages = ["src/dlm_sway"]  # src layout: the importable package lives under src/
180
181	# -------- ruff --------
182	[tool.ruff]
183	line-length = 100
184	target-version = "py311"  # same floor as requires-python
185	src = ["src", "tests"]  # roots used for first-party import detection
186
187	[tool.ruff.lint]
188	select = [
189	"E", # pycodestyle errors
190	"F", # pyflakes
191	"W", # pycodestyle warnings
192	"I", # isort (import ordering)
193	"UP", # pyupgrade
194	"B", # flake8-bugbear
195	"N", # pep8-naming
196	"C4", # flake8-comprehensions
197	"SIM", # flake8-simplify
198	"PT", # flake8-pytest-style
199	"RET", # flake8-return
200	"ARG", # flake8-unused-arguments
201	"PTH", # flake8-use-pathlib
202	"TID", # flake8-tidy-imports (enforces the banned-api table below)
203	]
204	ignore = [
205	"E501", # line too long; line length is handled by the formatter
206	]
207
208	[tool.ruff.lint.per-file-ignores]
209	"tests/**/*.py" = ["ARG", "PT011", "SIM117"]  # recursive glob: every .py file at any depth under tests/
210	# PyTorch's canonical `import torch.nn.functional as F` is universally
211	# read, so we allow the naming exception in the HF backend only.
212	"src/dlm_sway/backends/hf.py" = ["N812"]
213	# The .dlm bridge is the one place allowed to import the ``dlm`` package.
214	"src/dlm_sway/integrations/dlm/*.py" = ["TID251"]
215
216	[tool.ruff.lint.flake8-tidy-imports.banned-api]
217	# Hard architectural boundary: the `dlm` package is only importable
218	# from inside the optional integration shim. This keeps dlm-sway
219	# usable for anyone with just a HuggingFace base + PEFT adapter.
220	"dlm".msg = "Import `dlm` only from dlm_sway.integrations.dlm (the optional extra)."  # reported as TID251; exempted for the integration package in per-file-ignores
221
222	[tool.ruff.format]
223	quote-style = "double"  # formatter normalizes string quotes to double quotes
224	indent-style = "space"  # indent with spaces rather than tabs
225
226	# -------- mypy --------
227	[tool.mypy]
228	strict = true  # NOTE(review): the warn_*/no_implicit_optional/disallow_* flags below look already covered by strict — confirm before removing
229	python_version = "3.11"  # same floor as requires-python
230	packages = ["dlm_sway"]
231	mypy_path = "src"  # lets mypy find dlm_sway in the src/ layout
232	warn_return_any = true
233	warn_unused_ignores = true
234	warn_redundant_casts = true
235	no_implicit_optional = true
236	disallow_untyped_decorators = true
237	plugins = ["pydantic.mypy"]  # configured in [tool.pydantic-mypy]
238
239	[tool.pydantic-mypy]
240	init_forbid_extra = true  # options in this table are read by the pydantic.mypy plugin listed under [tool.mypy]
241	init_typed = true
242	warn_required_dynamic_aliases = true
243
244	# Stubless ML ecosystem packages. Narrow boundaries in backends/* import
245	# them explicitly; the rest of the codebase stays strict.
246	[[tool.mypy.overrides]]
247	module = [
248	"torch",
249	"torch.*",
250	"transformers.*",
251	"peft.*",
252	"safetensors.*",
253	"mlx.*",
254	"mlx_lm.*",
255	"sentence_transformers.*",
256	"sklearn",
257	"sklearn.*",
258	"spacy.*",
259	"textstat.*",
260	"nlpaug.*",
261	"matplotlib",
262	"matplotlib.*",
263	"plotly",
264	"plotly.*",
265	"tenacity",
266	"tenacity.*",
267	"httpx",
268	"httpx.*",
269	"huggingface_hub.*",
270	"dlm.*",
271	]
272	ignore_missing_imports = true  # these come from optional extras or the dev group, so they may not be installed
273	disable_error_code = ["no-untyped-call"]  # NOTE(review): an override applies to the listed modules themselves, not to their callers — confirm this suppresses what is intended
274
275	# -------- pytest --------
276	[tool.pytest.ini_options]
277	testpaths = ["tests"]
278	addopts = [
279	"-ra",
280	"-m", "not slow and not gpu and not online",  # default run deselects all three markers declared below
281	]
282	markers = [  # registered here so the custom marks used in addopts are declared
283	"slow: expensive; deselected by default",
284	"gpu: requires CUDA; skipped on CPU/MPS runners",
285	"online: touches the network; skipped in offline CI",
286	]