# pyproject.toml — packaging and tool configuration for dlm-sway.
[project]
name = "dlm-sway"
version = "0.1.0"
description = "Differential testing for fine-tuned causal LMs: did LoRA/QLoRA training actually change behavior, or is the model defaulting to the pretrained base?"
readme = "README.md"
requires-python = ">=3.11"
license = { text = "MIT" }
authors = [{ name = "Matt Wolffe", email = "mfwolffe@outlook.com" }]
keywords = [
    "lora",
    "qlora",
    "peft",
    "fine-tuning",
    "evaluation",
    "llm",
    "differential-testing",
]
classifiers = [
    "Development Status :: 3 - Alpha",
    "Intended Audience :: Developers",
    "Intended Audience :: Science/Research",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
28
# Core deps: spec loading, orchestration, reporting. No torch — a user
# who only defines specs or writes a custom backend shouldn't pull 3 GB
# of CUDA wheels.
dependencies = [
    "pydantic>=2.9",
    "pyyaml>=6.0",
    "typer>=0.12",
    "rich>=13.7",
    "numpy>=1.26",
    "packaging>=24.0",
]
40
[project.optional-dependencies]
# HuggingFace + PEFT scoring backend. The canonical path.
hf = [
    "torch>=2.4",
    "transformers>=4.45",
    "peft>=0.13",
    "safetensors>=0.4",
]
# Apple Silicon inference. Env markers keep `uv sync --extra mlx` a no-op
# on non-Apple hosts so Linux/CUDA contributors' wheel resolution stays
# sane.
mlx = [
    "mlx>=0.18; sys_platform == 'darwin' and platform_machine == 'arm64'",
    "mlx-lm>=0.19; sys_platform == 'darwin' and platform_machine == 'arm64'",
]
# Stylistic fingerprinting (C1). spaCy models pull at runtime via
# `python -m spacy download`.
style = [
    "spacy>=3.7",
    "textstat>=0.7",
    "nlpaug>=1.1",
]
# Semantic similarity (A2) + cluster-coherent KL (S16 / F8). The
# SentenceTransformer and k-means clustering pair ride the same
# ~80 MB MiniLM load; putting scikit-learn in the same extra keeps
# users from hitting "wait, which extra?" friction.
semsim = [
    "sentence-transformers>=3.0",
    "scikit-learn>=1.4",
]
# Optional .dlm integration. Only imported inside dlm_sway.integrations.dlm.
# Upper bound pins to the major pre-1.0 range the integration test
# (tests/integration/test_dlm_api_compat.py) has validated against —
# bump when dlm cuts v1.0 and the resolver's ``.hf_id`` contract is
# re-verified.
dlm = [
    "dlm>=0.9,<1.0",
]
# OpenAI-compatible HTTP scoring backend (S13 / F7). Unlocks hosted
# fine-tunes (OpenAI platform, vLLM serve, Ollama) without pulling
# torch. httpx + tenacity together are a few hundred KB of deps vs
# the 3 GB the [hf] extra costs.
api = [
    "httpx>=0.27",
    "tenacity>=9.0",
]
# pytest integration (S15 / F10). The plugin is discovered via the
# `pytest11` entry point below — ``pip install 'dlm-sway[pytest]'``
# adds pytest if the user doesn't have it, then pytest auto-loads
# the plugin on next invocation.
pytest = [
    "pytest>=8.0",
]
# Long-running daemon mode (S36). FastAPI + uvicorn give us a warm-
# backend HTTP API that turns iterative ``sway run`` calls from
# 15-second cold-loads into 2-second warm dispatches. uvicorn[standard]
# pulls httptools + uvloop for production-quality serving on
# Linux/macOS.
serve = [
    "fastapi>=0.110",
    "uvicorn[standard]>=0.30",
    "httpx>=0.27",
]
# Visualization (P9 + S12 HTML report).
viz = [
    "matplotlib>=3.8",
    "plotly>=5.20",
]
# Convenience meta-extra: hand-maintained union of the extras above.
# NOTE(review): the [dlm] extra is NOT mirrored here — presumably
# deliberate given its pre-1.0 pin, but confirm. Any new extra must
# also be added to this list by hand or `pip install 'dlm-sway[all]'`
# will silently miss it.
all = [
    "torch>=2.4",
    "transformers>=4.45",
    "peft>=0.13",
    "safetensors>=0.4",
    "mlx>=0.18; sys_platform == 'darwin' and platform_machine == 'arm64'",
    "mlx-lm>=0.19; sys_platform == 'darwin' and platform_machine == 'arm64'",
    "spacy>=3.7",
    "textstat>=0.7",
    "nlpaug>=1.1",
    "sentence-transformers>=3.0",
    "scikit-learn>=1.4",
    "httpx>=0.27",
    "tenacity>=9.0",
    "pytest>=8.0",
    "matplotlib>=3.8",
    "plotly>=5.20",
    "fastapi>=0.110",
    "uvicorn[standard]>=0.30",
]
129
# Console entry point: installs the ``sway`` command.
[project.scripts]
sway = "dlm_sway.cli.app:main"
132
# S15 / F10: pytest plugin discovered via the canonical pytest11
# entry-point. Gets auto-loaded the moment the wheel is installed,
# even without an ``@pytest.mark.sway`` import — consistent with how
# pytest-cov, pytest-xdist, etc. ship their plugins.
[project.entry-points.pytest11]
sway = "dlm_sway.pytest_plugin"
139
[project.urls]
Homepage = "https://github.com/tenseleyFlow/sway"
Issues = "https://github.com/tenseleyFlow/sway/issues"
"Related project" = "https://github.com/tenseleyFlow/DocumentLanguageModel"
144
# PEP 735 dependency groups — dev tooling, never shipped in the wheel.
[dependency-groups]
dev = [
    "pytest>=8.0",
    "pytest-cov>=5.0",
    "mypy>=1.11",
    "ruff>=0.6",
    "types-pyyaml>=6.0",
    "hypothesis>=6.152.1",
    # Required by the tiny_model fixture (snapshot_download) used by every
    # slow+online integration test. Not transitively guaranteed by hf
    # optional-dep because contributors may want to run integration tests
    # without the full [hf] extra installed.
    "huggingface_hub>=0.25",
    # S19: the pre-commit-hook integration test spawns ``pre-commit``
    # as a subprocess. Keeps the tool out of the user's runtime deps.
    "pre-commit>=3.8",
    # S21 / F03: pytest-timeout wraps slow+online tests so a
    # silent network hang surfaces as a failed test rather than a
    # job-level timeout (macOS CI observed 20m hang on Sprint 19
    # merge run 24747915467).
    "pytest-timeout>=2.3",
    # S21 / F03: tenacity's retry helpers wrap
    # ``huggingface_hub.snapshot_download`` in the tiny_model
    # fixture. Already in the ``[api]`` extra; duplicated here so
    # slow-lane CI jobs don't need to install ``[api]`` just to
    # get the fixture retry.
    "tenacity>=9.0",
]
173
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

# src-layout: the importable package lives under src/dlm_sway.
[tool.hatch.build.targets.wheel]
packages = ["src/dlm_sway"]
180
# -------- ruff --------
[tool.ruff]
line-length = 100
target-version = "py311"
src = ["src", "tests"]

[tool.ruff.lint]
select = [
    "E",   # pycodestyle errors
    "F",   # pyflakes
    "W",   # pycodestyle warnings
    "I",   # isort
    "UP",  # pyupgrade
    "B",   # bugbear
    "N",   # pep8-naming
    "C4",  # comprehensions
    "SIM", # simplify
    "PT",  # pytest
    "RET", # return
    "ARG", # unused args
    "PTH", # use pathlib
    "TID", # tidy imports
]
ignore = [
    "E501", # handled by formatter
]

[tool.ruff.lint.per-file-ignores]
"tests/**/*.py" = ["ARG", "PT011", "SIM117"]
# PyTorch's canonical `import torch.nn.functional as F` is universally
# read, so we allow the naming exception in the HF backend only.
"src/dlm_sway/backends/hf.py" = ["N812"]
# The .dlm bridge is the one place allowed to import the ``dlm`` package.
"src/dlm_sway/integrations/dlm/*.py" = ["TID251"]

[tool.ruff.lint.flake8-tidy-imports.banned-api]
# Hard architectural boundary: the `dlm` package is only importable
# from inside the optional integration shim. This keeps dlm-sway
# usable for anyone with just a HuggingFace base + PEFT adapter.
"dlm".msg = "Import `dlm` only from dlm_sway.integrations.dlm (the optional extra)."

[tool.ruff.format]
quote-style = "double"
indent-style = "space"
225
# -------- mypy --------
[tool.mypy]
strict = true
python_version = "3.11"
packages = ["dlm_sway"]
mypy_path = "src"
warn_return_any = true
warn_unused_ignores = true
warn_redundant_casts = true
no_implicit_optional = true
disallow_untyped_decorators = true
plugins = ["pydantic.mypy"]

[tool.pydantic-mypy]
init_forbid_extra = true
init_typed = true
warn_required_dynamic_aliases = true
243
# Stubless ML ecosystem packages. Narrow boundaries in backends/* import
# them explicitly; the rest of the codebase stays strict.
[[tool.mypy.overrides]]
module = [
    "torch",
    "torch.*",
    "transformers.*",
    "peft.*",
    "safetensors.*",
    "mlx.*",
    "mlx_lm.*",
    "sentence_transformers.*",
    "sklearn",
    "sklearn.*",
    "spacy.*",
    "textstat.*",
    "nlpaug.*",
    "matplotlib",
    "matplotlib.*",
    "plotly",
    "plotly.*",
    "tenacity",
    "tenacity.*",
    "httpx",
    "httpx.*",
    "huggingface_hub.*",
    "dlm.*",
]
ignore_missing_imports = true
disable_error_code = ["no-untyped-call"]
274
# -------- pytest --------
[tool.pytest.ini_options]
testpaths = ["tests"]
# Fast lane by default: slow/gpu/online tests must be opted into
# explicitly (e.g. ``pytest -m slow``).
addopts = [
    "-ra",
    "-m", "not slow and not gpu and not online",
]
markers = [
    "slow: expensive; deselected by default",
    "gpu: requires CUDA; skipped on CPU/MPS runners",
    "online: touches the network; skipped in offline CI",
]