| 1 | [project] |
| 2 | name = "dlm-sway" |
| 3 | version = "0.1.0" |
| 4 | description = "Differential testing for fine-tuned causal LMs: did LoRA/QLoRA training actually change behavior, or is the model defaulting to the pretrained base?" |
| 5 | readme = "README.md" |
| 6 | requires-python = ">=3.11" |
| 7 | license = { text = "MIT" } |
| 8 | authors = [{ name = "Matt Wolffe", email = "mfwolffe@outlook.com" }] |
| 9 | keywords = [ |
| 10 | "lora", |
| 11 | "qlora", |
| 12 | "peft", |
| 13 | "fine-tuning", |
| 14 | "evaluation", |
| 15 | "llm", |
| 16 | "differential-testing", |
| 17 | ] |
| 18 | classifiers = [ |
| 19 | "Development Status :: 3 - Alpha", |
| 20 | "Intended Audience :: Developers", |
| 21 | "Intended Audience :: Science/Research", |
| 22 | "License :: OSI Approved :: MIT License", |
| 23 | "Programming Language :: Python :: 3", |
| 24 | "Programming Language :: Python :: 3.11", |
| 25 | "Programming Language :: Python :: 3.12", |
| 26 | "Topic :: Scientific/Engineering :: Artificial Intelligence", |
| 27 | ] |
| 28 | |
| 29 | # Core deps: spec loading, orchestration, reporting. No torch — a user |
| 30 | # who only defines specs or writes a custom backend shouldn't pull 3 GB |
| 31 | # of CUDA wheels. |
| 32 | dependencies = [ |
| 33 | "pydantic>=2.9", |
| 34 | "pyyaml>=6.0", |
| 35 | "typer>=0.12", |
| 36 | "rich>=13.7", |
| 37 | "numpy>=1.26", |
| 38 | "packaging>=24.0", |
| 39 | ] |
| 40 | |
| 41 | [project.optional-dependencies] |
| 42 | # HuggingFace + PEFT scoring backend. The canonical path. |
| 43 | hf = [ |
| 44 | "torch>=2.4", |
| 45 | "transformers>=4.45", |
| 46 | "peft>=0.13", |
| 47 | "safetensors>=0.4", |
| 48 | ] |
| 49 | # Apple Silicon inference. Env markers keep `uv sync --extra mlx` a no-op |
| 50 | # on non-Apple hosts so Linux/CUDA contributors' wheel resolution stays |
| 51 | # sane. |
| 52 | mlx = [ |
| 53 | "mlx>=0.18; sys_platform == 'darwin' and platform_machine == 'arm64'", |
| 54 | "mlx-lm>=0.19; sys_platform == 'darwin' and platform_machine == 'arm64'", |
| 55 | ] |
| 56 | # Stylistic fingerprinting (C1). spaCy models pull at runtime via |
| 57 | # `python -m spacy download`. |
| 58 | style = [ |
| 59 | "spacy>=3.7", |
| 60 | "textstat>=0.7", |
| 61 | "nlpaug>=1.1", |
| 62 | ] |
| 63 | # Semantic similarity (A2) + cluster-coherent KL (S16 / F8). The |
| 64 | # SentenceTransformer and k-means clustering pair ride the same |
| 65 | # ~80 MB MiniLM load; putting scikit-learn in the same extra keeps |
| 66 | # users from hitting "wait, which extra?" friction. |
| 67 | semsim = [ |
| 68 | "sentence-transformers>=3.0", |
| 69 | "scikit-learn>=1.4", |
| 70 | ] |
| 71 | # Optional .dlm integration. Only imported inside dlm_sway.integrations.dlm. |
| 72 | # The upper bound caps dlm at the 0.9.x (pre-1.0) range the integration test |
| 73 | # (tests/integration/test_dlm_api_compat.py) has validated against — |
| 74 | # raise it once dlm cuts v1.0 and the resolver's ``.hf_id`` contract is |
| 75 | # re-verified. |
| 76 | dlm = [ |
| 77 | "dlm>=0.9,<1.0", |
| 78 | ] |
| 79 | # OpenAI-compatible HTTP scoring backend (S13 / F7). Unlocks hosted |
| 80 | # fine-tunes (OpenAI platform, vLLM serve, Ollama) without pulling |
| 81 | # torch. httpx + tenacity together are a few hundred KB of deps vs |
| 82 | # the 3 GB the [hf] extra costs. |
| 83 | api = [ |
| 84 | "httpx>=0.27", |
| 85 | "tenacity>=9.0", |
| 86 | ] |
| 87 | # pytest integration (S15 / F10). The plugin is discovered via the |
| 88 | # `pytest11` entry point below — ``pip install 'dlm-sway[pytest]'`` |
| 89 | # adds pytest if the user doesn't have it, then pytest auto-loads |
| 90 | # the plugin on next invocation. |
| 91 | pytest = [ |
| 92 | "pytest>=8.0", |
| 93 | ] |
| 94 | # Long-running daemon mode (S36). FastAPI + uvicorn give us a |
| 95 | # warm-backend HTTP API that turns iterative ``sway run`` calls from |
| 96 | # 15-second cold-loads into 2-second warm dispatches. uvicorn[standard] |
| 97 | # pulls httptools + uvloop for production-quality serving on |
| 98 | # Linux/macOS. |
| 99 | serve = [ |
| 100 | "fastapi>=0.110", |
| 101 | "uvicorn[standard]>=0.30", |
| 102 | "httpx>=0.27", |
| 103 | ] |
| 104 | # Visualization (P9 + S12 HTML report). |
| 105 | viz = [ |
| 106 | "matplotlib>=3.8", |
| 107 | "plotly>=5.20", |
| 108 | ] |
| 109 | all = [ |
| 110 | "torch>=2.4", |
| 111 | "transformers>=4.45", |
| 112 | "peft>=0.13", |
| 113 | "safetensors>=0.4", |
| 114 | "mlx>=0.18; sys_platform == 'darwin' and platform_machine == 'arm64'", |
| 115 | "mlx-lm>=0.19; sys_platform == 'darwin' and platform_machine == 'arm64'", |
| 116 | "spacy>=3.7", |
| 117 | "textstat>=0.7", |
| 118 | "nlpaug>=1.1", |
| 119 | "sentence-transformers>=3.0", |
| 120 | "scikit-learn>=1.4", |
| 121 | "httpx>=0.27", |
| 122 | "tenacity>=9.0", |
| 123 | "pytest>=8.0", |
| 124 | "matplotlib>=3.8", |
| 125 | "plotly>=5.20", |
| 126 | "fastapi>=0.110", |
| 127 | "uvicorn[standard]>=0.30", |
| 128 | ] |
| 129 | |
| 130 | [project.scripts] |
| 131 | sway = "dlm_sway.cli.app:main" |
| 132 | |
| 133 | # S15 / F10: pytest plugin registered under the canonical pytest11 |
| 134 | # entry-point group. Once the wheel is installed, pytest auto-loads it |
| 135 | # on every run, even when no test uses ``@pytest.mark.sway`` — consistent |
| 136 | # with how pytest-cov, pytest-xdist, etc. ship their plugins. |
| 137 | [project.entry-points.pytest11] |
| 138 | sway = "dlm_sway.pytest_plugin" |
| 139 | |
| 140 | [project.urls] |
| 141 | Homepage = "https://github.com/tenseleyFlow/sway" |
| 142 | Issues = "https://github.com/tenseleyFlow/sway/issues" |
| 143 | "Related project" = "https://github.com/tenseleyFlow/DocumentLanguageModel" |
| 144 | |
| 145 | [dependency-groups] |
| 146 | dev = [ |
| 147 | "pytest>=8.0", |
| 148 | "pytest-cov>=5.0", |
| 149 | "mypy>=1.11", |
| 150 | "ruff>=0.6", |
| 151 | "types-pyyaml>=6.0", |
| 152 | "hypothesis>=6.152.1", |
| 153 | # Required by the tiny_model fixture (snapshot_download) used by every |
| 154 | # slow+online integration test. Listed explicitly rather than relied on |
| 155 | # transitively via the [hf] extra, because contributors may want to run |
| 156 | # integration tests without the full [hf] extra installed. |
| 157 | "huggingface_hub>=0.25", |
| 158 | # S19: the pre-commit-hook integration test spawns ``pre-commit`` |
| 159 | # as a subprocess. Keeps the tool out of the user's runtime deps. |
| 160 | "pre-commit>=3.8", |
| 161 | # S21 / F03: pytest-timeout wraps slow+online tests so a |
| 162 | # silent network hang surfaces as a failed test rather than a |
| 163 | # job-level timeout (macOS CI observed 20m hang on Sprint 19 |
| 164 | # merge run 24747915467). |
| 165 | "pytest-timeout>=2.3", |
| 166 | # S21 / F03: tenacity's retry helpers wrap |
| 167 | # ``huggingface_hub.snapshot_download`` in the tiny_model |
| 168 | # fixture. Already in the ``[api]`` extra; duplicated here so |
| 169 | # slow-lane CI jobs don't need to install ``[api]`` just to |
| 170 | # get the fixture retry. |
| 171 | "tenacity>=9.0", |
| 172 | ] |
| 173 | |
| 174 | [build-system] |
| 175 | requires = ["hatchling"] |
| 176 | build-backend = "hatchling.build" |
| 177 | |
| 178 | [tool.hatch.build.targets.wheel] |
| 179 | packages = ["src/dlm_sway"] |
| 180 | |
| 181 | # -------- ruff -------- |
| 182 | [tool.ruff] |
| 183 | line-length = 100 |
| 184 | target-version = "py311" |
| 185 | src = ["src", "tests"] |
| 186 | |
| 187 | [tool.ruff.lint] |
| 188 | select = [ |
| 189 | "E", # pycodestyle errors |
| 190 | "F", # pyflakes |
| 191 | "W", # pycodestyle warnings |
| 192 | "I", # isort |
| 193 | "UP", # pyupgrade |
| 194 | "B", # bugbear |
| 195 | "N", # pep8-naming |
| 196 | "C4", # comprehensions |
| 197 | "SIM", # simplify |
| 198 | "PT", # pytest |
| 199 | "RET", # return |
| 200 | "ARG", # unused args |
| 201 | "PTH", # use pathlib |
| 202 | "TID", # tidy imports |
| 203 | ] |
| 204 | ignore = [ |
| 205 | "E501", # handled by formatter |
| 206 | ] |
| 207 | |
| 208 | [tool.ruff.lint.per-file-ignores] |
| 209 | "tests/**/*.py" = ["ARG", "PT011", "SIM117"] |
| 210 | # PyTorch's canonical `import torch.nn.functional as F` is universally |
| 211 | # recognized, so we allow the naming exception in the HF backend only. |
| 212 | "src/dlm_sway/backends/hf.py" = ["N812"] |
| 213 | # The .dlm bridge is the one place allowed to import the ``dlm`` package. |
| 214 | "src/dlm_sway/integrations/dlm/*.py" = ["TID251"] |
| 215 | |
| 216 | [tool.ruff.lint.flake8-tidy-imports.banned-api] |
| 217 | # Hard architectural boundary: the `dlm` package is only importable |
| 218 | # from inside the optional integration shim. This keeps dlm-sway |
| 219 | # usable for anyone with just a HuggingFace base + PEFT adapter. |
| 220 | "dlm".msg = "Import `dlm` only from dlm_sway.integrations.dlm (the optional extra)." |
| 221 | |
| 222 | [tool.ruff.format] |
| 223 | quote-style = "double" |
| 224 | indent-style = "space" |
| 225 | |
| 226 | # -------- mypy -------- |
| 227 | [tool.mypy] |
| 228 | strict = true |
| 229 | python_version = "3.11" |
| 230 | packages = ["dlm_sway"] |
| 231 | mypy_path = "src" |
| 232 | warn_return_any = true |
| 233 | warn_unused_ignores = true |
| 234 | warn_redundant_casts = true |
| 235 | no_implicit_optional = true |
| 236 | disallow_untyped_decorators = true |
| 237 | plugins = ["pydantic.mypy"] |
| 238 | |
| 239 | [tool.pydantic-mypy] |
| 240 | init_forbid_extra = true |
| 241 | init_typed = true |
| 242 | warn_required_dynamic_aliases = true |
| 243 | |
| 244 | # Stubless ML ecosystem packages. Narrow boundaries in backends/* import |
| 245 | # them explicitly; the rest of the codebase stays strict. |
| 246 | [[tool.mypy.overrides]] |
| 247 | module = [ |
| 248 | "torch", |
| 249 | "torch.*", |
| 250 | "transformers.*", |
| 251 | "peft.*", |
| 252 | "safetensors.*", |
| 253 | "mlx.*", |
| 254 | "mlx_lm.*", |
| 255 | "sentence_transformers.*", |
| 256 | "sklearn", |
| 257 | "sklearn.*", |
| 258 | "spacy.*", |
| 259 | "textstat.*", |
| 260 | "nlpaug.*", |
| 261 | "matplotlib", |
| 262 | "matplotlib.*", |
| 263 | "plotly", |
| 264 | "plotly.*", |
| 265 | "tenacity", |
| 266 | "tenacity.*", |
| 267 | "httpx", |
| 268 | "httpx.*", |
| 269 | "huggingface_hub.*", |
| 270 | "dlm.*", |
| 271 | ] |
| 272 | ignore_missing_imports = true |
| 273 | disable_error_code = ["no-untyped-call"] |
| 274 | |
| 275 | # -------- pytest -------- |
| 276 | [tool.pytest.ini_options] |
| 277 | testpaths = ["tests"] |
| 278 | addopts = [ |
| 279 | "-ra", |
| 280 | "-m", "not slow and not gpu and not online", |
| 281 | ] |
| 282 | markers = [ |
| 283 | "slow: expensive; deselected by default", |
| 284 | "gpu: requires CUDA; skipped on CPU/MPS runners", |
| 285 | "online: touches the network; skipped in offline CI", |
| 286 | ] |