[project] name = "dlm-sway" version = "0.1.0" description = "Differential testing for fine-tuned causal LMs: did LoRA/QLoRA training actually change behavior, or is the model defaulting to the pretrained base?" readme = "README.md" requires-python = ">=3.11" license = { text = "MIT" } authors = [{ name = "Matt Wolffe", email = "mfwolffe@outlook.com" }] keywords = [ "lora", "qlora", "peft", "fine-tuning", "evaluation", "llm", "differential-testing", ] classifiers = [ "Development Status :: 3 - Alpha", "Intended Audience :: Developers", "Intended Audience :: Science/Research", "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Topic :: Scientific/Engineering :: Artificial Intelligence", ] # Core deps: spec loading, orchestration, reporting. No torch — a user # who only defines specs or writes a custom backend shouldn't pull 3 GB # of CUDA wheels. dependencies = [ "pydantic>=2.9", "pyyaml>=6.0", "typer>=0.12", "rich>=13.7", "numpy>=1.26", "packaging>=24.0", ] [project.optional-dependencies] # HuggingFace + PEFT scoring backend. The canonical path. hf = [ "torch>=2.4", "transformers>=4.45", "peft>=0.13", "safetensors>=0.4", ] # Apple Silicon inference. Env markers keep `uv sync --extra mlx` a no-op # on non-Apple hosts so Linux/CUDA contributors' wheel resolution stays # sane. mlx = [ "mlx>=0.18; sys_platform == 'darwin' and platform_machine == 'arm64'", "mlx-lm>=0.19; sys_platform == 'darwin' and platform_machine == 'arm64'", ] # Stylistic fingerprinting (C1). spaCy models pull at runtime via # `python -m spacy download`. style = [ "spacy>=3.7", "textstat>=0.7", "nlpaug>=1.1", ] # Semantic similarity (A2) + cluster-coherent KL (S16 / F8). The # SentenceTransformer and k-means clustering pair ride the same # ~80 MB MiniLM load; putting scikit-learn in the same extra keeps # users from hitting "wait, which extra?" friction. semsim = [ "sentence-transformers>=3.0", "scikit-learn>=1.4", ] # Optional .dlm integration. Only imported inside dlm_sway.integrations.dlm. # Upper bound pins to the major pre-1.0 range the integration test # (tests/integration/test_dlm_api_compat.py) has validated against — # bump when dlm cuts v1.0 and the resolver's ``.hf_id`` contract is # re-verified. dlm = [ "dlm>=0.9,<1.0", ] # OpenAI-compatible HTTP scoring backend (S13 / F7). Unlocks hosted # fine-tunes (OpenAI platform, vLLM serve, Ollama) without pulling # torch. httpx + tenacity together are a few hundred KB of deps vs # the 3 GB the [hf] extra costs. api = [ "httpx>=0.27", "tenacity>=9.0", ] # pytest integration (S15 / F10). The plugin is discovered via the # `pytest11` entry point below — ``pip install 'dlm-sway[pytest]'`` # adds pytest if the user doesn't have it, then pytest auto-loads # the plugin on next invocation. pytest = [ "pytest>=8.0", ] # Long-running daemon mode (S36). FastAPI + uvicorn give us a warm- # backend HTTP API that turns iterative ``sway run`` calls from # 15-second cold-loads into 2-second warm dispatches. uvicorn[standard] # pulls httptools + uvloop for production-quality serving on # Linux/macOS. serve = [ "fastapi>=0.110", "uvicorn[standard]>=0.30", "httpx>=0.27", ] # Visualization (P9 + S12 HTML report). 
# Visualization (P9 + S12 HTML report).
viz = [
    "matplotlib>=3.8",
    "plotly>=5.20",
]

all = [
    "torch>=2.4",
    "transformers>=4.45",
    "peft>=0.13",
    "safetensors>=0.4",
    "mlx>=0.18; sys_platform == 'darwin' and platform_machine == 'arm64'",
    "mlx-lm>=0.19; sys_platform == 'darwin' and platform_machine == 'arm64'",
    "spacy>=3.7",
    "textstat>=0.7",
    "nlpaug>=1.1",
    "sentence-transformers>=3.0",
    "scikit-learn>=1.4",
    "httpx>=0.27",
    "tenacity>=9.0",
    "pytest>=8.0",
    "matplotlib>=3.8",
    "plotly>=5.20",
    "fastapi>=0.110",
    "uvicorn[standard]>=0.30",
]

[project.scripts]
sway = "dlm_sway.cli.app:main"

# S15 / F10: pytest plugin discovered via the canonical pytest11
# entry point. Gets auto-loaded the moment the wheel is installed,
# even without an ``@pytest.mark.sway`` import — consistent with how
# pytest-cov, pytest-xdist, etc. ship their plugins.
[project.entry-points.pytest11]
sway = "dlm_sway.pytest_plugin"

[project.urls]
Homepage = "https://github.com/tenseleyFlow/sway"
Issues = "https://github.com/tenseleyFlow/sway/issues"
"Related project" = "https://github.com/tenseleyFlow/DocumentLanguageModel"

[dependency-groups]
dev = [
    "pytest>=8.0",
    "pytest-cov>=5.0",
    "mypy>=1.11",
    "ruff>=0.6",
    "types-pyyaml>=6.0",
    "hypothesis>=6.152.1",
    # Required by the tiny_model fixture (snapshot_download) used by every
    # slow+online integration test. Not transitively guaranteed by the hf
    # extra, because contributors may want to run integration tests without
    # the full [hf] extra installed.
    "huggingface_hub>=0.25",
    # S19: the pre-commit-hook integration test spawns ``pre-commit``
    # as a subprocess. Keeps the tool out of the user's runtime deps.
    "pre-commit>=3.8",
    # S21 / F03: pytest-timeout wraps slow+online tests so a silent network
    # hang surfaces as a failed test rather than a job-level timeout (macOS
    # CI observed a 20m hang on the Sprint 19 merge run 24747915467).
    "pytest-timeout>=2.3",
    # S21 / F03: tenacity's retry helpers wrap
    # ``huggingface_hub.snapshot_download`` in the tiny_model fixture.
    # Already in the ``[api]`` extra; duplicated here so slow-lane CI jobs
    # don't need to install ``[api]`` just to get the fixture retry.
    "tenacity>=9.0",
]

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.build.targets.wheel]
packages = ["src/dlm_sway"]

# -------- ruff --------
[tool.ruff]
line-length = 100
target-version = "py311"
src = ["src", "tests"]

[tool.ruff.lint]
select = [
    "E",   # pycodestyle errors
    "F",   # pyflakes
    "W",   # pycodestyle warnings
    "I",   # isort
    "UP",  # pyupgrade
    "B",   # bugbear
    "N",   # pep8-naming
    "C4",  # comprehensions
    "SIM", # simplify
    "PT",  # pytest
    "RET", # return
    "ARG", # unused args
    "PTH", # use pathlib
    "TID", # tidy imports
]
ignore = [
    "E501", # handled by formatter
]

[tool.ruff.lint.per-file-ignores]
"tests/**/*.py" = ["ARG", "PT011", "SIM117"]
# PyTorch's canonical `import torch.nn.functional as F` is universally
# read, so we allow the naming exception in the HF backend only.
"src/dlm_sway/backends/hf.py" = ["N812"]
# The .dlm bridge is the one place allowed to import the ``dlm`` package.
"src/dlm_sway/integrations/dlm/*.py" = ["TID251"]

[tool.ruff.lint.flake8-tidy-imports.banned-api]
# Hard architectural boundary: the `dlm` package is only importable
# from inside the optional integration shim. This keeps dlm-sway
# usable for anyone with just a HuggingFace base + PEFT adapter.
"dlm".msg = "Import `dlm` only from dlm_sway.integrations.dlm (the optional extra)."
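
# Sketch of the boundary in practice (illustrative code, not part of this
# config): anywhere outside the integration shim, ruff reports TID251 on
#   import dlm
# while modules matching src/dlm_sway/integrations/dlm/*.py are exempt via
# the per-file-ignore above and can guard the import so the [dlm] extra
# stays optional:
#   try:
#       import dlm
#   except ImportError:
#       dlm = None
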
[tool.ruff.format]
quote-style = "double"
indent-style = "space"

# -------- mypy --------
[tool.mypy]
strict = true
python_version = "3.11"
packages = ["dlm_sway"]
mypy_path = "src"
warn_return_any = true
warn_unused_ignores = true
warn_redundant_casts = true
no_implicit_optional = true
disallow_untyped_decorators = true
plugins = ["pydantic.mypy"]

[tool.pydantic-mypy]
init_forbid_extra = true
init_typed = true
warn_required_dynamic_aliases = true

# Stubless ML ecosystem packages. Narrow boundaries in backends/* import
# them explicitly; the rest of the codebase stays strict.
[[tool.mypy.overrides]]
module = [
    "torch",
    "torch.*",
    "transformers.*",
    "peft.*",
    "safetensors.*",
    "mlx.*",
    "mlx_lm.*",
    "sentence_transformers.*",
    "sklearn",
    "sklearn.*",
    "spacy.*",
    "textstat.*",
    "nlpaug.*",
    "matplotlib",
    "matplotlib.*",
    "plotly",
    "plotly.*",
    "tenacity",
    "tenacity.*",
    "httpx",
    "httpx.*",
    "huggingface_hub.*",
    "dlm.*",
]
ignore_missing_imports = true
disable_error_code = ["no-untyped-call"]

# -------- pytest --------
[tool.pytest.ini_options]
testpaths = ["tests"]
addopts = [
    "-ra",
    "-m",
    "not slow and not gpu and not online",
]
markers = [
    "slow: expensive; deselected by default",
    "gpu: requires CUDA; skipped on CPU/MPS runners",
    "online: touches the network; skipped in offline CI",
]
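
# How the marker lanes above are typically exercised (commands are
# illustrative; only the default deselection in addopts is enforced here):
#   pytest                          # default lane: fast, offline, CPU-only
#   pytest -m "slow and online"     # integration lane; needs network access
#   pytest -m gpu                   # CUDA-only lane for GPU runners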