[project] name = "dlm-sway" version = "0.1.0" description = "Differential testing for fine-tuned causal LMs: did LoRA/QLoRA training actually change behavior, or is the model defaulting to the pretrained base?" readme = "README.md" requires-python = ">=3.11" license = { text = "MIT" } authors = [{ name = "Matt Wolffe", email = "mfwolffe@outlook.com" }] keywords = [ "lora", "qlora", "peft", "fine-tuning", "evaluation", "llm", "differential-testing", ] classifiers = [ "Development Status :: 3 - Alpha", "Intended Audience :: Developers", "Intended Audience :: Science/Research", "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Topic :: Scientific/Engineering :: Artificial Intelligence", ] # Core deps: spec loading, orchestration, reporting. No torch — a user # who only defines specs or writes a custom backend shouldn't pull 3 GB # of CUDA wheels. dependencies = [ "pydantic>=2.9", "pyyaml>=6.0", "typer>=0.12", "rich>=13.7", "numpy>=1.26", "packaging>=24.0", ] [project.optional-dependencies] # HuggingFace + PEFT scoring backend. The canonical path. hf = [ "torch>=2.4", "transformers>=4.45", "peft>=0.13", "safetensors>=0.4", ] # Apple Silicon inference. Env markers keep `uv sync --extra mlx` a no-op # on non-Apple hosts so Linux/CUDA contributors' wheel resolution stays # sane. mlx = [ "mlx>=0.18; sys_platform == 'darwin' and platform_machine == 'arm64'", "mlx-lm>=0.19; sys_platform == 'darwin' and platform_machine == 'arm64'", ] # Stylistic fingerprinting (C1). spaCy models pull at runtime via # `python -m spacy download`. style = [ "spacy>=3.7", "textstat>=0.7", "nlpaug>=1.1", ] # Semantic similarity (A2) + cluster-coherent KL (S16 / F8). The # SentenceTransformer and k-means clustering pair ride the same # ~80 MB MiniLM load; putting scikit-learn in the same extra keeps # users from hitting "wait, which extra?" friction. semsim = [ "sentence-transformers>=3.0", "scikit-learn>=1.4", ] # Optional .dlm integration. Only imported inside dlm_sway.integrations.dlm. # Upper bound pins to the major pre-1.0 range the integration test # (tests/integration/test_dlm_api_compat.py) has validated against — # bump when dlm cuts v1.0 and the resolver's ``.hf_id`` contract is # re-verified. dlm = [ "dlm>=0.9,<1.0", ] # OpenAI-compatible HTTP scoring backend (S13 / F7). Unlocks hosted # fine-tunes (OpenAI platform, vLLM serve, Ollama) without pulling # torch. httpx + tenacity together are a few hundred KB of deps vs # the 3 GB the [hf] extra costs. api = [ "httpx>=0.27", "tenacity>=9.0", ] # pytest integration (S15 / F10). The plugin is discovered via the # `pytest11` entry point below — ``pip install 'dlm-sway[pytest]'`` # adds pytest if the user doesn't have it, then pytest auto-loads # the plugin on next invocation. pytest = [ "pytest>=8.0", ] # Long-running daemon mode (S36). FastAPI + uvicorn give us a warm- # backend HTTP API that turns iterative ``sway run`` calls from # 15-second cold-loads into 2-second warm dispatches. uvicorn[standard] # pulls httptools + uvloop for production-quality serving on # Linux/macOS. serve = [ "fastapi>=0.110", "uvicorn[standard]>=0.30", "httpx>=0.27", ] # Visualization (P9 + S12 HTML report). 
# Visualization (P9 + S12 HTML report).
viz = [
    "matplotlib>=3.8",
    "plotly>=5.20",
]

all = [
    "torch>=2.4",
    "transformers>=4.45",
    "peft>=0.13",
    "safetensors>=0.4",
    "mlx>=0.18; sys_platform == 'darwin' and platform_machine == 'arm64'",
    "mlx-lm>=0.19; sys_platform == 'darwin' and platform_machine == 'arm64'",
    "spacy>=3.7",
    "textstat>=0.7",
    "nlpaug>=1.1",
    "sentence-transformers>=3.0",
    "scikit-learn>=1.4",
    "httpx>=0.27",
    "tenacity>=9.0",
    "pytest>=8.0",
    "matplotlib>=3.8",
    "plotly>=5.20",
    "fastapi>=0.110",
    "uvicorn[standard]>=0.30",
]

[project.scripts]
sway = "dlm_sway.cli.app:main"

# S15 / F10: pytest plugin discovered via the canonical pytest11
# entry point. Gets auto-loaded the moment the wheel is installed,
# even without an ``@pytest.mark.sway`` import — consistent with how
# pytest-cov, pytest-xdist, etc. ship their plugins.
[project.entry-points.pytest11]
sway = "dlm_sway.pytest_plugin"

[project.urls]
Homepage = "https://github.com/tenseleyFlow/sway"
Issues = "https://github.com/tenseleyFlow/sway/issues"
"Related project" = "https://github.com/tenseleyFlow/DocumentLanguageModel"

[dependency-groups]
dev = [
    "pytest>=8.0",
    "pytest-cov>=5.0",
    "mypy>=1.11",
    "ruff>=0.6",
    "types-pyyaml>=6.0",
    "hypothesis>=6.152.1",
    # Required by the tiny_model fixture (snapshot_download) used by every
    # slow+online integration test. Not transitively guaranteed by the hf
    # extra, because contributors may want to run integration tests without
    # the full [hf] extra installed.
    "huggingface_hub>=0.25",
    # S19: the pre-commit-hook integration test spawns ``pre-commit``
    # as a subprocess. Keeps the tool out of the user's runtime deps.
    "pre-commit>=3.8",
    # S21 / F03: pytest-timeout wraps slow+online tests so a silent network
    # hang surfaces as a failed test rather than a job-level timeout (macOS
    # CI observed a 20m hang on the Sprint 19 merge run 24747915467).
    "pytest-timeout>=2.3",
    # S21 / F03: tenacity's retry helpers wrap
    # ``huggingface_hub.snapshot_download`` in the tiny_model fixture.
    # Already in the ``[api]`` extra; duplicated here so slow-lane CI jobs
    # don't need to install ``[api]`` just to get the fixture retry.
    "tenacity>=9.0",
]

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.build.targets.wheel]
packages = ["src/dlm_sway"]

# -------- ruff --------
[tool.ruff]
line-length = 100
target-version = "py311"
src = ["src", "tests"]

[tool.ruff.lint]
select = [
    "E",   # pycodestyle errors
    "F",   # pyflakes
    "W",   # pycodestyle warnings
    "I",   # isort
    "UP",  # pyupgrade
    "B",   # bugbear
    "N",   # pep8-naming
    "C4",  # comprehensions
    "SIM", # simplify
    "PT",  # pytest
    "RET", # return
    "ARG", # unused args
    "PTH", # use pathlib
    "TID", # tidy imports
]
ignore = [
    "E501", # handled by formatter
]

[tool.ruff.lint.per-file-ignores]
"tests/**/*.py" = ["ARG", "PT011", "SIM117"]
# PyTorch's canonical `import torch.nn.functional as F` is universally
# read, so we allow the naming exception in the HF backend only.
"src/dlm_sway/backends/hf.py" = ["N812"]
# The .dlm bridge is the one place allowed to import the ``dlm`` package.
"src/dlm_sway/integrations/dlm/*.py" = ["TID251"]

[tool.ruff.lint.flake8-tidy-imports.banned-api]
# Hard architectural boundary: the `dlm` package is only importable
# from inside the optional integration shim. This keeps dlm-sway
# usable for anyone with just a HuggingFace base + PEFT adapter.
"dlm".msg = "Import `dlm` only from dlm_sway.integrations.dlm (the optional extra)."
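
# Sketch of the boundary in practice (illustrative code, not part of this
# config): anywhere outside the integration shim, ruff reports TID251 on
#   import dlm
# while modules matching src/dlm_sway/integrations/dlm/*.py are exempt via
# the per-file-ignore above and can guard the import so the [dlm] extra
# stays optional:
#   try:
#       import dlm
#   except ImportError:
#       dlm = None
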
[tool.ruff.format]
quote-style = "double"
indent-style = "space"

# -------- mypy --------
[tool.mypy]
strict = true
python_version = "3.11"
packages = ["dlm_sway"]
mypy_path = "src"
warn_return_any = true
warn_unused_ignores = true
warn_redundant_casts = true
no_implicit_optional = true
disallow_untyped_decorators = true
plugins = ["pydantic.mypy"]

[tool.pydantic-mypy]
init_forbid_extra = true
init_typed = true
warn_required_dynamic_aliases = true

# Stubless ML ecosystem packages. Narrow boundaries in backends/* import
# them explicitly; the rest of the codebase stays strict.
[[tool.mypy.overrides]]
module = [
    "torch",
    "torch.*",
    "transformers.*",
    "peft.*",
    "safetensors.*",
    "mlx.*",
    "mlx_lm.*",
    "sentence_transformers.*",
    "sklearn",
    "sklearn.*",
    "spacy.*",
    "textstat.*",
    "nlpaug.*",
    "matplotlib",
    "matplotlib.*",
    "plotly",
    "plotly.*",
    "tenacity",
    "tenacity.*",
    "httpx",
    "httpx.*",
    "huggingface_hub.*",
    "dlm.*",
]
ignore_missing_imports = true
disable_error_code = ["no-untyped-call"]

# -------- pytest --------
[tool.pytest.ini_options]
testpaths = ["tests"]
addopts = [
    "-ra",
    "-m",
    "not slow and not gpu and not online",
]
markers = [
    "slow: expensive; deselected by default",
    "gpu: requires CUDA; skipped on CPU/MPS runners",
    "online: touches the network; skipped in offline CI",
]
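
# How the marker lanes above are typically exercised (commands are
# illustrative; only the default deselection in addopts is enforced here):
#   pytest                          # default lane: fast, offline, CPU-only
#   pytest -m "slow and online"     # integration lane; needs network access
#   pytest -m gpu                   # CUDA-only lane for GPU runners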