| 1 | """S19 pre-commit hook integration test. |
| 2 | |
| 3 | Exercises the actual runtime behavior of the ``sway-gate`` hook: set |
| 4 | up a tmp git repo, drop a ``.pre-commit-config.yaml`` declaring the |
| 5 | hook inline (mirroring the shape shipped in ``.pre-commit-hooks.yaml``), |
| 6 | run ``pre-commit run sway-gate --all-files`` as a subprocess, assert |
| 7 | the exit code + stdout banner. |
| 8 | |
| 9 | Two cases cover the headline contracts: |
| 10 | |
| 11 | - **Pass case.** Gate threshold 0.0 + a ``delta_kl`` probe on the |
| 12 | real HF backend with ``assert_mean_gte: 0.0``. Exit 0. |
| 13 | - **Fail case.** Identical spec with ``assert_mean_gte: 100.0`` — |
| 14 | impossible divergence floor, probe fails. Exit 1 with |
| 15 | ``gate FAILED`` in stdout. |
| 16 | |
| 17 | **Why not exercise the shipped ``.pre-commit-hooks.yaml`` via |
| 18 | ``repo: .``?** That path requires committing the hooks file — fine |
| 19 | for production use but fragile inside a pytest fixture. The |
| 20 | ``.pre-commit-hooks.yaml`` file itself is validated via a |
| 21 | parse-and-assert smoke test at module top; the runtime behavior is |
| 22 | validated via the inline ``repo: local`` pattern below. Together they |
| 23 | cover "file is valid" + "entry point works" without tangling the two. |
| 24 | |
| 25 | Marked ``slow+online`` because the hook's entry point runs a real HF |
| 26 | backend — the fixture builds a tiny LoRA on SmolLM2-135M. |
| 27 | """ |
| 28 | |
| 29 | from __future__ import annotations |
| 30 | |
| 31 | import os |
| 32 | import shutil |
| 33 | import subprocess |
| 34 | import sys |
| 35 | import textwrap |
| 36 | from collections.abc import Iterator |
| 37 | from pathlib import Path |
| 38 | |
| 39 | import pytest |
| 40 | import yaml |
| 41 | |
# Module-wide markers: every test in this file is tagged ``slow`` and
# ``online``.
# NOTE(review): that includes the network-free YAML smoke test below —
# confirm it is meant to be excluded from the fast lane too.
pytestmark = [pytest.mark.slow, pytest.mark.online]


# Two directory levels above the directory that contains this file.
# NOTE(review): presumably the repository root — confirm against the layout.
REPO_ROOT = Path(__file__).resolve().parents[2]
# Hook manifest that the smoke test parses and validates.
HOOKS_PATH = REPO_ROOT / ".pre-commit-hooks.yaml"
| 47 | |
| 48 | |
def test_pre_commit_hooks_yaml_is_valid() -> None:
    """Smoke-check the shipped ``.pre-commit-hooks.yaml`` manifest.

    Parses the file and verifies that it declares exactly the three
    expected hooks, in order (system / isolated-venv / docker_image),
    then spot-checks the fields each variant relies on. Only a local
    file read happens here, so structural drift is caught before the
    expensive hook invocations run.
    """
    assert HOOKS_PATH.exists(), f"missing {HOOKS_PATH}"
    manifest = yaml.safe_load(HOOKS_PATH.read_text(encoding="utf-8"))
    assert isinstance(manifest, list)

    ids = [entry["id"] for entry in manifest]
    expected_ids = ["sway-gate", "sway-gate-isolated", "sway-gate-docker"]
    assert ids == expected_ids, f"unexpected hook ids: {ids}"

    # The ids are unique once the assertion above holds, so a dict lookup
    # selects the same entries as scanning for the first match.
    by_id = {entry["id"]: entry for entry in manifest}

    # System variant: runs the locally installed CLI.
    system = by_id["sway-gate"]
    assert system["language"] == "system"
    assert system["pass_filenames"] is False
    assert system["entry"] == "sway gate"

    # Isolated variant: pre-commit builds a Python environment for it.
    isolated = by_id["sway-gate-isolated"]
    assert isolated["language"] == "python"
    # Post-F05: the isolated variant pins the PyPI wheel, not a git SHA.
    deps = isolated["additional_dependencies"]
    has_pypi_pin = any(dep.startswith("dlm-sway[hf]==") for dep in deps)
    assert has_pypi_pin, f"isolated hook deps lost PyPI pin: {deps!r}"

    # Docker variant: entry is "<image> <cmd>" — the first token is the
    # image, the rest is argv.
    docker = by_id["sway-gate-docker"]
    assert docker["language"] == "docker_image"
    assert docker["pass_filenames"] is False
    docker_entry = docker["entry"]
    assert docker_entry.startswith("ghcr.io/tenseleyflow/sway-gate:"), (
        f"docker hook image path changed: {docker_entry!r}"
    )
| 83 | |
| 84 | |
def _build_random_lora_adapter(base_dir: Path, out_dir: Path) -> None:
    """Write a small, reproducible LoRA adapter plus tokenizer to ``out_dir``.

    Loads the tokenizer and causal LM stored at ``base_dir`` (SmolLM2-135M
    per the module docstring), wraps the model with a rank-8 LoRA on
    ``q_proj`` / ``v_proj``, and overwrites every ``lora_B`` tensor with
    Gaussian noise scaled by 0.05 under ``torch.manual_seed(0)``. The
    result is a real, tiny adapter the hook can gate against.

    The statement order matters: the seed is set before any model or
    adapter construction so the random draws are reproducible.
    """
    import torch
    from peft import LoraConfig, get_peft_model
    from transformers import AutoModelForCausalLM, AutoTokenizer

    source = str(base_dir)
    destination = str(out_dir)

    torch.manual_seed(0)

    tok = AutoTokenizer.from_pretrained(source)
    if tok.pad_token_id is None:
        # Fall back to EOS when the checkpoint defines no pad token.
        tok.pad_token = tok.eos_token

    model = AutoModelForCausalLM.from_pretrained(source, torch_dtype=torch.float32)
    lora_cfg = LoraConfig(
        r=8,
        lora_alpha=16,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=0.0,
        bias="none",
        task_type="CAUSAL_LM",
    )
    wrapped = get_peft_model(model, lora_cfg)

    # PEFT's default initialization leaves ``lora_B`` at zero, which makes
    # the adapter a no-op; randomizing it gives the probe a non-trivial
    # delta to measure.
    with torch.no_grad():
        lora_b_tensors = (
            tensor for name, tensor in wrapped.named_parameters() if "lora_B" in name
        )
        for tensor in lora_b_tensors:
            tensor.copy_(torch.randn_like(tensor) * 0.05)

    wrapped.save_pretrained(destination)
    tok.save_pretrained(destination)
| 115 | |
| 116 | |
@pytest.fixture(scope="module")
def hook_adapter(tiny_model_dir: Path, tmp_path_factory: pytest.TempPathFactory) -> Path:
    """Module-scoped directory holding the random LoRA adapter.

    The adapter is built once per module from ``tiny_model_dir`` (a
    fixture provided outside this file) and shared by the hook tests.
    """
    out_dir = tmp_path_factory.mktemp("precommit-hook-adapter")
    _build_random_lora_adapter(base_dir=tiny_model_dir, out_dir=out_dir)
    return out_dir
| 122 | |
| 123 | |
def _write_spec(path: Path, *, base_dir: Path, adapter_dir: Path, assert_mean_gte: float) -> None:
    """Serialize a minimal one-probe spec to ``path`` as YAML.

    The spec declares two identical model entries (``base`` and ``ft``)
    and a single ``delta_kl`` probe. ``assert_mean_gte`` is the knob the
    tests turn: 0.0 for the pass case, 100.0 for the fail case.
    """

    def _model_entry() -> dict[str, str]:
        # Return a fresh dict on every call: reusing one dict object for
        # both entries would make ``yaml.safe_dump`` emit anchors/aliases
        # and change the file's text.
        # NOTE(review): both entries carry the adapter path — presumably
        # the differential backend toggles it; confirm.
        return {
            "base": str(base_dir),
            "kind": "hf",
            "adapter": str(adapter_dir),
            "dtype": "fp32",
            "device": "cpu",
        }

    probe = {
        "name": "dk",
        "kind": "delta_kl",
        "prompts": ["The capital of France is", "Water boils at"],
        "divergence": "js",
        "assert_mean_gte": assert_mean_gte,
    }
    spec = {
        "version": 1,
        "models": {"base": _model_entry(), "ft": _model_entry()},
        "defaults": {"seed": 0, "coverage_threshold": 0.0, "differential": True},
        "suite": [probe],
    }
    path.write_text(yaml.safe_dump(spec), encoding="utf-8")
| 157 | |
| 158 | |
def _write_config(config_path: Path, spec_rel: str) -> None:
    """Write a ``.pre-commit-config.yaml`` that declares ``sway-gate`` inline.

    The hook is declared under ``repo: local`` so pre-commit doesn't need
    to fetch anything; the shape mirrors the shipped
    ``.pre-commit-hooks.yaml``.

    Args:
        config_path: Destination file; overwritten if it exists.
        spec_rel: Spec path passed to ``sway gate`` as its first argument.
            It is emitted as a quoted YAML scalar, so characters that are
            special inside a flow sequence (``,``, ``]``, `` #``, ``: ``)
            or embedded newlines cannot corrupt the config.
    """
    import json

    # A JSON string literal is also a valid YAML double-quoted scalar.
    # Quoting before interpolation also keeps ``textwrap.dedent`` safe,
    # since newlines in ``spec_rel`` are escaped rather than emitted raw.
    quoted_spec = json.dumps(spec_rel, ensure_ascii=False)
    config = textwrap.dedent(
        f"""
        repos:
          - repo: local
            hooks:
              - id: sway-gate
                name: sway gate
                entry: sway gate
                language: system
                files: '.*'
                pass_filenames: false
                args: [{quoted_spec}, --threshold=0.0]
        """
    ).strip()
    config_path.write_text(config + "\n", encoding="utf-8")
| 178 | |
| 179 | |
@pytest.fixture
def precommit_repo(tmp_path: Path, tiny_model_dir: Path, hook_adapter: Path) -> Iterator[Path]:
    """Initialize a tmp git repo with a spec + pre-commit config.

    Yields the repo root. The spec file lives at ``<repo>/sway.yaml``;
    the config at ``<repo>/.pre-commit-config.yaml``. The spec is written
    for the pass case (``assert_mean_gte=0.0``); a test that wants the
    fail case rewrites the spec after the fixture yields.

    Teardown removes the pre-commit cache directory created inside the
    repo by ``_run_hook``.
    """
    subprocess.run(  # noqa: S603
        ["git", "init", "--quiet"], cwd=tmp_path, check=True
    )
    # Silence the "Please tell me who you are" commit warning on fresh
    # CI runners without a global git identity.
    for key, val in (
        ("user.email", "sway-ci@example.com"),
        ("user.name", "sway ci"),
        ("commit.gpgsign", "false"),
    ):
        subprocess.run(  # noqa: S603
            ["git", "config", key, val], cwd=tmp_path, check=True
        )
    # Default pass case — the caller rewrites the spec for fail cases.
    _write_spec(
        tmp_path / "sway.yaml",
        base_dir=tiny_model_dir,
        adapter_dir=hook_adapter,
        assert_mean_gte=0.0,
    )
    _write_config(tmp_path / ".pre-commit-config.yaml", "sway.yaml")
    # Stage everything so ``pre-commit run --all-files`` has content.
    subprocess.run(  # noqa: S603
        ["git", "add", "-A"], cwd=tmp_path, check=True
    )
    yield tmp_path
    # Clean up pre-commit's cache dir too so back-to-back test
    # invocations don't trip over stale ``language: system`` state.
    # ``_run_hook`` points ``PRE_COMMIT_HOME`` at ``<repo>/.pre-commit-cache``,
    # so that is the directory to remove. ``ignore_errors`` makes this a
    # no-op when the hook never ran.
    shutil.rmtree(tmp_path / ".pre-commit-cache", ignore_errors=True)
| 220 | |
| 221 | |
def _run_hook(cwd: Path) -> subprocess.CompletedProcess[str]:
    """Run ``pre-commit run sway-gate --all-files`` inside ``cwd``.

    pre-commit is launched as a module of the current interpreter. Output
    is captured as text and a non-zero exit is returned to the caller
    rather than raised, so tests can assert on the exit code and streams.
    """
    # Point pre-commit's cache at the test tmp dir so the user's
    # ``~/.cache/pre-commit`` stays untouched.
    hook_env = {**os.environ, "PRE_COMMIT_HOME": str(cwd / ".pre-commit-cache")}
    command = [sys.executable, "-m", "pre_commit", "run", "sway-gate", "--all-files"]
    return subprocess.run(  # noqa: S603
        command,
        cwd=cwd,
        env=hook_env,
        capture_output=True,
        text=True,
        check=False,
    )
| 236 | |
| 237 | |
def test_hook_passes_when_gate_passes(precommit_repo: Path) -> None:
    """Pass case: ``assert_mean_gte=0.0`` — probe passes, hook exits 0.

    Besides the exit code, the output must carry pre-commit's ``Passed``
    status. The hook name appears on every status line, including
    ``Skipped``, so checking for the name alone could not tell a hook
    that ran and passed from one that never ran.
    """
    result = _run_hook(precommit_repo)
    streams = f"stdout:\n{result.stdout}\nstderr:\n{result.stderr}"
    assert result.returncode == 0, (
        f"hook failed unexpectedly (rc={result.returncode}):\n" + streams
    )
    # pre-commit reports ``Passed`` / ``Failed`` / ``Skipped`` for each hook.
    combined = (result.stdout + result.stderr).lower()
    assert "passed" in combined, (
        "hook exited 0 but pre-commit did not report 'Passed':\n" + streams
    )
| 248 | |
| 249 | |
def test_hook_fails_when_gate_fails(
    precommit_repo: Path, tiny_model_dir: Path, hook_adapter: Path
) -> None:
    """Fail case: an unreachable ``assert_mean_gte`` makes the probe fail.

    The hook must exit non-zero and print the ``gate FAILED`` banner
    somewhere in its captured output.
    """
    # Replace the fixture's passing spec with an impossible floor, then
    # re-stage the repo before running the hook.
    failing_spec = precommit_repo / "sway.yaml"
    _write_spec(
        failing_spec,
        base_dir=tiny_model_dir,
        adapter_dir=hook_adapter,
        assert_mean_gte=100.0,
    )
    subprocess.run(  # noqa: S603
        ["git", "add", "-A"], cwd=precommit_repo, check=True
    )

    outcome = _run_hook(precommit_repo)
    # Shared tail for both failure messages.
    streams = f"stdout:\n{outcome.stdout}\nstderr:\n{outcome.stderr}"

    assert outcome.returncode != 0, (
        "hook should have failed on an impossible gate (got rc=0):\n" + streams
    )
    assert "gate FAILED" in outcome.stdout + outcome.stderr, (
        "expected the 'gate FAILED' banner in hook output:\n" + streams
    )