@@ -0,0 +1,257 @@ |
| 1 | +"""S19 pre-commit hook integration test. |
| 2 | + |
| 3 | +Exercises the actual runtime behavior of the ``sway-gate`` hook: set |
| 4 | +up a tmp git repo, drop a ``.pre-commit-config.yaml`` declaring the |
| 5 | +hook inline (mirroring the shape shipped in ``.pre-commit-hooks.yaml``), |
| 6 | +run ``pre-commit run sway-gate --all-files`` as a subprocess, assert |
| 7 | +the exit code + stdout banner. |
| 8 | + |
| 9 | +Two cases cover the headline contracts: |
| 10 | + |
- **Pass case.** Gate threshold 0.0 + a ``delta_kl`` probe (run on the
  real HF backend) that passes ``assert_mean_gte: 0.0``. Exit 0.
| 13 | +- **Fail case.** Identical spec with ``assert_mean_gte: 100.0`` — |
| 14 | + impossible divergence floor, probe fails. Exit 1 with |
| 15 | + ``gate FAILED`` in stdout. |
| 16 | + |
| 17 | +**Why not exercise the shipped ``.pre-commit-hooks.yaml`` via |
| 18 | +``repo: .``?** That path requires committing the hooks file — fine |
| 19 | +for production use but fragile inside a pytest fixture. The |
| 20 | +``.pre-commit-hooks.yaml`` file itself is validated via a |
| 21 | +parse-and-assert smoke test at module top; the runtime behavior is |
| 22 | +validated via the inline ``repo: local`` pattern below. Together they |
| 23 | +cover "file is valid" + "entry point works" without tangling the two. |
| 24 | + |
| 25 | +Marked ``slow+online`` because the hook's entry point runs a real HF |
| 26 | +backend — the fixture builds a tiny LoRA on SmolLM2-135M. |
| 27 | +""" |
| 28 | + |
| 29 | +from __future__ import annotations |
| 30 | + |
| 31 | +import os |
| 32 | +import shutil |
| 33 | +import subprocess |
| 34 | +import sys |
| 35 | +import textwrap |
| 36 | +from collections.abc import Iterator |
| 37 | +from pathlib import Path |
| 38 | + |
| 39 | +import pytest |
| 40 | +import yaml |
| 41 | + |
# Module-wide markers: pytest applies every mark listed in ``pytestmark``
# to each test collected from this file.
pytestmark = [pytest.mark.slow, pytest.mark.online]


# ``parents[2]`` is the grandparent of the directory holding this file —
# presumably the repository root; re-check if this file is ever moved.
REPO_ROOT = Path(__file__).resolve().parents[2]
HOOKS_PATH = REPO_ROOT.joinpath(".pre-commit-hooks.yaml")
| 47 | + |
| 48 | + |
def test_pre_commit_hooks_yaml_is_valid() -> None:
    """Check the shipped ``.pre-commit-hooks.yaml`` for structural drift.

    Only reads and parses a local file. Asserts that the document is a
    list declaring exactly the ``sway-gate`` and ``sway-gate-isolated``
    hooks (in that order) and that the ``sway-gate`` entry is a
    ``language: system`` hook running ``sway gate`` with
    ``pass_filenames`` disabled.
    """
    assert HOOKS_PATH.exists(), f"missing {HOOKS_PATH}"
    declared = yaml.safe_load(HOOKS_PATH.read_text(encoding="utf-8"))
    assert isinstance(declared, list)
    ids = [hook["id"] for hook in declared]
    assert ids == ["sway-gate", "sway-gate-isolated"], f"unexpected hook ids: {ids}"
    # First entry whose id is ``sway-gate``; the id list was pinned above.
    gate_hook = declared[ids.index("sway-gate")]
    assert gate_hook["language"] == "system"
    assert gate_hook["pass_filenames"] is False
    assert gate_hook["entry"] == "sway gate"
| 62 | + |
| 63 | + |
def _build_random_lora_adapter(base_dir: Path, out_dir: Path) -> None:
    """Build a seeded random LoRA adapter and save it next to its tokenizer.

    Loads the tokenizer and fp32 causal-LM from ``base_dir``, wraps the
    model with a rank-8 LoRA on ``q_proj`` / ``v_proj``, overwrites every
    ``lora_B`` tensor with ``randn * 0.05`` under ``torch.manual_seed(0)``,
    then saves the adapter and the tokenizer into ``out_dir``.

    Args:
        base_dir: Directory of the base model and tokenizer to load.
        out_dir: Directory that receives the adapter and tokenizer files.
    """
    import torch
    from peft import LoraConfig, get_peft_model
    from transformers import AutoModelForCausalLM, AutoTokenizer

    base_path = str(base_dir)
    out_path = str(out_dir)

    # Seed before anything touches the RNG so the adapter is reproducible.
    torch.manual_seed(0)

    tok = AutoTokenizer.from_pretrained(base_path)
    if tok.pad_token_id is None:
        # Fall back to EOS when the tokenizer ships without a pad token.
        tok.pad_token = tok.eos_token

    model = AutoModelForCausalLM.from_pretrained(base_path, torch_dtype=torch.float32)
    lora_cfg = LoraConfig(
        r=8,
        lora_alpha=16,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=0.0,
        bias="none",
        task_type="CAUSAL_LM",
    )
    wrapped = get_peft_model(model, lora_cfg)

    # Collect the lora_B tensors in ``named_parameters`` order so the
    # random draws happen in the same sequence on every run.
    lora_b_params = [
        tensor for pname, tensor in wrapped.named_parameters() if "lora_B" in pname
    ]
    with torch.no_grad():
        for tensor in lora_b_params:
            tensor.copy_(torch.randn_like(tensor) * 0.05)

    wrapped.save_pretrained(out_path)
    tok.save_pretrained(out_path)
| 94 | + |
| 95 | + |
@pytest.fixture(scope="module")
def hook_adapter(
    tiny_model_dir: Path, tmp_path_factory: pytest.TempPathFactory
) -> Path:
    """Build the random LoRA adapter once per module and return its directory."""
    target = tmp_path_factory.mktemp("precommit-hook-adapter")
    _build_random_lora_adapter(base_dir=tiny_model_dir, out_dir=target)
    return target
| 103 | + |
| 104 | + |
def _write_spec(path: Path, *, base_dir: Path, adapter_dir: Path, assert_mean_gte: float) -> None:
    """Dump a minimal single-probe spec as YAML to ``path``.

    Both the ``base`` and ``ft`` model entries point at the same
    ``base_dir`` / ``adapter_dir`` pair (HF kind, fp32, CPU). The suite
    holds one ``delta_kl`` probe using JS divergence whose
    ``assert_mean_gte`` floor is the only knob the caller varies
    (0.0 for the pass case, 100.0 for the fail case).

    Args:
        path: Destination file; overwritten if it already exists.
        base_dir: Base model directory written into both model entries.
        adapter_dir: Adapter directory written into both model entries.
        assert_mean_gte: Floor asserted by the probe.
    """

    def _model_entry() -> dict[str, str]:
        # A fresh dict per call keeps the two entries distinct objects,
        # so the dump contains two full mappings just as before.
        return {
            "base": str(base_dir),
            "kind": "hf",
            "adapter": str(adapter_dir),
            "dtype": "fp32",
            "device": "cpu",
        }

    probe = {
        "name": "dk",
        "kind": "delta_kl",
        "prompts": ["The capital of France is", "Water boils at"],
        "divergence": "js",
        "assert_mean_gte": assert_mean_gte,
    }
    document = {
        "version": 1,
        "models": {"base": _model_entry(), "ft": _model_entry()},
        "defaults": {"seed": 0, "coverage_threshold": 0.0, "differential": True},
        "suite": [probe],
    }
    rendered = yaml.safe_dump(document)
    path.write_text(rendered, encoding="utf-8")
| 138 | + |
| 139 | + |
| 140 | +def _write_config(config_path: Path, spec_rel: str) -> None: |
| 141 | + """Write a ``.pre-commit-config.yaml`` declaring ``sway-gate`` |
| 142 | + inline via ``repo: local`` so pre-commit doesn't need to fetch |
| 143 | + anything. Mirrors the shape of the shipped ``.pre-commit-hooks.yaml``.""" |
| 144 | + config = textwrap.dedent( |
| 145 | + f""" |
| 146 | + repos: |
| 147 | + - repo: local |
| 148 | + hooks: |
| 149 | + - id: sway-gate |
| 150 | + name: sway gate |
| 151 | + entry: sway gate |
| 152 | + language: system |
| 153 | + files: '.*' |
| 154 | + pass_filenames: false |
| 155 | + args: [{spec_rel}, --threshold=0.0] |
| 156 | + """ |
| 157 | + ).strip() |
| 158 | + config_path.write_text(config + "\n", encoding="utf-8") |
| 159 | + |
| 160 | + |
@pytest.fixture
def precommit_repo(
    tmp_path: Path, tiny_model_dir: Path, hook_adapter: Path
) -> Iterator[Path]:
    """Initialize a tmp git repo with a spec + pre-commit config.

    Yields the repo root. The spec file lives at
    ``<repo>/sway.yaml``; the config at ``<repo>/.pre-commit-config.yaml``.
    The spec is written with ``assert_mean_gte=0.0`` (pass case); a test
    that needs the fail case rewrites the spec after the fixture yields —
    cheaper than re-initializing git per test.

    On teardown the pre-commit cache directory created inside the repo
    by ``_run_hook`` is removed.
    """
    subprocess.run(  # noqa: S603
        ["git", "init", "--quiet"], cwd=tmp_path, check=True
    )
    # Repo-local identity + unsigned commits, so fresh CI runners without
    # a global git identity don't hit "Please tell me who you are".
    for key, val in (
        ("user.email", "sway-ci@example.com"),
        ("user.name", "sway ci"),
        ("commit.gpgsign", "false"),
    ):
        subprocess.run(  # noqa: S603
            ["git", "config", key, val], cwd=tmp_path, check=True
        )
    # Default pass case — the caller rewrites the spec for fail cases.
    _write_spec(
        tmp_path / "sway.yaml",
        base_dir=tiny_model_dir,
        adapter_dir=hook_adapter,
        assert_mean_gte=0.0,
    )
    _write_config(tmp_path / ".pre-commit-config.yaml", "sway.yaml")
    # Stage everything so ``pre-commit run --all-files`` has content.
    subprocess.run(  # noqa: S603
        ["git", "add", "-A"], cwd=tmp_path, check=True
    )
    yield tmp_path
    # Remove pre-commit's cache so back-to-back invocations don't trip
    # over stale state. The directory name must match the
    # ``PRE_COMMIT_HOME`` that ``_run_hook`` sets
    # (``<repo>/.pre-commit-cache``); the previous ``.cache`` path never
    # existed, so the cleanup was a no-op. ``ignore_errors`` also covers
    # the case where the hook was never run.
    shutil.rmtree(tmp_path / ".pre-commit-cache", ignore_errors=True)
| 203 | + |
| 204 | + |
def _run_hook(cwd: Path) -> subprocess.CompletedProcess[str]:
    """Invoke ``pre-commit run sway-gate --all-files`` from ``cwd``.

    pre-commit is launched as ``python -m pre_commit`` with the current
    interpreter. The child environment is a copy of ``os.environ`` with
    two adjustments:

    - ``PRE_COMMIT_HOME`` points at ``<cwd>/.pre-commit-cache`` so the
      run doesn't pollute the user's ``~/.cache/pre-commit``.
    - The interpreter's directory is prepended to ``PATH``. The hook is
      ``language: system`` with ``entry: sway gate``, so ``sway`` is
      looked up on ``PATH``; prepending lets a console script installed
      next to the interpreter resolve even when pytest was started from
      a virtualenv that is not activated.

    Args:
        cwd: Root of the git repo holding ``.pre-commit-config.yaml``.

    Returns:
        The completed process with text ``stdout`` / ``stderr`` captured.
        A non-zero exit code is returned, not raised (``check=False``).
    """
    env = os.environ.copy()
    env["PRE_COMMIT_HOME"] = str(cwd / ".pre-commit-cache")
    # Deliberately not ``resolve()``d: a venv's ``python`` is often a
    # symlink, and the console scripts sit beside the symlink itself.
    interpreter_dir = str(Path(sys.executable).parent)
    # Drop empty parts so a missing PATH never yields an empty entry.
    env["PATH"] = os.pathsep.join(
        part for part in (interpreter_dir, env.get("PATH", "")) if part
    )
    return subprocess.run(  # noqa: S603
        [sys.executable, "-m", "pre_commit", "run", "sway-gate", "--all-files"],
        cwd=cwd,
        env=env,
        capture_output=True,
        text=True,
        check=False,
    )
| 219 | + |
| 220 | + |
def test_hook_passes_when_gate_passes(precommit_repo: Path) -> None:
    """Pass case: ``assert_mean_gte=0.0`` — probe passes, hook exits 0.

    Besides the exit code, the output must carry pre-commit's ``Passed``
    status. The hook name ``sway gate`` is printed for every status, so
    matching on it alone would also accept a ``Skipped`` run, which
    exits 0 without ever executing the gate.
    """
    result = _run_hook(precommit_repo)
    combined = result.stdout + result.stderr
    transcript = f"stdout:\n{result.stdout}\nstderr:\n{result.stderr}"
    assert result.returncode == 0, (
        f"hook failed unexpectedly (rc={result.returncode}):\n{transcript}"
    )
    # pre-commit reports ``Passed`` / ``Failed`` / ``Skipped`` per hook.
    assert "passed" in combined.lower(), (
        f"expected pre-commit to report the hook as Passed:\n{transcript}"
    )
| 231 | + |
| 232 | + |
def test_hook_fails_when_gate_fails(
    precommit_repo: Path, tiny_model_dir: Path, hook_adapter: Path
) -> None:
    """Fail case: the spec demands an unreachable ``assert_mean_gte``.

    Rewrites ``sway.yaml`` with a floor of 100.0, re-stages the repo,
    runs the hook, and expects a non-zero exit plus the ``gate FAILED``
    banner somewhere in stdout/stderr.
    """
    # Swap the fixture's default (passing) spec for the impossible one.
    spec_path = precommit_repo / "sway.yaml"
    _write_spec(
        spec_path,
        base_dir=tiny_model_dir,
        adapter_dir=hook_adapter,
        assert_mean_gte=100.0,
    )
    subprocess.run(["git", "add", "-A"], cwd=precommit_repo, check=True)  # noqa: S603

    proc = _run_hook(precommit_repo)
    output = proc.stdout + proc.stderr
    transcript = f"stdout:\n{proc.stdout}\nstderr:\n{proc.stderr}"

    assert proc.returncode != 0, (
        "hook should have failed on an impossible gate (got rc=0):\n" + transcript
    )
    assert "gate FAILED" in output, (
        "expected the 'gate FAILED' banner in hook output:\n" + transcript
    )
| 257 | + ) |