| 1 | """S19 pre-commit hook integration test. |
| 2 | |
| 3 | Exercises the actual runtime behavior of the ``sway-gate`` hook: set |
| 4 | up a tmp git repo, drop a ``.pre-commit-config.yaml`` declaring the |
| 5 | hook inline (mirroring the shape shipped in ``.pre-commit-hooks.yaml``), |
| 6 | run ``pre-commit run sway-gate --all-files`` as a subprocess, assert |
| 7 | the exit code + stdout banner. |
| 8 | |
| 9 | Two cases cover the headline contracts: |
| 10 | |
| 11 | - **Pass case.** Gate threshold 0.0 + a probe whose measured |
| 12 | divergence (real HF backend) passes ``assert_mean_gte: 0.0``. Exit 0. |
| 13 | - **Fail case.** Identical spec with ``assert_mean_gte: 100.0`` — |
| 14 | impossible divergence floor, probe fails. Exit 1 with |
| 15 | ``gate FAILED`` in stdout. |
| 16 | |
| 17 | **Why not exercise the shipped ``.pre-commit-hooks.yaml`` via |
| 18 | ``repo: .``?** That path requires committing the hooks file — fine |
| 19 | for production use but fragile inside a pytest fixture. The |
| 20 | ``.pre-commit-hooks.yaml`` file itself is validated via a |
| 21 | parse-and-assert smoke test at module top; the runtime behavior is |
| 22 | validated via the inline ``repo: local`` pattern below. Together they |
| 23 | cover "file is valid" + "entry point works" without tangling the two. |
| 24 | |
| 25 | Marked ``slow+online`` because the hook's entry point runs a real HF |
| 26 | backend — the fixture builds a tiny LoRA on SmolLM2-135M. |
| 27 | """ |
| 28 | |
| 29 | from __future__ import annotations |
| 30 | |
| 31 | import os |
| 32 | import shutil |
| 33 | import subprocess |
| 34 | import sys |
| 35 | import textwrap |
| 36 | from collections.abc import Iterator |
| 37 | from pathlib import Path |
| 38 | |
| 39 | import pytest |
| 40 | import yaml |
| 41 | |
# Module-wide markers: every test collected from this file — including the
# YAML smoke test below — carries both ``slow`` and ``online``.
pytestmark = [pytest.mark.slow, pytest.mark.online]


# ``parents[2]`` is two levels above the directory that contains this file;
# presumably the repository root (verify if this test module is ever moved).
REPO_ROOT = Path(__file__).resolve().parents[2]
# Hooks manifest that is parsed by ``test_pre_commit_hooks_yaml_is_valid``.
HOOKS_PATH = REPO_ROOT / ".pre-commit-hooks.yaml"
| 47 | |
| 48 | |
def test_pre_commit_hooks_yaml_is_valid() -> None:
    """Statically validate the shipped ``.pre-commit-hooks.yaml`` manifest.

    Parses the file at ``HOOKS_PATH`` and checks that it is a list declaring
    exactly the hooks ``sway-gate`` and ``sway-gate-isolated`` (in that
    order), and that the ``sway-gate`` entry is a ``system`` hook that runs
    ``sway gate`` without receiving file names. No subprocess is spawned.
    """
    assert HOOKS_PATH.exists(), f"missing {HOOKS_PATH}"

    manifest_text = HOOKS_PATH.read_text(encoding="utf-8")
    hooks = yaml.safe_load(manifest_text)
    assert isinstance(hooks, list)

    # Order matters here: the manifest must list the hooks exactly like this.
    ids = [entry["id"] for entry in hooks]
    assert ids == ["sway-gate", "sway-gate-isolated"], f"unexpected hook ids: {ids}"

    # Inspect the first entry whose id is ``sway-gate``.
    system_hook = next(entry for entry in hooks if entry["id"] == "sway-gate")
    assert system_hook["language"] == "system"
    assert system_hook["pass_filenames"] is False
    assert system_hook["entry"] == "sway gate"
| 62 | |
| 63 | |
def _build_random_lora_adapter(base_dir: Path, out_dir: Path) -> None:
    """Build a small, seeded LoRA adapter and save it to ``out_dir``.

    Loads the tokenizer and causal-LM checkpoint found at ``base_dir``,
    wraps the model in a rank-8 LoRA (alpha 16, ``q_proj``/``v_proj``
    targets, no dropout, no bias), then overwrites every ``lora_B``
    parameter with ``randn * 0.05`` under ``torch.manual_seed(0)``.
    The adapter and the tokenizer are both written to ``out_dir``.

    Args:
        base_dir: Directory holding the base model + tokenizer.
        out_dir: Destination directory for the adapter and tokenizer files.
    """
    import torch
    from peft import LoraConfig, get_peft_model
    from transformers import AutoModelForCausalLM, AutoTokenizer

    base_path = str(base_dir)
    out_path = str(out_dir)

    # Seed before anything that may draw from torch's global RNG so the
    # resulting weights are reproducible run to run.
    torch.manual_seed(0)

    tok = AutoTokenizer.from_pretrained(base_path)
    if tok.pad_token_id is None:
        # Fall back to EOS when the checkpoint ships without a pad token.
        tok.pad_token = tok.eos_token

    base_model = AutoModelForCausalLM.from_pretrained(base_path, torch_dtype=torch.float32)
    lora_cfg = LoraConfig(
        r=8,
        lora_alpha=16,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=0.0,
        bias="none",
        task_type="CAUSAL_LM",
    )
    wrapped = get_peft_model(base_model, lora_cfg)

    # Give lora_B small random values so the adapter actually perturbs the
    # base model (presumably lora_B would otherwise start at zero — see the
    # PEFT LoRA initialisation docs). Iteration order is kept as-is so the
    # RNG draws stay in the same sequence.
    lora_b_weights = [
        weight for pname, weight in wrapped.named_parameters() if "lora_B" in pname
    ]
    with torch.no_grad():
        for weight in lora_b_weights:
            weight.copy_(torch.randn_like(weight) * 0.05)

    wrapped.save_pretrained(out_path)
    tok.save_pretrained(out_path)
| 94 | |
| 95 | |
@pytest.fixture(scope="module")
def hook_adapter(tiny_model_dir: Path, tmp_path_factory: pytest.TempPathFactory) -> Path:
    """Module-scoped LoRA adapter directory shared by the hook tests.

    The adapter is built once per module into a fresh temp directory from
    the model at ``tiny_model_dir`` (a fixture defined outside this file).
    """
    target = tmp_path_factory.mktemp("precommit-hook-adapter")
    _build_random_lora_adapter(base_dir=tiny_model_dir, out_dir=target)
    return target
| 101 | |
| 102 | |
def _write_spec(path: Path, *, base_dir: Path, adapter_dir: Path, assert_mean_gte: float) -> None:
    """Serialize a minimal one-probe spec as YAML to ``path``.

    The spec declares two HF models (``base`` and ``ft``) on CPU in fp32
    and a single ``delta_kl`` probe using JS divergence over two prompts.

    Args:
        path: File to (over)write with the YAML document.
        base_dir: Base model directory recorded for both model entries.
        adapter_dir: Adapter directory recorded for both model entries.
        assert_mean_gte: Floor asserted by the probe; the tests in this
            module use 0.0 for the pass case and 100.0 for the fail case.
    """

    def _model_entry() -> dict[str, str]:
        # A fresh dict per call: sharing one object between ``base`` and
        # ``ft`` would make the YAML dumper emit an anchor/alias pair.
        # NOTE(review): both entries carry the same adapter — confirm this
        # is the intended shape for the differential backend.
        return {
            "base": str(base_dir),
            "kind": "hf",
            "adapter": str(adapter_dir),
            "dtype": "fp32",
            "device": "cpu",
        }

    probe = {
        "name": "dk",
        "kind": "delta_kl",
        "prompts": ["The capital of France is", "Water boils at"],
        "divergence": "js",
        "assert_mean_gte": assert_mean_gte,
    }
    document = {
        "version": 1,
        "models": {"base": _model_entry(), "ft": _model_entry()},
        "defaults": {"seed": 0, "coverage_threshold": 0.0, "differential": True},
        "suite": [probe],
    }

    serialized = yaml.safe_dump(document)
    path.write_text(serialized, encoding="utf-8")
| 136 | |
| 137 | |
| 138 | def _write_config(config_path: Path, spec_rel: str) -> None: |
| 139 | """Write a ``.pre-commit-config.yaml`` declaring ``sway-gate`` |
| 140 | inline via ``repo: local`` so pre-commit doesn't need to fetch |
| 141 | anything. Mirrors the shape of the shipped ``.pre-commit-hooks.yaml``.""" |
| 142 | config = textwrap.dedent( |
| 143 | f""" |
| 144 | repos: |
| 145 | - repo: local |
| 146 | hooks: |
| 147 | - id: sway-gate |
| 148 | name: sway gate |
| 149 | entry: sway gate |
| 150 | language: system |
| 151 | files: '.*' |
| 152 | pass_filenames: false |
| 153 | args: [{spec_rel}, --threshold=0.0] |
| 154 | """ |
| 155 | ).strip() |
| 156 | config_path.write_text(config + "\n", encoding="utf-8") |
| 157 | |
| 158 | |
@pytest.fixture
def precommit_repo(tmp_path: Path, tiny_model_dir: Path, hook_adapter: Path) -> Iterator[Path]:
    """Initialize a tmp git repo with a spec + pre-commit config.

    Yields the repo root. The spec file lives at ``<repo>/sway.yaml``; the
    config at ``<repo>/.pre-commit-config.yaml``. The spec is written for
    the pass case (``assert_mean_gte=0.0``); a test that needs the fail
    case rewrites the spec after the fixture yields — cheaper than
    re-initializing git per test.

    On teardown the pre-commit cache directories inside the repo are
    removed on a best-effort basis.
    """

    def _git(*args: str) -> None:
        # ``check=True``: a broken git setup should fail the fixture loudly.
        subprocess.run(["git", *args], cwd=tmp_path, check=True)  # noqa: S603

    _git("init", "--quiet")
    # Silence the "Please tell me who you are" commit warning on fresh
    # CI runners without a global git identity.
    for key, val in (
        ("user.email", "sway-ci@example.com"),
        ("user.name", "sway ci"),
        ("commit.gpgsign", "false"),
    ):
        _git("config", key, val)

    # Default pass case — the caller rewrites the spec for fail cases.
    _write_spec(
        tmp_path / "sway.yaml",
        base_dir=tiny_model_dir,
        adapter_dir=hook_adapter,
        assert_mean_gte=0.0,
    )
    _write_config(tmp_path / ".pre-commit-config.yaml", "sway.yaml")
    # Stage everything so ``pre-commit run --all-files`` has content.
    _git("add", "-A")

    yield tmp_path

    # Clean up pre-commit's cache so back-to-back invocations don't trip
    # over stale ``language: system`` state. ``_run_hook`` points
    # ``PRE_COMMIT_HOME`` at ``<repo>/.pre-commit-cache``, so that is the
    # directory that actually gets populated; ``.cache`` is still removed
    # for backward compatibility. ``ignore_errors=True`` also covers the
    # "directory was never created" case.
    for leftover in (".pre-commit-cache", ".cache"):
        shutil.rmtree(tmp_path / leftover, ignore_errors=True)
| 199 | |
| 200 | |
| 201 | def _run_hook(cwd: Path) -> subprocess.CompletedProcess[str]: |
| 202 | """Invoke ``pre-commit run sway-gate --all-files`` from ``cwd``.""" |
| 203 | env = os.environ.copy() |
| 204 | # Point pre-commit's cache at the test tmp dir so we don't |
| 205 | # pollute the user's ``~/.cache/pre-commit``. |
| 206 | env["PRE_COMMIT_HOME"] = str(cwd / ".pre-commit-cache") |
| 207 | return subprocess.run( # noqa: S603 |
| 208 | [sys.executable, "-m", "pre_commit", "run", "sway-gate", "--all-files"], |
| 209 | cwd=cwd, |
| 210 | env=env, |
| 211 | capture_output=True, |
| 212 | text=True, |
| 213 | check=False, |
| 214 | ) |
| 215 | |
| 216 | |
def test_hook_passes_when_gate_passes(precommit_repo: Path) -> None:
    """Pass case: ``assert_mean_gte=0.0`` — probe passes, hook exits 0.

    Besides the exit code, the output must contain pre-commit's ``Passed``
    status for the hook.
    """
    result = _run_hook(precommit_repo)
    combined = (result.stdout + result.stderr).lower()
    assert result.returncode == 0, (
        f"hook failed unexpectedly (rc={result.returncode}):\n"
        f"stdout:\n{result.stdout}\nstderr:\n{result.stderr}"
    )
    # pre-commit prints the hook name on every status line (Passed, Failed
    # and Skipped alike), so matching on "sway gate" proves nothing. A
    # skipped hook also exits 0; only the "Passed" status shows that the
    # gate really ran and succeeded.
    assert "passed" in combined, (
        f"expected pre-commit to report 'Passed' for the hook:\n"
        f"stdout:\n{result.stdout}\nstderr:\n{result.stderr}"
    )
| 227 | |
| 228 | |
def test_hook_fails_when_gate_fails(
    precommit_repo: Path, tiny_model_dir: Path, hook_adapter: Path
) -> None:
    """Fail case: a floor of 100.0 makes the probe fail.

    The hook must exit non-zero and the ``gate FAILED`` banner must show
    up somewhere in the captured stdout/stderr.
    """
    # Replace the fixture's pass-case spec with one that cannot be met,
    # then re-stage so the repo index reflects the rewritten file.
    spec_file = precommit_repo / "sway.yaml"
    _write_spec(
        spec_file,
        base_dir=tiny_model_dir,
        adapter_dir=hook_adapter,
        assert_mean_gte=100.0,
    )
    subprocess.run(["git", "add", "-A"], cwd=precommit_repo, check=True)  # noqa: S603

    result = _run_hook(precommit_repo)
    combined = result.stdout + result.stderr
    captured = f"stdout:\n{result.stdout}\nstderr:\n{result.stderr}"

    assert result.returncode != 0, (
        "hook should have failed on an impossible gate (got rc=0):\n" + captured
    )
    assert "gate FAILED" in combined, (
        "expected the 'gate FAILED' banner in hook output:\n" + captured
    )