Python · 6986 bytes Raw Blame History
1 """S25 — gradient_ghost integration tests.
2
3 Two flavors:
4
5 1. **Real-store (skipped on CI):** runs against a known-undertrained
6 adapter at ``~/.dlm/store/01KPPFAB2Z6DWCWY0QV702TSTX/`` if
7 present. This is the prove-the-value test the sprint DoD requires
8 on a real dlm-trained adapter. Skipped cleanly when the store is
9 absent so CI without local dlm install still passes.
10 2. **Synthetic-converged (runs everywhere):** writes a fully-formed
11 converged training_state.pt + matching safetensors fixture and
12 asserts PASS. Pairs with the real-store FAIL case to give end-
13 to-end "FAIL on undertrained, PASS on converged" coverage in CI.
14
15 Marked ``slow + online`` because building a synthetic converged
16 training_state.pt requires torch-pickle round-tripping a real-shape
17 optimizer state — heavier than a unit test should be.
18 """
19
20 from __future__ import annotations
21
22 from pathlib import Path
23
24 import numpy as np
25 import pytest
26
27 torch = pytest.importorskip("torch", reason="needs the [hf] extra (torch)")
28 safetensors_numpy = pytest.importorskip(
29 "safetensors.numpy", reason="needs the [hf] extra (safetensors)"
30 )
31
32 from dlm_sway.core.result import Verdict # noqa: E402
33 from dlm_sway.probes.base import RunContext, build_probe # noqa: E402
34
# Module-wide markers: every test in this file is tagged both ``slow``
# and ``online`` (see the module docstring for the rationale).
pytestmark = [
    pytest.mark.slow,
    pytest.mark.online,
]
36
37
# Version directory of the locally trained adapter that the real-store
# test probes; resolved relative to the current user's home directory.
_REAL_STORE_PATH = Path.home().joinpath(
    ".dlm",
    "store",
    "01KPPFAB2Z6DWCWY0QV702TSTX",
    "adapter",
    "versions",
    "v0001",
)
41
42
def test_real_undertrained_dlm_store_fails(tmp_path: Path) -> None:
    """The probe must report FAIL on the known-undertrained local dlm store.

    The target is a real adapter left behind by a ``--max-steps 2``
    smoke-test run — the ground-truth artifact that drove the sprint
    design. Machines without that fixture (CI in particular) skip the
    test rather than fail it.
    """
    state_file = _REAL_STORE_PATH / "training_state.pt"
    if not state_file.exists():
        pytest.skip(
            f"no dlm store fixture at {_REAL_STORE_PATH} — skipping the "
            "real-adapter prove-the-value test (synthetic test below "
            "still runs)"
        )

    probe_config = {
        "name": "gg_real",
        "kind": "gradient_ghost",
        "adapter_path": str(_REAL_STORE_PATH),
    }
    probe, spec = build_probe(probe_config)
    result = probe.run(spec, RunContext())

    assert result.verdict == Verdict.FAIL, (
        f"expected FAIL on a known-undertrained dlm store, got {result.verdict}: {result.message}"
    )

    # The real fixture sits at global_step=2, so the step count alone
    # should already trip the primary signal.
    evidence = result.evidence
    assert evidence["global_step"] < 50
    accepted_signals = (
        "global_step_below_threshold",
        "all_optimizer_state_nan",
    )
    assert evidence["primary_signal"] in accepted_signals
76
77
def _build_converged_fixture(adapter_dir: Path) -> int:
    """Materialise a synthetic 'converged' adapter pair in ``adapter_dir``.

    Two files are written (the directory is created if needed):

    - ``adapter_model.safetensors`` — zero-filled LoRA A/B tensors
      under realistic per-layer names (4 layers x q_proj/v_proj).
    - ``training_state.pt`` — global_step=500 (well above threshold)
      plus a flat per-param exp_avg_sq distribution, so no layer
      crosses the per-layer ratio.

    Returns the number of tensors stored in the safetensors file.
    """
    adapter_dir.mkdir(parents=True, exist_ok=True)

    rank, in_features = 8, 64
    # One prefix per (layer, target module); layer-major order.
    module_prefixes = [
        f"base_model.model.model.layers.{layer_idx}.self_attn.{mod}"
        for layer_idx in range(4)
        for mod in ("q_proj", "v_proj")
    ]

    weights: dict[str, np.ndarray] = {}
    for prefix in module_prefixes:
        weights[f"{prefix}.lora_A.weight"] = np.zeros((rank, in_features), dtype=np.float32)
        weights[f"{prefix}.lora_B.weight"] = np.zeros((in_features, rank), dtype=np.float32)
    safetensors_numpy.save_file(weights, str(adapter_dir / "adapter_model.safetensors"))
    num_keys = len(weights)

    # Flat distribution: every param carries exp_avg_sq == 0.1, a small
    # but finite value typical of a converged Adam state. One optimizer
    # entry is emitted per saved tensor.
    per_param_state: dict[int, dict[str, object]] = {
        pid: {
            "step": torch.tensor(500.0),
            "exp_avg": torch.zeros((4,), dtype=torch.float32),
            "exp_avg_sq": torch.full((4,), 0.1, dtype=torch.float32),
        }
        for pid in range(num_keys)
    }
    optimizer_state = {
        "state": per_param_state,
        "param_groups": [{"lr": 1e-4, "params": list(range(num_keys))}],
    }

    payload = {
        "optimizer_state_dict": optimizer_state,
        "scheduler_state_dict": {},
        "scaler_state_dict": None,
        "torch_rng_state": torch.zeros(8, dtype=torch.uint8),
        "cuda_rng_state": None,
        "numpy_rng_state": None,
        "python_random_state": None,
        "global_step": 500,
        "epoch": 5.0,
        "best_val_loss": 0.42,
        "dlm_manifest_hash": None,
        "base_model_revision": "synthetic-test-fixture",
        "pinned_versions": {"torch": "2.11.0"},
        "use_qlora": False,
    }
    torch.save(payload, str(adapter_dir / "training_state.pt"))
    return num_keys
132
133
def test_synthetic_converged_adapter_passes(tmp_path: Path) -> None:
    """The probe must PASS on a hand-rolled converged training state.

    The fixture has global_step=500 and a flat exp_avg_sq
    distribution. Paired with the real-store FAIL test, this gives
    the sprint's prove-the-value coverage:
    'undertrained → FAIL, converged → PASS'.
    """
    fixture_dir = tmp_path / "synthetic-converged"
    _build_converged_fixture(fixture_dir)

    probe_config = {
        "name": "gg_synth",
        "kind": "gradient_ghost",
        "adapter_path": str(fixture_dir),
    }
    probe, spec = build_probe(probe_config)
    result = probe.run(spec, RunContext())

    assert result.verdict == Verdict.PASS, (
        f"expected PASS on a synthetic converged adapter, got {result.verdict}: {result.message}"
    )

    # Evidence should mirror what the fixture wrote to disk.
    evidence = result.evidence
    assert evidence["global_step"] == 500
    assert evidence["frac_layers_undertrained"] == 0.0
    assert evidence["num_layers"] == 4
159
160
def test_runner_skips_backend_for_pure_pre_run_suite(tmp_path: Path) -> None:
    """A gradient_ghost-only suite must run to completion with backend=None.

    Exercises the S25 P5 runner contract through the suite runner
    itself rather than only at the probe level.
    """
    from dlm_sway.core.model import ModelSpec
    from dlm_sway.suite.runner import run as run_suite
    from dlm_sway.suite.spec import SuiteDefaults, SuiteModels, SwaySpec

    fixture_dir = tmp_path / "synthetic-converged"
    _build_converged_fixture(fixture_dir)

    models = SuiteModels(
        base=ModelSpec(base="dummy", kind="dummy"),
        ft=ModelSpec(base="dummy", kind="dummy", adapter=fixture_dir),
    )
    probe_entries = [
        {
            "name": "gg",
            "kind": "gradient_ghost",
            "adapter_path": str(fixture_dir),
        },
    ]
    spec = SwaySpec(
        version=1,
        models=models,
        defaults=SuiteDefaults(seed=0),
        suite=probe_entries,
    )

    result = run_suite(spec, backend=None, spec_path="<integration>")

    probes = result.probes
    assert len(probes) == 1
    assert probes[0].verdict == Verdict.PASS
    # With no backend in play there is nothing to report stats for.
    assert result.backend_stats == {}