1 #!/usr/bin/env python3
2 """Regenerate Sprint 15's determinism goldens (audit F19).
3
4 Goldens are keyed by the runtime tuple
5 `(torch, transformers, peft, trl, bitsandbytes, platform_tag)`. Changing
6 any pinned version invalidates the recorded adapter SHA; the operator
7 must re-run this script after deliberate review to refresh the golden.
8
9 Flow:
10
11 1. Sample the current runtime versions via `capture_runtime_versions()`.
12 2. Train SmolLM2-135M from scratch twice against the same seed/doc.
13 3. Hash each run's `adapter_model.safetensors`; assert they match.
14 4. Write `tests/golden/determinism/<tuple>.json` with:
15 - `adapter_sha256` — the matching hash
16 - `pinned_versions` — the runtime tuple
17 - `regenerated_at` — UTC timestamp
   - `dlm_id` / `seed` / `max_steps` — the pinned training inputs (the
     synthetic doc's ULID is fixed so the doc reproduces across runs)
20 5. Upsert `.determinism/lock.json` with the checked-in tuple metadata.
21 6. Compare against the prior golden (if one existed) and print a diff.
 7. Exit non-zero when the golden is missing or would change; exit zero
    when it already matches. Without `--approve` this is dry-run-and-report
    only, so a stray script invocation doesn't silently overwrite a
    baseline.
25
26 Usage:
27 uv run python scripts/regen-determinism-golden.py # dry run
28 uv run python scripts/regen-determinism-golden.py --approve # write
29
30 The matching repo-level `.determinism/lock.json` records which tuples
31 have a checked-in golden. It is distinct from the per-store
32 `dlm.lock`, which captures one training run's determinism contract.
33 """
34
35 from __future__ import annotations
36
37 import argparse
38 import hashlib
39 import json
40 import platform
41 import sys
42 from datetime import UTC, datetime
43 from pathlib import Path
44 from typing import Any
45
# Repo root: this script lives in scripts/, so one parent up is the root.
_REPO_ROOT = Path(__file__).resolve().parents[1]
# Where the per-tuple golden JSON files are checked in.
_GOLDEN_DIR = _REPO_ROOT / "tests" / "golden" / "determinism"
# Fixed synthetic ULID so the generated training doc is identical run-to-run.
_SYNTHETIC_DLM_ID = "01HRDGOLDEN" + "0" * 15  # 26 chars — stable across runs
# Pinned training inputs; both are recorded in the golden payload.
_SEED = 42
_MAX_STEPS = 20
51
52
53 def _tuple_filename(versions: dict[str, str]) -> str:
54 """Produce a filesystem-safe key from the version tuple.
55
56 Ordering matters for determinism — keys are sorted so a reorder
57 in `pinned_versions` doesn't produce a different filename.
58 """
59 parts = [f"{k}={versions[k]}" for k in sorted(versions)]
60 parts.append(f"platform={platform.system().lower()}-{platform.machine()}")
61 raw = "|".join(parts)
62 # Keep the filename short + avoid shell-unfriendly characters.
63 digest = hashlib.sha256(raw.encode()).hexdigest()[:16]
64 return f"tuple-{digest}.json"
65
66
67 def _hash_adapter(adapter_dir: Path) -> str:
68 target = adapter_dir / "adapter_model.safetensors"
69 if not target.is_file():
70 raise FileNotFoundError(f"adapter weights missing: {target}")
71 digest = hashlib.sha256()
72 with target.open("rb") as fh:
73 for chunk in iter(lambda: fh.read(65536), b""):
74 digest.update(chunk)
75 return digest.hexdigest()
76
77
def _run_training(home: Path) -> Path:
    """Run one fresh training cycle under `home`. Return the adapter dir.

    Side effects: sets the `DLM_HOME` environment variable (never restored)
    and writes a synthetic `determinism.dlm` doc under `home` before
    parsing, planning, and training against it.

    Raises:
        RuntimeError: if `doctor()` yields no viable plan, or training
            completes without a current adapter being set.
    """
    import os

    # DLM_HOME must be set before the dlm imports below execute.
    # NOTE(review): on a second call in the same process those modules are
    # already cached by the import system; if any of them read DLM_HOME at
    # import time, the new value would not take effect — confirm they read
    # it lazily.
    os.environ["DLM_HOME"] = str(home)

    from tests.fixtures.dlm_factory import make_dlm

    from dlm.base_models import resolve as resolve_base_model
    from dlm.doc.parser import parse_file
    from dlm.hardware import doctor
    from dlm.store.paths import for_dlm
    from dlm.train import run as run_training

    # Synthesize the training doc with a fixed ULID so the doc content is
    # identical across both determinism runs.
    doc_path = home / "determinism.dlm"
    doc_path.write_text(
        make_dlm(base_model="smollm2-135m", dlm_id=_SYNTHETIC_DLM_ID),
        encoding="utf-8",
    )

    parsed = parse_file(doc_path)
    spec = resolve_base_model(parsed.frontmatter.base_model)
    plan = doctor().plan
    if plan is None:
        raise RuntimeError("doctor() returned no viable plan on this host")

    store = for_dlm(parsed.frontmatter.dlm_id)
    store.ensure_layout()

    run_training(
        store,
        parsed,
        spec,
        plan,
        mode="fresh",
        seed=_SEED,
        max_steps=_MAX_STEPS,
        lock_mode="update",  # we're deliberately (re)baselining
    )
    adapter = store.resolve_current_adapter()
    if adapter is None:
        raise RuntimeError("training finished but no current adapter is set")
    return adapter
121
122
def _current_versions() -> dict[str, str]:
    """Sample the runtime versions, keeping only string-valued entries.

    Runs that lack an optional dependency may report non-string values;
    dropping those keeps the recorded tuple stable across such runs.
    """
    from dlm.train.state_sidecar import capture_runtime_versions

    sampled = capture_runtime_versions()
    filtered: dict[str, str] = {}
    for name, version in sampled.items():
        if isinstance(version, str):
            filtered[name] = version
    return filtered
130
131
def main() -> int:
    """Entry point: verify tuple determinism and (optionally) refresh the golden.

    Returns:
        0 on success (dry run with no change, or an approved write),
        1 when a dry run finds the golden missing or changed,
        2 when the two training runs disagree (determinism is broken).
    """
    # RawDescriptionHelpFormatter keeps the module docstring's usage block
    # readable in --help instead of re-flowing it into one paragraph.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--approve",
        action="store_true",
        help="Write the refreshed golden instead of dry-running.",
    )
    args = parser.parse_args()

    import tempfile

    from dlm.lock.golden_index import GOLDEN_INDEX_RELATIVE_PATH, upsert_golden_index

    versions = _current_versions()
    filename = _tuple_filename(versions)
    target = _GOLDEN_DIR / filename
    golden_relpath = target.relative_to(_REPO_ROOT).as_posix()

    # Load the prior golden (if any) so a diff can be reported later; a
    # corrupt file is treated the same as a missing one.
    prior = None
    if target.is_file():
        try:
            prior = json.loads(target.read_text(encoding="utf-8"))
        except json.JSONDecodeError:
            prior = None

    # Two independent from-scratch runs under isolated homes; hash inside
    # the context so the temp dirs still exist when the adapters are read.
    with tempfile.TemporaryDirectory() as a, tempfile.TemporaryDirectory() as b:
        sha_a = _hash_adapter(_run_training(Path(a)))
        sha_b = _hash_adapter(_run_training(Path(b)))

    if sha_a != sha_b:
        print(
            f"[ERROR] determinism broken: run-A={sha_a} run-B={sha_b}",
            file=sys.stderr,
        )
        return 2

    payload: dict[str, Any] = {
        "adapter_sha256": sha_a,
        "pinned_versions": versions,
        "platform": f"{platform.system().lower()}-{platform.machine()}",
        # Keep tzinfo so the recorded timestamp is explicitly UTC
        # ("...+00:00") — stripping it produced a naive string that
        # contradicted the documented "UTC timestamp" contract.
        "regenerated_at": datetime.now(UTC).replace(microsecond=0).isoformat(),
        "dlm_id": _SYNTHETIC_DLM_ID,
        "seed": _SEED,
        "max_steps": _MAX_STEPS,
    }

    print(f"[ok] tuple determinism confirmed: adapter_sha={sha_a[:16]}…")
    prior_sha = prior.get("adapter_sha256") if prior is not None else None
    if prior is not None:
        if prior_sha != sha_a:
            print(
                f"[diff] prior={prior_sha} current={sha_a} delta=change",
                file=sys.stderr,
            )
        else:
            print("[diff] no change from prior golden")

    if not args.approve:
        print(
            f"[dry-run] pass --approve to write {target.relative_to(_REPO_ROOT)}",
        )
        # Non-zero tells CI callers "a write would create/change the golden";
        # prior_sha is None when there was no (valid) prior file.
        return 1 if prior_sha != sha_a else 0

    _GOLDEN_DIR.mkdir(parents=True, exist_ok=True)
    target.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8")
    upsert_golden_index(
        _REPO_ROOT,
        golden_relpath=golden_relpath,
        adapter_sha256=sha_a,
        platform=payload["platform"],
        pinned_versions=versions,
    )
    print(f"[wrote] {target.relative_to(_REPO_ROOT)}")
    print(f"[wrote] {GOLDEN_INDEX_RELATIVE_PATH}")
    return 0
208
209
if __name__ == "__main__":
    # Propagate main()'s status code (0/1/2) to the invoking shell.
    sys.exit(main())