#!/usr/bin/env python3
"""Regenerate Sprint 15's determinism goldens (audit F19).

Goldens are keyed by the runtime tuple
`(torch, transformers, peft, trl, bitsandbytes, platform_tag)`. Changing
any pinned version invalidates the recorded adapter SHA; the operator
must re-run this script after deliberate review to refresh the golden.

Flow:

1. Sample the current runtime versions via `capture_runtime_versions()`.
2. Train SmolLM2-135M from scratch twice against the same seed/doc.
3. Hash each run's `adapter_model.safetensors`; assert they match.
4. Write `tests/golden/determinism/<tuple>.json` with:
   - `adapter_sha256` — the matching hash
   - `pinned_versions` — the runtime tuple
   - `regenerated_at` — UTC timestamp
   - `dlm_sha256` — hash of the synthetic training doc (reproducible
     across runs when the factory's ULID seed is pinned)
5. Upsert `.determinism/lock.json` with the checked-in tuple metadata.
6. Compare against the prior golden (if one existed) and print a diff.
7. Exit non-zero unless `--approve` is passed. The default is
   dry-run-and-report so a stray script invocation doesn't silently
   overwrite a baseline.

Usage:
    uv run python scripts/regen-determinism-golden.py            # dry run
    uv run python scripts/regen-determinism-golden.py --approve  # write

The matching repo-level `.determinism/lock.json` records which tuples
have a checked-in golden. It is distinct from the per-store
`dlm.lock`, which captures one training run's determinism contract.
"""
| 34 |
|
| 35 |
from __future__ import annotations |
| 36 |
|
| 37 |
import argparse |
| 38 |
import hashlib |
| 39 |
import json |
| 40 |
import platform |
| 41 |
import sys |
| 42 |
from datetime import UTC, datetime |
| 43 |
from pathlib import Path |
| 44 |
from typing import Any |
| 45 |
|
| 46 |
# This script lives in scripts/, so parents[1] is the repository checkout.
_REPO_ROOT = Path(__file__).resolve().parents[1]
# Checked-in location for the per-tuple golden JSON files.
_GOLDEN_DIR = _REPO_ROOT / "tests" / "golden" / "determinism"
_SYNTHETIC_DLM_ID = "01HRDGOLDEN" + "0" * 15  # 26 chars — stable across runs
# Pinned training seed and step budget; both are recorded in the golden payload.
_SEED = 42
_MAX_STEPS = 20
| 51 |
|
| 52 |
|
| 53 |
def _tuple_filename(versions: dict[str, str]) -> str: |
| 54 |
"""Produce a filesystem-safe key from the version tuple. |
| 55 |
|
| 56 |
Ordering matters for determinism — keys are sorted so a reorder |
| 57 |
in `pinned_versions` doesn't produce a different filename. |
| 58 |
""" |
| 59 |
parts = [f"{k}={versions[k]}" for k in sorted(versions)] |
| 60 |
parts.append(f"platform={platform.system().lower()}-{platform.machine()}") |
| 61 |
raw = "|".join(parts) |
| 62 |
# Keep the filename short + avoid shell-unfriendly characters. |
| 63 |
digest = hashlib.sha256(raw.encode()).hexdigest()[:16] |
| 64 |
return f"tuple-{digest}.json" |
| 65 |
|
| 66 |
|
| 67 |
def _hash_adapter(adapter_dir: Path) -> str: |
| 68 |
target = adapter_dir / "adapter_model.safetensors" |
| 69 |
if not target.is_file(): |
| 70 |
raise FileNotFoundError(f"adapter weights missing: {target}") |
| 71 |
digest = hashlib.sha256() |
| 72 |
with target.open("rb") as fh: |
| 73 |
for chunk in iter(lambda: fh.read(65536), b""): |
| 74 |
digest.update(chunk) |
| 75 |
return digest.hexdigest() |
| 76 |
|
| 77 |
|
| 78 |
def _run_training(home: Path) -> Path:
    """Execute one from-scratch training cycle rooted at *home*.

    Returns the directory holding the freshly trained adapter.

    Raises:
        RuntimeError: if no viable hardware plan exists, or if training
            completes without setting a current adapter.
    """
    import os

    # Point the runtime at the throwaway home before any dlm import
    # resolves store paths.
    os.environ["DLM_HOME"] = str(home)

    from tests.fixtures.dlm_factory import make_dlm

    from dlm.base_models import resolve as resolve_base_model
    from dlm.doc.parser import parse_file
    from dlm.hardware import doctor
    from dlm.store.paths import for_dlm
    from dlm.train import run as run_training

    # Materialize the synthetic training doc; the pinned ULID keeps the
    # bytes identical across invocations.
    doc_file = home / "determinism.dlm"
    doc_file.write_text(
        make_dlm(base_model="smollm2-135m", dlm_id=_SYNTHETIC_DLM_ID),
        encoding="utf-8",
    )

    document = parse_file(doc_file)
    base_spec = resolve_base_model(document.frontmatter.base_model)

    hardware_plan = doctor().plan
    if hardware_plan is None:
        raise RuntimeError("doctor() returned no viable plan on this host")

    dlm_store = for_dlm(document.frontmatter.dlm_id)
    dlm_store.ensure_layout()

    run_training(
        dlm_store,
        document,
        base_spec,
        hardware_plan,
        mode="fresh",
        seed=_SEED,
        max_steps=_MAX_STEPS,
        lock_mode="update",  # we're deliberately (re)baselining
    )

    adapter_dir = dlm_store.resolve_current_adapter()
    if adapter_dir is None:
        raise RuntimeError("training finished but no current adapter is set")
    return adapter_dir
| 121 |
|
| 122 |
|
| 123 |
def _current_versions() -> dict[str, str]:
    """Sample the runtime version tuple, keeping only string values.

    Optional deps may be reported with non-string placeholders; dropping
    them keeps the output stable across runs that happen to lack one.
    """
    from dlm.train.state_sidecar import capture_runtime_versions

    sampled = capture_runtime_versions()
    return {name: ver for name, ver in sampled.items() if isinstance(ver, str)}
| 130 |
|
| 131 |
|
| 132 |
def _load_prior_golden(target: Path) -> dict[str, Any] | None:
    """Return the existing golden payload at *target*, or None if it is
    absent or unparseable (a corrupt golden is treated as 'no prior')."""
    if not target.is_file():
        return None
    try:
        return json.loads(target.read_text(encoding="utf-8"))
    except json.JSONDecodeError:
        return None


def main() -> int:
    """Regenerate the determinism golden for the current runtime tuple.

    Returns a process exit code:
      0 — determinism confirmed; golden unchanged, or written with --approve
      1 — dry run found a new/changed golden it did not write
      2 — the two training runs disagreed, i.e. determinism is broken
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--approve",
        action="store_true",
        help="Write the refreshed golden instead of dry-running.",
    )
    args = parser.parse_args()

    import tempfile

    from dlm.lock.golden_index import GOLDEN_INDEX_RELATIVE_PATH, upsert_golden_index

    versions = _current_versions()
    target = _GOLDEN_DIR / _tuple_filename(versions)
    prior = _load_prior_golden(target)

    # Two independent from-scratch runs under throwaway homes. The doc is
    # hashed inside the context because the temp dirs vanish on exit; the
    # module docstring promises a `dlm_sha256` field in the golden.
    with tempfile.TemporaryDirectory() as a, tempfile.TemporaryDirectory() as b:
        sha_a = _hash_adapter(_run_training(Path(a)))
        dlm_sha = hashlib.sha256((Path(a) / "determinism.dlm").read_bytes()).hexdigest()
        sha_b = _hash_adapter(_run_training(Path(b)))

    if sha_a != sha_b:
        print(
            f"[ERROR] determinism broken: run-A={sha_a} run-B={sha_b}",
            file=sys.stderr,
        )
        return 2

    payload: dict[str, Any] = {
        "adapter_sha256": sha_a,
        "pinned_versions": versions,
        "platform": f"{platform.system().lower()}-{platform.machine()}",
        # Keep the tz offset ("+00:00") — the docstring promises a UTC
        # timestamp, and a naive ISO string would lose that guarantee.
        "regenerated_at": datetime.now(UTC).replace(microsecond=0).isoformat(),
        "dlm_sha256": dlm_sha,
        "dlm_id": _SYNTHETIC_DLM_ID,
        "seed": _SEED,
        "max_steps": _MAX_STEPS,
    }

    print(f"[ok] tuple determinism confirmed: adapter_sha={sha_a[:16]}…")
    changed = prior is None or prior.get("adapter_sha256") != sha_a
    if prior is not None:
        if changed:
            print(
                f"[diff] prior={prior.get('adapter_sha256')} current={sha_a} delta=change",
                file=sys.stderr,
            )
        else:
            print("[diff] no change from prior golden")

    if not args.approve:
        print(
            f"[dry-run] pass --approve to write {target.relative_to(_REPO_ROOT)}",
        )
        # Non-zero when the golden would actually change, so CI can gate on it.
        return 1 if changed else 0

    _GOLDEN_DIR.mkdir(parents=True, exist_ok=True)
    target.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8")
    upsert_golden_index(
        _REPO_ROOT,
        golden_relpath=target.relative_to(_REPO_ROOT).as_posix(),
        adapter_sha256=sha_a,
        platform=payload["platform"],
        pinned_versions=versions,
    )
    print(f"[wrote] {target.relative_to(_REPO_ROOT)}")
    print(f"[wrote] {GOLDEN_INDEX_RELATIVE_PATH}")
    return 0
| 208 |
|
| 209 |
|
| 210 |
if __name__ == "__main__":
    # Equivalent to sys.exit(main()): SystemExit carries the exit code.
    raise SystemExit(main())