Python · 1799 bytes Raw Blame History
1 #!/usr/bin/env python
2 """Re-resolve every curated base-model entry against its live sources.
3
4 Two modes:
5
6 - Default: print a human-readable diff for each entry whose pinned SHA
7 no longer matches its live fetch source (or whose license/gating /
8 provenance changed).
9 Exit 0.
10 - `--check`: exit 1 if *any* entry has drifted. Used by the weekly
11 CI job to open an issue when maintainer action is needed.
12
13 Does **not** write back to `registry.py` automatically — drifted SHAs
14 are a signal for a human to review the upstream change (new license
15 terms, tokenizer surgery, provenance changes, etc.). The script prints
16 the ready-to-paste field values so the manual update is trivial.
17
18 Usage:
19 uv run python scripts/refresh-registry.py # print diff
20 uv run python scripts/refresh-registry.py --check # CI gate
21 """
22
23 from __future__ import annotations
24
25 import argparse
26 import sys
27
28 from dlm.base_models import BASE_MODELS
29 from dlm.base_models.registry_refresh import check_registry
30
31
def main(argv: list[str] | None = None) -> int:
    """Report curated registry entries that no longer match their live sources.

    Parses the command line, runs ``check_registry()``, and prints either a
    single "all match" line or one rendered drift per entry followed by a
    reminder to update the registry by hand.

    Args:
        argv: Command-line arguments to parse, without the program name.
            ``None`` (the default) lets argparse read ``sys.argv[1:]``,
            which is what the script entry point relies on.

    Returns:
        The process exit status: ``0`` when nothing drifted or when drift
        was only reported, ``1`` when drift was found and ``--check`` was
        passed.
    """
    parser = argparse.ArgumentParser(
        description=__doc__ or "",
        # The module docstring contains a bullet list and indented usage
        # examples; the default formatter would re-wrap them into a single
        # paragraph in ``--help`` output.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--check",
        action="store_true",
        help="Exit non-zero if any entry has drifted (for CI).",
    )
    args = parser.parse_args(argv)

    drifts = check_registry()

    # Nothing drifted: same exit status with or without --check.
    if not drifts:
        print(f"All {len(BASE_MODELS)} registry entries match their live sources.")
        return 0

    print(f"{len(drifts)} of {len(BASE_MODELS)} entries have drifted:")
    for drift in drifts:
        print(drift.render())
        print()  # blank separator after each rendered drift
    print(
        "Review each upstream change (commit log / license / gating / provenance) and "
        "update `src/dlm/base_models/registry.py` by hand."
    )

    # Drift only fails the run when the caller opted in via --check.
    return 1 if args.check else 0
57
58
# Script entry point: run the CLI and hand its return value to the
# interpreter as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)