Python · 3998 bytes Raw Blame History
1 """On-disk cache for null-adapter calibration stats.
2
3 Null calibration runs a miniature version of every downstream numeric
4 probe across N seeds before the suite proper. For a 10-probe suite at
5 ``runs=3`` that's ~120 forward passes; on an HF backend against a real
6 model this can dominate wall time. Results are deterministic in the
7 calibration inputs — so we cache them at
8 ``~/.dlm-sway/null-stats/<key>.json`` keyed by the tuple that actually
9 influences the output.
10
11 Scope here is intentionally minimal. Sprint 07 adds a shared
12 forward-pass cache that cuts into a lower level; this module only
13 amortizes the per-suite calibration pass.
14 """
15
16 from __future__ import annotations
17
18 import hashlib
19 import json
20 import os
21 from pathlib import Path
22 from typing import Any
23
24 #: Environment knob — set to ``"1"`` to bypass load + save (development
25 #: / CI tests that want to prove calibration actually runs).
26 _ENV_DISABLE = "SWAY_DISABLE_NULL_CACHE"
27
28 #: S26 — env knob pointing at a custom null-cache directory. Used by
29 #: ``sway unpack`` to redirect lookups at a packed cache shipped in
30 #: a ``.swaypack.tar.gz`` instead of the user's home cache. Path is
31 #: used verbatim (no ``dlm-sway/null-stats`` suffix) so packs can
32 #: ship a flat directory of ``<key>.json`` files.
33 _ENV_OVERRIDE = "SWAY_NULL_CACHE_DIR"
34
35
def _cache_root() -> Path:
    """Resolve the directory that holds the ``<key>.json`` null-stat files.

    Candidates are tried in order and the first that applies wins:

    1. ``$SWAY_NULL_CACHE_DIR`` — taken as-is, with no
       ``dlm-sway/null-stats`` suffix appended. ``sway unpack`` uses this
       to aim ``sway run`` at the cache shipped inside a swaypack rather
       than the user's home cache (S26).
    2. ``$XDG_CACHE_HOME/dlm-sway/null-stats`` when ``XDG_CACHE_HOME``
       is set.
    3. ``~/.dlm-sway/null-stats`` as the fallback.

    An environment variable that is set to the empty string counts as
    unset. A leading ``~`` in either environment value is expanded. The
    directory is only computed here, never created.
    """
    env = os.environ

    custom_dir = env.get(_ENV_OVERRIDE)
    if custom_dir:
        return Path(custom_dir).expanduser()

    xdg_home = env.get("XDG_CACHE_HOME")
    if not xdg_home:
        return Path.home() / ".dlm-sway" / "null-stats"
    return Path(xdg_home).expanduser().joinpath("dlm-sway", "null-stats")
55
56
def compute_key(*, backend_identity: str | None, params: dict[str, Any]) -> str | None:
    """Derive the cache filename stem for one calibration configuration.

    The backend identity and the calibration ``params`` are serialized to
    JSON with sorted keys (so dict ordering does not matter) and hashed
    with SHA-256; the first 32 hex characters of the digest are returned.
    Values in ``params`` that JSON cannot encode natively are converted
    with ``str()`` before hashing.

    Returns ``None`` when ``backend_identity`` is ``None`` or empty —
    backends that can't uniquely identify themselves (e.g., the dummy
    backend used in tests) skip caching entirely.
    """
    if backend_identity:
        canonical = json.dumps(
            {"backend": backend_identity, "params": params},
            sort_keys=True,
            default=str,
        )
        digest = hashlib.sha256(canonical.encode("utf-8"))
        return digest.hexdigest()[:32]
    return None
72
73
def load(key: str | None) -> dict[str, Any] | None:
    """Return the cached null-stats dict for ``key``, or ``None`` on miss.

    A miss is reported when:

    * ``key`` is ``None`` (the backend opted out of caching),
    * ``$SWAY_DISABLE_NULL_CACHE`` is set to ``"1"``,
    * no ``<key>.json`` file exists under the cache root,
    * the file cannot be read, is not valid UTF-8, or is not valid JSON,
    * the top-level JSON value is not an object.

    Malformed / unreadable cache files are treated as a miss — we'd
    rather recompute than crash the suite. A stale / schema-mismatched
    cache in the default location can be wiped with
    ``rm -rf ~/.dlm-sway/null-stats``.
    """
    if key is None or os.environ.get(_ENV_DISABLE) == "1":
        return None
    path = _cache_root() / f"{key}.json"
    try:
        # No separate existence check: a missing file surfaces as
        # FileNotFoundError, which is an OSError and handled below.
        with path.open("r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # ValueError is the common parent of json.JSONDecodeError (bad
        # JSON) and UnicodeDecodeError (bytes that are not UTF-8); both
        # mean the file is corrupt and must count as a miss.
        return None
    return data if isinstance(data, dict) else None
94
95
def save(key: str | None, stats: dict[str, Any]) -> None:
    """Persist ``stats`` under ``key``.

    Does nothing when ``key`` is ``None`` or when
    ``$SWAY_DISABLE_NULL_CACHE`` is set to ``"1"``. Otherwise the stats
    are written as indented, key-sorted JSON to ``<key>.json.tmp`` inside
    the cache root and then renamed over ``<key>.json``, so ``load``
    never observes a half-written file.

    Silently no-ops on I/O errors — the cache is a speed-up, not a
    correctness contract. Failures that are not ``OSError`` (for example
    a serialization error raised by ``json.dump``) still propagate to the
    caller. In either case a partially written temp file is removed
    rather than left behind in the cache directory.
    """
    if key is None or os.environ.get(_ENV_DISABLE) == "1":
        return
    root = _cache_root()
    try:
        root.mkdir(parents=True, exist_ok=True)
        path = root / f"{key}.json"
        tmp = path.with_suffix(".json.tmp")
        try:
            with tmp.open("w", encoding="utf-8") as f:
                json.dump(stats, f, indent=2, sort_keys=True)
            tmp.replace(path)
        except BaseException:
            # The write or rename failed part-way: drop the partial temp
            # file so it does not accumulate, then let the original
            # error continue (OSError is swallowed by the outer handler).
            try:
                tmp.unlink()
            except OSError:
                pass
            raise
    except OSError:
        return
110 return