| 1 | """On-disk cache for null-adapter calibration stats. |
| 2 | |
| 3 | Null calibration runs a miniature version of every downstream numeric |
| 4 | probe across N seeds before the suite proper. For a 10-probe suite at |
| 5 | ``runs=3`` that's ~120 forward passes; on an HF backend against a real |
| 6 | model this can dominate wall time. Results are deterministic in the |
| 7 | calibration inputs — so we cache them at |
| 8 | ``~/.dlm-sway/null-stats/<key>.json`` keyed by the tuple that actually |
| 9 | influences the output. |
| 10 | |
| 11 | Scope here is intentionally minimal. Sprint 07 adds a shared |
| 12 | forward-pass cache that cuts into a lower level; this module only |
| 13 | amortizes the per-suite calibration pass. |
| 14 | """ |
| 15 | |
| 16 | from __future__ import annotations |
| 17 | |
| 18 | import hashlib |
| 19 | import json |
| 20 | import os |
| 21 | from pathlib import Path |
| 22 | from typing import Any |
| 23 | |
#: Environment knob — name of the variable that switches the cache off.
#: When its value is exactly ``"1"``, both ``load`` and ``save`` return
#: immediately without touching disk (development / CI tests that want
#: to prove calibration actually runs). Any other value leaves the cache
#: enabled.
_ENV_DISABLE = "SWAY_DISABLE_NULL_CACHE"

#: S26 — env knob pointing at a custom null-cache directory. Used by
#: ``sway unpack`` to redirect lookups at a packed cache shipped in
#: a ``.swaypack.tar.gz`` instead of the user's home cache. Path is
#: used verbatim (no ``dlm-sway/null-stats`` suffix) so packs can
#: ship a flat directory of ``<key>.json`` files. A leading ``~`` is
#: expanded; an empty value is treated the same as unset.
_ENV_OVERRIDE = "SWAY_NULL_CACHE_DIR"
| 34 | |
| 35 | |
def _cache_root() -> Path:
    """Return the directory that holds the cached null-stats files.

    The first matching rule wins:

    1. ``$SWAY_NULL_CACHE_DIR`` — when non-empty, the path is returned
       as-is (after ``~`` expansion) with no sub-directory appended.
       ``sway unpack`` sets this so later ``sway run`` calls read the
       stats shipped inside a swaypack rather than the home cache (S26).
    2. ``$XDG_CACHE_HOME`` — when non-empty, the result is
       ``$XDG_CACHE_HOME/dlm-sway/null-stats``.
    3. Fallback — ``~/.dlm-sway/null-stats``.

    The directory is only computed here, never created.
    """
    environ = os.environ

    explicit_dir = environ.get(_ENV_OVERRIDE)
    if explicit_dir:
        # Packed caches are flat: no ``dlm-sway/null-stats`` suffix.
        return Path(explicit_dir).expanduser()

    xdg_base = environ.get("XDG_CACHE_HOME")
    if xdg_base:
        base = Path(xdg_base).expanduser() / "dlm-sway"
    else:
        base = Path.home() / ".dlm-sway"
    return base / "null-stats"
| 55 | |
| 56 | |
def compute_key(*, backend_identity: str | None, params: dict[str, Any]) -> str | None:
    """Derive a stable cache filename stem from the calibration inputs.

    The backend identity and the calibration ``params`` are serialized
    to JSON with sorted keys (so dict ordering does not matter; values
    JSON cannot encode natively are passed through ``str``), hashed with
    SHA-256, and the first 32 hex characters of the digest are returned.

    Returns ``None`` when ``backend_identity`` is ``None`` or empty —
    backends that can't uniquely identify themselves (e.g., the dummy
    backend used in tests) skip caching entirely.
    """
    if backend_identity:
        serialized = json.dumps(
            {"backend": backend_identity, "params": params},
            sort_keys=True,
            default=str,
        )
        digest = hashlib.sha256(serialized.encode("utf-8")).hexdigest()
        # 32 hex chars (128 bits) keeps filenames short.
        return digest[:32]
    return None
| 72 | |
| 73 | |
def load(key: str | None) -> dict[str, Any] | None:
    """Return the cached null-stats dict for ``key``, or ``None`` on miss.

    A miss is reported when ``key`` is ``None``, when the cache is
    disabled via ``$SWAY_DISABLE_NULL_CACHE=1``, when ``<key>.json``
    does not exist under the cache root, or when the file cannot be
    turned into a dict.

    Malformed / unreadable cache files are treated as a miss — we'd
    rather recompute than crash the suite. That covers I/O failures,
    invalid JSON, bytes that are not valid UTF-8, and JSON whose top
    level is not an object. A stale / schema-mismatched cache can be
    wiped by deleting the cache directory (``~/.dlm-sway/null-stats``
    by default; see ``_cache_root`` for the overrides).
    """
    if key is None or os.environ.get(_ENV_DISABLE) == "1":
        return None
    path = _cache_root() / f"{key}.json"
    try:
        # Open directly instead of checking ``exists()`` first: a missing
        # file raises FileNotFoundError (an OSError) and is a plain miss.
        with path.open("r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError *and* UnicodeDecodeError;
        # the latter is raised for a file holding invalid UTF-8 and would
        # otherwise escape and abort the suite.
        return None
    return data if isinstance(data, dict) else None
| 94 | |
| 95 | |
def save(key: str | None, stats: dict[str, Any]) -> None:
    """Persist ``stats`` under ``key``, best-effort.

    Does nothing when ``key`` is ``None`` or when the cache is disabled
    via ``$SWAY_DISABLE_NULL_CACHE=1``. Otherwise ``stats`` is written as
    indented, key-sorted JSON to ``<key>.json.tmp`` in the cache root and
    then moved onto ``<key>.json`` with ``Path.replace``.

    Silently no-ops on failure — the cache is a speed-up, not a
    correctness contract. That includes I/O errors and ``stats`` that
    cannot be serialized to JSON; in neither case is a partial file left
    behind.
    """
    if key is None or os.environ.get(_ENV_DISABLE) == "1":
        return
    try:
        # Serialize before touching disk so an unserializable value can
        # neither crash the suite nor leave a truncated temp file.
        payload = json.dumps(stats, indent=2, sort_keys=True)
    except (TypeError, ValueError):
        return
    root = _cache_root()
    path = root / f"{key}.json"
    tmp = path.with_suffix(".json.tmp")
    try:
        root.mkdir(parents=True, exist_ok=True)
        with tmp.open("w", encoding="utf-8") as f:
            f.write(payload)
        tmp.replace(path)
    except OSError:
        # Best-effort cleanup of a half-written temp file; a missing file
        # (FileNotFoundError) is itself an OSError and is ignored.
        try:
            tmp.unlink()
        except OSError:
            pass