| 1 | """Unit tests for :class:`dlm_sway.serve.cache.BackendCache`. |
| 2 | |
| 3 | The cache is the daemon's heart: an LRU of warm differential backends |
| 4 | keyed by the identity tuple over ``ModelSpec``. These tests exercise: |
| 5 | |
| 6 | * hit / miss / LRU eviction order |
| 7 | * concurrent ``get_or_load`` for the same key only loads once |
| 8 | * ``close()`` runs on eviction (and tolerates close failures) |
| 9 | * ``cache_key_for`` is stable across spec field permutations that |
| 10 | don't change identity |
| 11 | """ |
| 12 | |
| 13 | from __future__ import annotations |
| 14 | |
| 15 | import threading |
| 16 | import time |
| 17 | from pathlib import Path |
| 18 | from typing import Any |
| 19 | |
| 20 | import pytest |
| 21 | |
| 22 | from dlm_sway.core.model import ModelSpec |
| 23 | from dlm_sway.serve.cache import BackendCache, CachedBackend, cache_key_for |
| 24 | |
| 25 | |
| 26 | class _StubBackend: |
| 27 | """Minimal stand-in implementing the loader contract. |
| 28 | |
| 29 | Tracks how many times :meth:`close` was called so eviction tests |
| 30 | can assert the call happened. Doesn't need to satisfy the full |
| 31 | ``DifferentialBackend`` Protocol — the cache only ever calls |
| 32 | ``.close()`` on it, and tests look up ``.tag`` to identify entries. |
| 33 | """ |
| 34 | |
| 35 | def __init__(self, tag: str) -> None: |
| 36 | self.tag = tag |
| 37 | self.close_count = 0 |
| 38 | |
| 39 | def close(self) -> None: |
| 40 | self.close_count += 1 |
| 41 | |
| 42 | |
def _spec(base: str, *, adapter: Path | None = None) -> ModelSpec:
    """Build a dummy-kind ``ModelSpec`` for *base*, optionally with an adapter."""
    fields: dict[str, Any] = {"base": base, "kind": "dummy", "adapter": adapter}
    return ModelSpec(**fields)
| 45 | |
| 46 | |
def _seed(cache: BackendCache, spec: ModelSpec, tag: str) -> _StubBackend:
    """Insert a stub backend under ``spec``'s identity key and return it.

    Bypasses the build path by writing straight into the cache's internal
    dict (while holding its lock). Reaching into ``_entries`` is acceptable
    in unit tests; production code only ever goes through ``get_or_load``.
    """
    stub = _StubBackend(tag)
    identity = cache_key_for(spec)
    with cache._lock:  # noqa: SLF001
        cache._entries[identity] = CachedBackend(  # noqa: SLF001
            key=identity,
            backend=stub,  # type: ignore[arg-type]
            model_spec=spec,
            load_seconds=0.0,
        )
    return stub
| 63 | |
| 64 | |
class TestCacheKey:
    """Identity-key semantics of ``cache_key_for``."""

    def test_key_is_stable_across_equivalent_specs(self) -> None:
        first, second = _spec("modelA"), _spec("modelA")
        assert cache_key_for(first) == cache_key_for(second)

    def test_key_differs_on_base(self) -> None:
        key_a = cache_key_for(_spec("modelA"))
        key_b = cache_key_for(_spec("modelB"))
        assert key_a != key_b

    def test_key_differs_on_adapter(self, tmp_path: Path) -> None:
        adapter_dir = tmp_path / "ad"
        adapter_dir.mkdir()
        keyed_with = cache_key_for(_spec("modelA", adapter=adapter_dir))
        keyed_without = cache_key_for(_spec("modelA"))
        assert keyed_with != keyed_without

    def test_key_ignores_trust_remote_code(self) -> None:
        """Two specs differing only in non-identity fields hash equal."""
        plain_key = cache_key_for(ModelSpec(base="modelA", kind="dummy"))
        trusting_key = cache_key_for(
            ModelSpec(base="modelA", kind="dummy", trust_remote_code=True)
        )
        assert plain_key == trusting_key
| 86 | |
| 87 | |
class TestCacheLRU:
    """LRU ordering, eviction, and close() semantics of ``BackendCache``."""

    def test_max_size_validation(self) -> None:
        with pytest.raises(ValueError, match="max_size must be >= 1"):
            BackendCache(max_size=0)

    def test_hit_promotes_to_mru(self) -> None:
        cache = BackendCache(max_size=2)
        spec_a = _spec("A")
        spec_b = _spec("B")
        backend_a = _seed(cache, spec_a, "A")
        _seed(cache, spec_b, "B")

        # Touch A so it becomes MRU. get_or_load goes through the
        # hit path and moves the entry to the end.
        result = cache.get_or_load(spec_a)
        assert result.backend is backend_a

        keys = cache.loaded_keys()
        assert keys[-1] == cache_key_for(spec_a)
        assert keys[0] == cache_key_for(spec_b)

    def test_eviction_closes_lru_backend(
        self, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Insert 2 with cap=2, then load a 3rd via get_or_load and
        confirm the LRU's close() fires."""
        cache = BackendCache(max_size=2)
        spec_a = _spec("A")
        spec_b = _spec("B")
        backend_a = _seed(cache, spec_a, "A")
        _seed(cache, spec_b, "B")

        # Stub the loader so we don't need a real backend build.
        third_backend = _StubBackend("C")

        def _fake_build_entry(
            spec: ModelSpec, *, key: Any, adapter_path: Any
        ) -> CachedBackend:
            del adapter_path
            return CachedBackend(
                key=key,
                backend=third_backend,  # type: ignore[arg-type]
                model_spec=spec,
                load_seconds=0.0,
            )

        import dlm_sway.serve.cache as cache_mod

        # monkeypatch restores the original on teardown even if the test
        # body raises — safer than a manual swap inside try/finally.
        monkeypatch.setattr(cache_mod, "_build_entry", _fake_build_entry)
        cache.get_or_load(_spec("C"))

        assert backend_a.close_count == 1, "LRU eviction should call close()"
        keys = cache.loaded_keys()
        assert cache_key_for(spec_a) not in keys
        assert cache_key_for(spec_b) in keys
        assert cache_key_for(_spec("C")) in keys

    def test_evict_all_closes_every_backend(self) -> None:
        cache = BackendCache(max_size=3)
        backends = [_seed(cache, _spec(f"M{i}"), f"M{i}") for i in range(3)]
        cache.evict_all()
        assert cache.loaded_keys() == []
        for b in backends:
            assert b.close_count == 1

    def test_close_failure_is_swallowed(
        self, caplog: pytest.LogCaptureFixture
    ) -> None:
        """A backend whose close() raises should not crash the daemon."""
        cache = BackendCache(max_size=1)
        spec = _spec("boom")
        backend = _seed(cache, spec, "boom")

        def _raising_close() -> None:
            raise RuntimeError("close failed")

        backend.close = _raising_close  # type: ignore[method-assign]

        with caplog.at_level("WARNING"):
            cache.evict_all()

        # The error was logged but didn't propagate.
        assert any("close raised" in r.message for r in caplog.records)
        assert cache.loaded_keys() == []
| 174 | |
| 175 | |
class TestSingleFlight:
    def test_concurrent_get_or_load_loads_once(
        self, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Two threads asking for the same spec must result in exactly
        one underlying build, not two."""
        cache = BackendCache(max_size=2)

        build_count = 0
        build_lock = threading.Lock()
        backend = _StubBackend("solo")

        import dlm_sway.serve.cache as cache_mod

        def _slow_build_entry(
            spec: ModelSpec, *, key: Any, adapter_path: Any
        ) -> CachedBackend:
            nonlocal build_count
            with build_lock:
                build_count += 1
            # Sleep to widen the window for both threads to see a miss.
            time.sleep(0.05)
            return CachedBackend(
                key=key,
                backend=backend,  # type: ignore[arg-type]
                model_spec=spec,
                load_seconds=0.0,
            )

        # Fix: the previous version assigned cache_mod._build_entry BEFORE
        # entering its try/finally, so an exception raised between the
        # assignment and the try (spec construction, thread creation) would
        # leak the patch into later tests. monkeypatch always restores the
        # attribute on teardown, raising or not.
        monkeypatch.setattr(cache_mod, "_build_entry", _slow_build_entry)

        spec = _spec("solo")
        results: list[CachedBackend] = []
        errs: list[BaseException] = []

        def _worker() -> None:
            try:
                results.append(cache.get_or_load(spec))
            except BaseException as exc:  # noqa: BLE001
                errs.append(exc)

        threads = [threading.Thread(target=_worker) for _ in range(4)]
        for t in threads:
            t.start()
        for t in threads:
            t.join(timeout=5.0)
        # join(timeout=...) returns even when the thread is stuck, which
        # previously let a hang pass silently on partial results — make a
        # deadlock in get_or_load fail loudly instead.
        assert all(not t.is_alive() for t in threads), "worker thread hung"

        assert errs == []
        assert len(results) == 4, "every worker must get a result"
        assert build_count == 1, f"single-flight broken: built {build_count} times"
        assert all(r.backend is backend for r in results)