| 1 |
"""VL cache — key stability, atomic I/O, processor fingerprint. |
| 2 |
|
| 3 |
Covers: |
| 4 |
|
| 5 |
- `VlCacheKey.as_filename` / `shard` are deterministic. |
| 6 |
- Different target_size / processor_sha produce different filenames. |
| 7 |
- Round-trip: put → get returns byte-identical array. |
| 8 |
- Miss on empty store, miss on corrupt file. |
| 9 |
- `processor_sha256` is stable across repeat calls + pinned on instance. |
| 10 |
- Different preprocessor constants drift the sha. |
| 11 |
""" |
| 12 |
|
| 13 |
from __future__ import annotations |
| 14 |
|
| 15 |
from pathlib import Path |
| 16 |
from types import SimpleNamespace |
| 17 |
|
| 18 |
import numpy as np |
| 19 |
import pytest |
| 20 |
|
| 21 |
from dlm.data.vl_cache import VlCache, VlCacheKey, processor_sha256 |
| 22 |
|
| 23 |
|
| 24 |
def _key(**overrides: object) -> VlCacheKey:
    """Build a `VlCacheKey` from fixed baseline fields plus per-test overrides.

    Every keyword argument replaces the baseline field of the same name, so a
    test only has to spell out the field it actually cares about.
    """
    baseline: dict[str, object] = {
        "blob_sha": "a" * 64,
        "processor_sha": "b" * 64,
        "target_height": 224,
        "target_width": 224,
    }
    # Later entries win, so overrides shadow the baseline values.
    fields = {**baseline, **overrides}
    return VlCacheKey(**fields)  # type: ignore[arg-type]
| 33 |
|
| 34 |
|
| 35 |
class TestVlCacheKey:
    """Filename and shard derivation, plus immutability, of `VlCacheKey`."""

    def test_filename_shape(self) -> None:
        """The default key renders as blob sha, 12 processor-sha chars, size."""
        expected = f"{'a' * 64}.{'b' * 12}.224x224.npz"
        assert _key().as_filename() == expected

    def test_shard_is_two_prefix(self) -> None:
        """The shard is the two leading characters of the blob sha."""
        shard_name = _key(blob_sha="cd" + "0" * 62).shard()
        assert shard_name == "cd"

    def test_different_size_different_filename(self) -> None:
        """Changing only the target height changes the filename."""
        small_name = _key(target_height=224).as_filename()
        large_name = _key(target_height=336).as_filename()
        assert small_name != large_name

    def test_different_processor_different_filename(self) -> None:
        """Changing only the processor sha changes the filename."""
        ones_name = _key(processor_sha="1" * 64).as_filename()
        twos_name = _key(processor_sha="2" * 64).as_filename()
        assert ones_name != twos_name

    def test_key_is_frozen(self) -> None:
        """Assigning to a field of an existing key raises `AttributeError`."""
        frozen_key = _key()
        with pytest.raises(AttributeError):
            frozen_key.blob_sha = "x" * 64  # type: ignore[misc]
| 57 |
|
| 58 |
|
| 59 |
class TestVlCacheRoundTrip:
    """Put/get/exists/clear behaviour of `VlCache` rooted in a temp directory."""

    def test_miss_on_empty(self, tmp_path: Path) -> None:
        """A lookup against a store that was never written to returns None."""
        store = VlCache(tmp_path / "vl")
        result = store.get(_key())
        assert result is None

    def test_put_then_get(self, tmp_path: Path) -> None:
        """A stored array comes back with equal contents and the same dtype."""
        store = VlCache(tmp_path / "vl")
        values = np.arange(3 * 4 * 5, dtype=np.float32)
        tensor = values.reshape(1, 3, 4, 5)
        # Two separately built keys: the lookup must match by value.
        store.put(_key(), tensor)
        fetched = store.get(_key())
        assert fetched is not None
        np.testing.assert_array_equal(fetched, tensor)
        assert fetched.dtype == np.float32

    def test_put_creates_shard_dir(self, tmp_path: Path) -> None:
        """Writing an entry creates the shard directory under the cache root."""
        root = tmp_path / "vl"
        store = VlCache(root)
        sharded_key = _key(blob_sha="ef" + "0" * 62)
        store.put(sharded_key, np.zeros((1,), dtype=np.float32))
        assert (root / "ef").is_dir()

    def test_exists_flips_after_put(self, tmp_path: Path) -> None:
        """`exists` is False before the first put and True afterwards."""
        store = VlCache(tmp_path / "vl")
        probe = _key()
        assert store.exists(probe) is False
        store.put(probe, np.zeros((1,), dtype=np.float32))
        assert store.exists(probe) is True

    def test_corrupt_file_treated_as_miss(self, tmp_path: Path) -> None:
        """An entry whose file no longer holds a valid npz reads as a miss."""
        store = VlCache(tmp_path / "vl")
        probe = _key()
        store.put(probe, np.zeros((1,), dtype=np.float32))
        # Overwrite the stored entry in place with bytes that are not an npz.
        entry_path = store.path_for(probe)
        entry_path.write_bytes(b"not a real npz")
        assert store.get(probe) is None

    def test_clear_removes_tree(self, tmp_path: Path) -> None:
        """`clear` removes the cache root directory itself."""
        root = tmp_path / "vl"
        store = VlCache(root)
        store.put(_key(), np.zeros((1,), dtype=np.float32))
        store.clear()
        assert not root.exists()
| 99 |
|
| 100 |
|
| 101 |
class TestProcessorSha256:
    """Stability and sensitivity of the `processor_sha256` fingerprint."""

    def _make_processor(self, **attrs: object) -> SimpleNamespace:
        """Return a stand-in processor; keyword arguments replace baseline fields."""
        baseline: dict[str, object] = {
            "image_size": (224, 224),
            "image_mean": [0.5, 0.5, 0.5],
            "image_std": [0.5, 0.5, 0.5],
            "do_normalize": True,
            "do_rescale": True,
            "rescale_factor": 1 / 255,
            "resample": 2,
        }
        return SimpleNamespace(**{**baseline, **attrs})

    def test_stable_across_calls(self) -> None:
        """Two calls on the same processor return the same digest."""
        proc = self._make_processor()
        first = processor_sha256(proc)
        second = processor_sha256(proc)
        assert first == second

    def test_pinned_on_instance(self) -> None:
        """A digest computed once is reused even after the instance mutates."""
        proc = self._make_processor()
        original = processor_sha256(proc)
        # This field would change the digest if it were recomputed; the
        # value pinned on the instance is returned instead, which keeps
        # repeat calls O(1).
        proc.image_mean = [0.1, 0.1, 0.1]
        after_mutation = processor_sha256(proc)
        assert after_mutation == original

    def test_different_size_different_sha(self) -> None:
        """Processors that differ only in `image_size` get different digests."""
        small = self._make_processor(image_size=(224, 224))
        large = self._make_processor(image_size=(336, 336))
        assert processor_sha256(small) != processor_sha256(large)

    def test_different_mean_different_sha(self) -> None:
        """Processors that differ only in `image_mean` get different digests."""
        uniform = self._make_processor(image_mean=[0.5, 0.5, 0.5])
        skewed = self._make_processor(image_mean=[0.1, 0.2, 0.3])
        assert processor_sha256(uniform) != processor_sha256(skewed)

    def test_different_class_different_sha(self) -> None:
        """Two classes with identical attributes still get different digests."""

        class ProcA:
            image_size = (224, 224)
            image_mean = [0.5] * 3
            image_std = [0.5] * 3

        class ProcB:
            image_size = (224, 224)
            image_mean = [0.5] * 3
            image_std = [0.5] * 3

        sha_a = processor_sha256(ProcA())
        sha_b = processor_sha256(ProcB())
        assert sha_a != sha_b

    def test_nested_dict_and_tuple_fields_are_readable(self) -> None:
        """Dict- and tuple-valued fields on a nested `image_processor` hash fine."""
        inner = SimpleNamespace(
            size={"shortest_edge": 224, "crop": (224, 224)},
            image_mean=(0.5, 0.5, 0.5),
            image_std=[0.2, 0.2, 0.2],
            do_normalize=True,
            do_rescale=True,
            rescale_factor=1 / 255,
            resample="bicubic",
        )
        digest = processor_sha256(SimpleNamespace(image_processor=inner))
        assert len(digest) == 64

    def test_exotic_resample_value_stringifies_stably(self) -> None:
        """An arbitrary object as `resample` still yields a 64-char digest."""
        inner = SimpleNamespace(
            size={"shortest_edge": 224},
            image_mean=[0.5] * 3,
            image_std=[0.5] * 3,
            do_normalize=True,
            do_rescale=True,
            rescale_factor=1 / 255,
            resample=object(),
        )
        digest = processor_sha256(SimpleNamespace(image_processor=inner))
        assert len(digest) == 64