Python · 6227 bytes Raw Blame History
1 """VL cache — key stability, atomic I/O, processor fingerprint.
2
Covers:

- `VlCacheKey.as_filename` / `shard` are deterministic, and the key is frozen.
- A different target size or `processor_sha` produces a different filename.
- Round-trip: put → get returns an equal array with the same dtype.
- `put` creates the shard directory, `exists` flips after `put`, `clear` removes the tree.
- Miss on an empty store; miss on a corrupt file.
- `processor_sha256` is stable across repeat calls and pinned on the instance.
- Different preprocessor constants (or a different processor class) change the sha.
11 """
12
13 from __future__ import annotations
14
15 from pathlib import Path
16 from types import SimpleNamespace
17
18 import numpy as np
19 import pytest
20
21 from dlm.data.vl_cache import VlCache, VlCacheKey, processor_sha256
22
23
def _key(**overrides: object) -> VlCacheKey:
    """Build a `VlCacheKey` from baseline field values.

    Any keyword argument replaces the baseline value of the same name
    before the key is constructed.
    """
    baseline: dict[str, object] = {
        "blob_sha": "a" * 64,
        "processor_sha": "b" * 64,
        "target_height": 224,
        "target_width": 224,
    }
    # Later entries win, so caller-supplied overrides take precedence.
    merged = {**baseline, **overrides}
    return VlCacheKey(**merged)  # type: ignore[arg-type]
33
34
class TestVlCacheKey:
    """Filename / shard derivation and immutability of `VlCacheKey`."""

    def test_filename_shape(self) -> None:
        """The filename matches the expected dotted layout ending in `.npz`."""
        # The default key carries a 64-char processor sha, but only 12 of
        # those characters are expected to appear in the filename.
        expected = f"{'a' * 64}.{'b' * 12}.224x224.npz"
        assert _key().as_filename() == expected

    def test_shard_is_two_prefix(self) -> None:
        """The shard is the leading two characters of the blob sha."""
        blob = "cd" + "0" * 62
        assert _key(blob_sha=blob).shard() == "cd"

    def test_different_size_different_filename(self) -> None:
        """Changing the target height changes the filename."""
        small = _key(target_height=224)
        large = _key(target_height=336)
        assert small.as_filename() != large.as_filename()

    def test_different_processor_different_filename(self) -> None:
        """Changing the processor sha changes the filename."""
        ones = _key(processor_sha="1" * 64)
        twos = _key(processor_sha="2" * 64)
        assert ones.as_filename() != twos.as_filename()

    def test_key_is_frozen(self) -> None:
        """Assigning to a field of an existing key raises AttributeError."""
        frozen = _key()
        with pytest.raises(AttributeError):
            frozen.blob_sha = "x" * 64  # type: ignore[misc]
57
58
class TestVlCacheRoundTrip:
    """Exercises `VlCache` put / get / exists / clear under a temp directory."""

    def test_miss_on_empty(self, tmp_path: Path) -> None:
        """A lookup in a store that was never written to returns None."""
        store = VlCache(tmp_path / "vl")
        hit = store.get(_key())
        assert hit is None

    def test_put_then_get(self, tmp_path: Path) -> None:
        """A stored array comes back equal, with its float32 dtype intact."""
        store = VlCache(tmp_path / "vl")
        original = np.arange(3 * 4 * 5, dtype=np.float32).reshape(1, 3, 4, 5)
        # Two separately built (but equal) keys are used for write and read.
        store.put(_key(), original)
        restored = store.get(_key())
        assert restored is not None
        np.testing.assert_array_equal(restored, original)
        assert restored.dtype == np.float32

    def test_put_creates_shard_dir(self, tmp_path: Path) -> None:
        """Writing an entry creates a directory named after the key's shard."""
        root = tmp_path / "vl"
        store = VlCache(root)
        sharded_key = _key(blob_sha="ef" + "0" * 62)
        store.put(sharded_key, np.zeros(1, dtype=np.float32))
        assert (root / "ef").is_dir()

    def test_exists_flips_after_put(self, tmp_path: Path) -> None:
        """`exists` reports False before the write and True after it."""
        store = VlCache(tmp_path / "vl")
        key = _key()
        assert store.exists(key) is False
        store.put(key, np.zeros(1, dtype=np.float32))
        assert store.exists(key) is True

    def test_corrupt_file_treated_as_miss(self, tmp_path: Path) -> None:
        """An entry whose on-disk bytes are garbage reads back as a miss."""
        store = VlCache(tmp_path / "vl")
        key = _key()
        store.put(key, np.zeros(1, dtype=np.float32))
        # Overwrite the stored entry in place with bytes that are not an archive.
        entry = store.path_for(key)
        entry.write_bytes(b"not a real npz")
        assert store.get(key) is None

    def test_clear_removes_tree(self, tmp_path: Path) -> None:
        """After `clear`, the cache root directory no longer exists."""
        root = tmp_path / "vl"
        store = VlCache(root)
        store.put(_key(), np.zeros(1, dtype=np.float32))
        store.clear()
        assert not root.exists()
99
100
class TestProcessorSha256:
    """Checks on `processor_sha256`: repeatability, pinning, and drift."""

    def _make_processor(self, **attrs: object) -> SimpleNamespace:
        """Return a stand-in processor; keyword arguments replace baseline fields."""
        baseline: dict[str, object] = {
            "image_size": (224, 224),
            "image_mean": [0.5, 0.5, 0.5],
            "image_std": [0.5, 0.5, 0.5],
            "do_normalize": True,
            "do_rescale": True,
            "rescale_factor": 1 / 255,
            "resample": 2,
        }
        return SimpleNamespace(**{**baseline, **attrs})

    def test_stable_across_calls(self) -> None:
        """Hashing the same processor twice gives the same digest."""
        processor = self._make_processor()
        first = processor_sha256(processor)
        second = processor_sha256(processor)
        assert first == second

    def test_pinned_on_instance(self) -> None:
        """The digest from the first call survives a later field mutation."""
        processor = self._make_processor()
        pinned = processor_sha256(processor)
        # Changing a field that differentiates digests elsewhere in this class
        # must not change the result for an instance that was already hashed.
        processor.image_mean = [0.1, 0.1, 0.1]
        assert processor_sha256(processor) == pinned

    def test_different_size_different_sha(self) -> None:
        """Processors differing only in `image_size` hash differently."""
        small = self._make_processor(image_size=(224, 224))
        large = self._make_processor(image_size=(336, 336))
        assert processor_sha256(small) != processor_sha256(large)

    def test_different_mean_different_sha(self) -> None:
        """Processors differing only in `image_mean` hash differently."""
        uniform = self._make_processor(image_mean=[0.5, 0.5, 0.5])
        skewed = self._make_processor(image_mean=[0.1, 0.2, 0.3])
        assert processor_sha256(uniform) != processor_sha256(skewed)

    def test_different_class_different_sha(self) -> None:
        """Two classes with identical attributes still hash differently."""

        class ProcA:
            image_size = (224, 224)
            image_mean = [0.5] * 3
            image_std = [0.5] * 3

        class ProcB:
            image_size = (224, 224)
            image_mean = [0.5] * 3
            image_std = [0.5] * 3

        sha_a = processor_sha256(ProcA())
        sha_b = processor_sha256(ProcB())
        assert sha_a != sha_b

    def test_nested_dict_and_tuple_fields_are_readable(self) -> None:
        """A processor nesting its fields under `image_processor` hashes cleanly."""
        inner = SimpleNamespace(
            size={"shortest_edge": 224, "crop": (224, 224)},
            image_mean=(0.5, 0.5, 0.5),
            image_std=[0.2, 0.2, 0.2],
            do_normalize=True,
            do_rescale=True,
            rescale_factor=1 / 255,
            resample="bicubic",
        )
        processor = SimpleNamespace(image_processor=inner)
        digest = processor_sha256(processor)
        # Only the digest length (64 characters) is asserted.
        assert len(digest) == 64

    def test_exotic_resample_value_stringifies_stably(self) -> None:
        """A bare `object()` as `resample` still yields a 64-character digest."""
        inner = SimpleNamespace(
            size={"shortest_edge": 224},
            image_mean=[0.5] * 3,
            image_std=[0.5] * 3,
            do_normalize=True,
            do_rescale=True,
            rescale_factor=1 / 255,
            resample=object(),
        )
        processor = SimpleNamespace(image_processor=inner)
        digest = processor_sha256(processor)
        # Only the digest length is checked; cross-instance stability is not.
        assert len(digest) == 64
178 assert len(sha) == 64