Python · 5913 bytes Raw Blame History
1 """Downloader contract — pinned revision, directory sha256, error paths."""
2
3 from __future__ import annotations
4
5 from pathlib import Path
6 from unittest.mock import patch
7
8 import pytest
9
10 from dlm.base_models import BaseModelSpec, GatedModelError, download_spec, sha256_of_directory
11 from dlm.base_models.downloader import _resolve_revision
12
13
def _spec() -> BaseModelSpec:
    """Build the ``BaseModelSpec`` fixture shared by the download tests.

    The payload describes a made-up ``org/demo`` entry pinned to the
    40-character revision ``"a" * 40`` with ``requires_acceptance`` off.
    """
    payload = {
        "key": "demo-1b",
        "hf_id": "org/demo",
        "revision": "a" * 40,
        "architecture": "DemoForCausalLM",
        "params": 1_000_000_000,
        "target_modules": ["q_proj", "v_proj"],
        "template": "chatml",
        "gguf_arch": "demo",
        "tokenizer_pre": "demo",
        "license_spdx": "MIT",
        "requires_acceptance": False,
        "redistributable": True,
        "size_gb_fp16": 2.0,
        "context_length": 4096,
        "recommended_seq_len": 2048,
    }
    return BaseModelSpec.model_validate(payload)
34
35
class TestDirectorySha256:
    """Behavioural checks for ``sha256_of_directory``."""

    def test_same_contents_same_digest(self, tmp_path: Path) -> None:
        """Two directories holding identical files produce equal digests."""
        files = {
            "config.json": b'{"x": 1}',
            "model.safetensors": b"\x00" * 64,
        }
        left, right = tmp_path / "a", tmp_path / "b"
        for directory in (left, right):
            directory.mkdir()
            for filename, blob in files.items():
                (directory / filename).write_bytes(blob)
        assert sha256_of_directory(left) == sha256_of_directory(right)

    def test_different_content_different_digest(self, tmp_path: Path) -> None:
        """Same file name with different bytes yields different digests."""
        left, right = tmp_path / "a", tmp_path / "b"
        for directory, blob in ((left, b'{"x": 1}'), (right, b'{"x": 2}')):
            directory.mkdir()
            (directory / "config.json").write_bytes(blob)
        left_digest = sha256_of_directory(left)
        right_digest = sha256_of_directory(right)
        assert left_digest != right_digest

    def test_different_paths_different_digest(self, tmp_path: Path) -> None:
        """Same bytes under different file names yields different digests."""
        left, right = tmp_path / "a", tmp_path / "b"
        for directory, filename in ((left, "config.json"), (right, "other.json")):
            directory.mkdir()
            (directory / filename).write_bytes(b"content")
        left_digest = sha256_of_directory(left)
        right_digest = sha256_of_directory(right)
        assert left_digest != right_digest

    def test_deterministic_across_runs(self, tmp_path: Path) -> None:
        """Hashing an unchanged tree twice gives the same digest both times."""
        tree = tmp_path / "r"
        tree.mkdir()
        for filename, blob in (("a.txt", b"hello"), ("b.txt", b"world")):
            (tree / filename).write_bytes(blob)
        digests = [sha256_of_directory(tree) for _ in range(2)]
        assert digests[0] == digests[1]

    def test_missing_directory_raises(self, tmp_path: Path) -> None:
        """A path that does not exist is rejected with ``NotADirectoryError``."""
        missing = tmp_path / "absent"
        with pytest.raises(NotADirectoryError):
            sha256_of_directory(missing)

    def test_only_files_included_not_dirs(self, tmp_path: Path) -> None:
        """Adding an empty subdirectory leaves the digest unchanged."""
        tree = tmp_path / "r"
        tree.mkdir()
        (tree / "file.txt").write_bytes(b"hi")
        digest_before = sha256_of_directory(tree)
        (tree / "empty-dir").mkdir()
        digest_after = sha256_of_directory(tree)
        assert digest_before == digest_after
87
88
class TestDownloadSpec:
    """Checks for ``download_spec`` with ``snapshot_download`` patched out."""

    # Dotted path handed to ``patch`` by the download tests in this class.
    _DOWNLOAD_TARGET = "huggingface_hub.snapshot_download"

    def test_returns_result_with_pinned_revision(self, tmp_path: Path) -> None:
        """The result reports the snapshot path, the spec revision and its digest."""
        spec = _spec()
        snapshot_dir = tmp_path.joinpath(
            "hub", "models--org--demo", "snapshots", spec.revision
        )
        snapshot_dir.mkdir(parents=True)
        (snapshot_dir / "config.json").write_bytes(b'{"arch":"demo"}')

        fake_download = patch(self._DOWNLOAD_TARGET, return_value=str(snapshot_dir))
        with fake_download:
            result = download_spec(spec)

        assert result.path == snapshot_dir
        assert result.revision == spec.revision
        # The reported digest must equal an independent hash of the snapshot.
        assert result.sha256 == sha256_of_directory(snapshot_dir)

    def test_revision_mismatch_raises(self, tmp_path: Path) -> None:
        """A snapshot path under another sha is rejected as a revision mismatch."""
        spec = _spec()
        # Park the snapshot under a different sha to simulate a revision race.
        stale_revision = "b" * 40
        snapshot_dir = tmp_path.joinpath(
            "hub", "models--org--demo", "snapshots", stale_revision
        )
        snapshot_dir.mkdir(parents=True)
        (snapshot_dir / "config.json").write_bytes(b"{}")

        fake_download = patch(self._DOWNLOAD_TARGET, return_value=str(snapshot_dir))
        with fake_download, pytest.raises(RuntimeError, match="revision mismatch"):
            download_spec(spec)

    def test_gated_repo_surfaces_as_gated_model_error(self) -> None:
        """A ``GatedRepoError`` from the download call becomes ``GatedModelError``."""
        from unittest.mock import Mock

        from huggingface_hub.errors import GatedRepoError

        gated = GatedRepoError("gated", response=Mock())
        fake_download = patch(self._DOWNLOAD_TARGET, side_effect=gated)
        with fake_download, pytest.raises(GatedModelError):
            download_spec(_spec())

    def test_local_files_only_refuses_when_absent(self) -> None:
        """With ``local_files_only=True`` a cache miss raises an "offline" error."""
        from huggingface_hub.errors import LocalEntryNotFoundError

        not_cached = LocalEntryNotFoundError("not cached")
        fake_download = patch(self._DOWNLOAD_TARGET, side_effect=not_cached)
        with fake_download, pytest.raises(RuntimeError, match="offline"):
            download_spec(_spec(), local_files_only=True)

    def test_repository_not_found_raises_runtime_error(self) -> None:
        """A ``RepositoryNotFoundError`` is reported as a ``RuntimeError``."""
        from unittest.mock import Mock

        from huggingface_hub.errors import RepositoryNotFoundError

        missing_repo = RepositoryNotFoundError("missing", response=Mock())
        fake_download = patch(self._DOWNLOAD_TARGET, side_effect=missing_repo)
        with fake_download, pytest.raises(RuntimeError, match="HF repository not found"):
            download_spec(_spec())

    def test_resolve_revision_falls_back_to_expected_outside_snapshot_layout(
        self, tmp_path: Path
    ) -> None:
        """A plain directory (no snapshot layout) resolves to the expected revision."""
        plain_dir = tmp_path / "local-model"
        plain_dir.mkdir()
        expected_revision = "a" * 40
        assert _resolve_revision(plain_dir, expected_revision) == expected_revision