| 1 |
"""Downloader contract — pinned revision, directory sha256, error paths.""" |
| 2 |
|
| 3 |
from __future__ import annotations |
| 4 |
|
| 5 |
from pathlib import Path |
| 6 |
from unittest.mock import patch |
| 7 |
|
| 8 |
import pytest |
| 9 |
|
| 10 |
from dlm.base_models import BaseModelSpec, GatedModelError, download_spec, sha256_of_directory |
| 11 |
from dlm.base_models.downloader import _resolve_revision |
| 12 |
|
| 13 |
|
| 14 |
def _spec() -> BaseModelSpec:
    """Build the validated demo ``BaseModelSpec`` used as a fixture by these tests.

    The revision is a 40-character placeholder (``"a" * 40``); the download
    tests compare it against the snapshot directory name.
    """
    pinned_revision = "a" * 40
    raw_fields = {
        "key": "demo-1b",
        "hf_id": "org/demo",
        "revision": pinned_revision,
        "architecture": "DemoForCausalLM",
        "params": 1_000_000_000,
        "target_modules": ["q_proj", "v_proj"],
        "template": "chatml",
        "gguf_arch": "demo",
        "tokenizer_pre": "demo",
        "license_spdx": "MIT",
        "requires_acceptance": False,
        "redistributable": True,
        "size_gb_fp16": 2.0,
        "context_length": 4096,
        "recommended_seq_len": 2048,
    }
    return BaseModelSpec.model_validate(raw_fields)
| 34 |
|
| 35 |
|
| 36 |
class TestDirectorySha256:
    """Checks on ``sha256_of_directory`` using throwaway trees under ``tmp_path``."""

    def test_same_contents_same_digest(self, tmp_path: Path) -> None:
        """Two directories holding identical files produce equal digests."""
        left, right = tmp_path / "a", tmp_path / "b"
        for tree in (left, right):
            tree.mkdir()
            tree.joinpath("config.json").write_bytes(b'{"x": 1}')
            tree.joinpath("model.safetensors").write_bytes(b"\x00" * 64)
        assert sha256_of_directory(left) == sha256_of_directory(right)

    def test_different_content_different_digest(self, tmp_path: Path) -> None:
        """Same file name with different bytes must change the digest."""
        left, right = tmp_path / "a", tmp_path / "b"
        for tree, payload in ((left, b'{"x": 1}'), (right, b'{"x": 2}')):
            tree.mkdir()
            tree.joinpath("config.json").write_bytes(payload)
        assert sha256_of_directory(left) != sha256_of_directory(right)

    def test_different_paths_different_digest(self, tmp_path: Path) -> None:
        """Same bytes under a different file name must change the digest."""
        left, right = tmp_path / "a", tmp_path / "b"
        for tree, file_name in ((left, "config.json"), (right, "other.json")):
            tree.mkdir()
            tree.joinpath(file_name).write_bytes(b"content")
        assert sha256_of_directory(left) != sha256_of_directory(right)

    def test_deterministic_across_runs(self, tmp_path: Path) -> None:
        """Hashing the same unchanged tree twice yields the same digest."""
        tree = tmp_path / "r"
        tree.mkdir()
        for file_name, payload in (("a.txt", b"hello"), ("b.txt", b"world")):
            tree.joinpath(file_name).write_bytes(payload)
        digests = [sha256_of_directory(tree), sha256_of_directory(tree)]
        assert digests[0] == digests[1]

    def test_missing_directory_raises(self, tmp_path: Path) -> None:
        """A path that does not exist is rejected with ``NotADirectoryError``."""
        absent = tmp_path / "absent"
        with pytest.raises(NotADirectoryError):
            sha256_of_directory(absent)

    def test_only_files_included_not_dirs(self, tmp_path: Path) -> None:
        """Adding an empty subdirectory leaves the digest unchanged."""
        tree = tmp_path / "r"
        tree.mkdir()
        tree.joinpath("file.txt").write_bytes(b"hi")
        digest_before = sha256_of_directory(tree)
        tree.joinpath("empty-dir").mkdir()
        digest_after = sha256_of_directory(tree)
        assert digest_before == digest_after
| 87 |
|
| 88 |
|
| 89 |
class TestDownloadSpec:
    """Contract tests for ``download_spec`` with ``huggingface_hub.snapshot_download`` patched."""

    def test_returns_result_with_pinned_revision(self, tmp_path: Path) -> None:
        """A snapshot named after the spec revision comes back with path, revision and digest."""
        spec = _spec()
        snapshot_dir = tmp_path.joinpath("hub", "models--org--demo", "snapshots", spec.revision)
        snapshot_dir.mkdir(parents=True)
        snapshot_dir.joinpath("config.json").write_bytes(b'{"arch":"demo"}')

        hub_call = patch("huggingface_hub.snapshot_download", return_value=str(snapshot_dir))
        with hub_call:
            result = download_spec(spec)

        assert result.path == snapshot_dir
        assert result.revision == spec.revision
        # The reported digest must equal an independent hash of the same directory.
        assert result.sha256 == sha256_of_directory(snapshot_dir)

    def test_revision_mismatch_raises(self, tmp_path: Path) -> None:
        """A snapshot directory named for another sha triggers a "revision mismatch" error."""
        spec = _spec()
        # Place the snapshot under a DIFFERENT sha to simulate a revision race.
        unexpected_sha = "b" * 40
        snapshot_dir = tmp_path.joinpath("hub", "models--org--demo", "snapshots", unexpected_sha)
        snapshot_dir.mkdir(parents=True)
        snapshot_dir.joinpath("config.json").write_bytes(b"{}")

        hub_call = patch("huggingface_hub.snapshot_download", return_value=str(snapshot_dir))
        with hub_call, pytest.raises(RuntimeError, match="revision mismatch"):
            download_spec(spec)

    def test_gated_repo_surfaces_as_gated_model_error(self) -> None:
        """A ``GatedRepoError`` from the hub call is re-raised as ``GatedModelError``."""
        from unittest.mock import Mock

        from huggingface_hub.errors import GatedRepoError

        hub_failure = GatedRepoError("gated", response=Mock())
        hub_call = patch("huggingface_hub.snapshot_download", side_effect=hub_failure)
        with hub_call, pytest.raises(GatedModelError):
            download_spec(_spec())

    def test_local_files_only_refuses_when_absent(self) -> None:
        """With ``local_files_only=True`` a cache miss becomes a ``RuntimeError`` matching "offline"."""
        from huggingface_hub.errors import LocalEntryNotFoundError

        hub_failure = LocalEntryNotFoundError("not cached")
        hub_call = patch("huggingface_hub.snapshot_download", side_effect=hub_failure)
        with hub_call, pytest.raises(RuntimeError, match="offline"):
            download_spec(_spec(), local_files_only=True)

    def test_repository_not_found_raises_runtime_error(self) -> None:
        """A ``RepositoryNotFoundError`` becomes a ``RuntimeError`` matching "HF repository not found"."""
        from unittest.mock import Mock

        from huggingface_hub.errors import RepositoryNotFoundError

        hub_failure = RepositoryNotFoundError("missing", response=Mock())
        hub_call = patch("huggingface_hub.snapshot_download", side_effect=hub_failure)
        with hub_call, pytest.raises(RuntimeError, match="HF repository not found"):
            download_spec(_spec())

    def test_resolve_revision_falls_back_to_expected_outside_snapshot_layout(
        self, tmp_path: Path
    ) -> None:
        """For a plain directory outside the snapshot layout, the expected revision is returned."""
        expected_revision = "a" * 40
        plain_dir = tmp_path / "local-model"
        plain_dir.mkdir()
        assert _resolve_revision(plain_dir, expected_revision) == expected_revision