"""`dlm.share.hf_sink` — push/pull via huggingface_hub.

Covers the sink without hitting the network: `huggingface_hub`
functions (`create_repo`, `upload_file`, `hf_hub_download`) are
monkeypatched to stand-ins so we can assert call shape, README
rendering, and error translation.
"""
8
9 from __future__ import annotations
10
11 from pathlib import Path
12
13 import pytest
14
15 from dlm.share import hf_sink
16 from dlm.share.errors import SinkError
17 from dlm.share.hf_sink import HFUploadSummary, _render_readme, pull_hf, push_hf
18
19
20 class _FakeHfHubHTTPError(Exception):
21 """Minimal stand-in for huggingface_hub.utils.HfHubHTTPError.
22
23 The real class accepts (message, response, server_message) but the
24 sink only catches by type and formats `str(exc)` into its
25 SinkError — any Exception subclass raised where the real error
26 would be will produce the same observable result if we patch the
27 import target.
28 """
29
30
@pytest.fixture
def pack(tmp_path: Path) -> Path:
    """Write a dummy pack file (``b"PACK"`` repeated 512 times) and return its path."""
    target = tmp_path.joinpath("adapter.dlm.pack")
    payload = b"PACK" * 512
    target.write_bytes(payload)
    return target
36
37
@pytest.fixture
def patched_hub(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
) -> dict[str, list[dict[str, object]]]:
    """Patch huggingface_hub symbols the sink deferred-imports.

    Returns a capture dict the tests can inspect for call args: one
    list per patched function (`create_repo`, `upload_file`,
    `hf_hub_download`), each entry being the kwargs of one call.
    Default behavior is success — tests that want a failure replace
    the closure after the fixture returns.

    The fake download materializes its payload under this test's own
    `tmp_path` instead of a fixed system-wide location, so tests stay
    isolated from each other (and from parallel workers) and nothing
    is left behind outside pytest's managed temp tree.
    """
    captured: dict[str, list[dict[str, object]]] = {
        "create_repo": [],
        "upload_file": [],
        "hf_hub_download": [],
    }

    def _create_repo(**kwargs: object) -> None:
        captured["create_repo"].append(kwargs)

    def _upload_file(**kwargs: object) -> str:
        captured["upload_file"].append(kwargs)
        # Real hub returns a CommitInfo; sink str()s it. A string is fine.
        return f"https://huggingface.co/{kwargs['repo_id']}/blob/main/{kwargs['path_in_repo']}"

    def _hf_hub_download(**kwargs: object) -> str:
        captured["hf_hub_download"].append(kwargs)
        # Stand in for the hub cache: a per-test scratch file with known content.
        scratch = tmp_path / "fake-hf-cache" / "adapter.dlm.pack"
        scratch.parent.mkdir(parents=True, exist_ok=True)
        scratch.write_bytes(b"downloaded bytes")
        return str(scratch)

    import huggingface_hub

    monkeypatch.setattr(huggingface_hub, "create_repo", _create_repo, raising=False)
    monkeypatch.setattr(huggingface_hub, "upload_file", _upload_file, raising=False)
    monkeypatch.setattr(huggingface_hub, "hf_hub_download", _hf_hub_download, raising=False)

    # The sink imports HfHubHTTPError from huggingface_hub.utils; replace
    # it with our stand-in so tests that want a failure can raise it.
    import huggingface_hub.utils

    monkeypatch.setattr(huggingface_hub.utils, "HfHubHTTPError", _FakeHfHubHTTPError, raising=False)
    return captured
80
81
class TestPushHf:
    """`push_hf` against the stubbed hub: call shape, error translation, progress."""

    def test_missing_file_refused(self, tmp_path: Path) -> None:
        """A pack path that does not exist is rejected with SinkError."""
        absent = tmp_path / "nope.pack"
        with pytest.raises(SinkError, match="pack file missing"):
            push_hf(absent, "user/repo")

    def test_happy_path_creates_repo_and_uploads_pack_and_readme(
        self, pack: Path, patched_hub: dict[str, list[dict[str, object]]]
    ) -> None:
        """A successful push ensures the repo, then uploads the pack and a README."""
        fields = {
            "dlm_id": "01K...",
            "base_model": "smollm2-135m",
            "adapter_version": "1",
        }
        result = push_hf(pack, "user/myadapter", private=True, readme_fields=fields)

        assert isinstance(result, HFUploadSummary)
        assert result.repo_id == "user/myadapter"

        # Exactly one create_repo call, carrying the expected arguments.
        repo_calls = patched_hub["create_repo"]
        assert len(repo_calls) == 1
        repo_kwargs = repo_calls[0]
        assert repo_kwargs["repo_id"] == "user/myadapter"
        assert repo_kwargs["private"] is True
        assert repo_kwargs["exist_ok"] is True

        # Exactly two uploads, in order: the pack first, the README second.
        uploads = patched_hub["upload_file"]
        assert len(uploads) == 2
        pack_upload, readme_upload = uploads
        assert pack_upload["path_in_repo"] == "adapter.dlm.pack"
        assert pack_upload["repo_id"] == "user/myadapter"
        assert readme_upload["path_in_repo"] == "README.md"

        # The README is sent as raw bytes and mentions the supplied fields.
        payload = readme_upload["path_or_fileobj"]
        assert isinstance(payload, bytes)
        text = payload.decode("utf-8")
        assert "01K..." in text
        assert "smollm2-135m" in text

    def test_create_repo_failure_translates_to_sink_error(
        self,
        pack: Path,
        patched_hub: dict[str, list[dict[str, object]]],
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        """A hub error from create_repo surfaces as SinkError."""
        import huggingface_hub

        def _deny_repo(**kwargs: object) -> None:
            raise _FakeHfHubHTTPError("access denied")

        # Override the fixture's succeeding stub with one that always fails.
        monkeypatch.setattr(huggingface_hub, "create_repo", _deny_repo, raising=False)

        with pytest.raises(SinkError, match="failed to ensure repo"):
            push_hf(pack, "user/myadapter")

    def test_upload_failure_translates_to_sink_error(
        self,
        pack: Path,
        patched_hub: dict[str, list[dict[str, object]]],
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        """A hub error from the first upload surfaces as SinkError."""
        import huggingface_hub

        def _reject_upload(**kwargs: object) -> None:
            raise _FakeHfHubHTTPError("quota exceeded")

        monkeypatch.setattr(huggingface_hub, "upload_file", _reject_upload, raising=False)

        with pytest.raises(SinkError, match="upload failed"):
            push_hf(pack, "user/myadapter")

    def test_readme_upload_failure_translates_to_sink_error(
        self,
        pack: Path,
        patched_hub: dict[str, list[dict[str, object]]],
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        """A hub error on the second upload is reported as a README failure."""
        import huggingface_hub

        upload_count = 0

        def _fail_second_upload(**kwargs: object) -> str:
            # First call succeeds like the default stub; the second one blows up.
            nonlocal upload_count
            upload_count += 1
            if upload_count == 2:
                raise _FakeHfHubHTTPError("readme denied")
            return f"https://huggingface.co/{kwargs['repo_id']}/blob/main/{kwargs['path_in_repo']}"

        monkeypatch.setattr(huggingface_hub, "upload_file", _fail_second_upload, raising=False)

        with pytest.raises(SinkError, match="README upload failed"):
            push_hf(pack, "user/myadapter")

    def test_progress_fires_with_full_size(
        self, pack: Path, patched_hub: dict[str, list[dict[str, object]]]
    ) -> None:
        """The progress callback starts at zero and ends at the pack's full size."""
        events: list[tuple[int, int]] = []

        def _record(done: int, total: int) -> None:
            events.append((done, total))

        push_hf(pack, "user/myadapter", progress=_record)

        size = pack.stat().st_size
        # First report is the pre-upload (0, size); last is the completed (size, size).
        assert events[0] == (0, size)
        assert events[-1] == (size, size)
185
186
class TestPullHf:
    """`pull_hf` against the stubbed hub: output file, error translation, progress."""

    def test_happy_path_downloads_to_out_path(
        self, tmp_path: Path, patched_hub: dict[str, list[dict[str, object]]]
    ) -> None:
        """The stub's payload lands at the requested path and its size is returned."""
        destination = tmp_path / "pulled.pack"
        byte_count = pull_hf("user/myadapter", destination)

        assert destination.is_file()
        assert destination.read_bytes() == b"downloaded bytes"
        assert byte_count == len(b"downloaded bytes")

        # One download call, addressed at the pack file of a model repo.
        downloads = patched_hub["hf_hub_download"]
        assert len(downloads) == 1
        download_kwargs = downloads[0]
        assert download_kwargs["repo_id"] == "user/myadapter"
        assert download_kwargs["filename"] == "adapter.dlm.pack"
        assert download_kwargs["repo_type"] == "model"

    def test_download_failure_translates_to_sink_error(
        self,
        tmp_path: Path,
        patched_hub: dict[str, list[dict[str, object]]],
        monkeypatch: pytest.MonkeyPatch,
    ) -> None:
        """A hub error from hf_hub_download surfaces as SinkError."""
        import huggingface_hub

        def _missing(**kwargs: object) -> None:
            raise _FakeHfHubHTTPError("not found")

        # Override the fixture's succeeding stub with one that always fails.
        monkeypatch.setattr(huggingface_hub, "hf_hub_download", _missing, raising=False)

        destination = tmp_path / "out.pack"
        with pytest.raises(SinkError, match="download failed"):
            pull_hf("user/myadapter", destination)

    def test_creates_parent_dir(
        self, tmp_path: Path, patched_hub: dict[str, list[dict[str, object]]]
    ) -> None:
        """Missing parent directories of the output path are created."""
        destination = tmp_path.joinpath("nested", "dir", "pulled.pack")
        pull_hf("user/myadapter", destination)
        assert destination.is_file()

    def test_progress_fires_at_end(
        self, tmp_path: Path, patched_hub: dict[str, list[dict[str, object]]]
    ) -> None:
        """The progress callback is invoked exactly once, at completion."""
        events: list[tuple[int, int]] = []

        def _record(done: int, total: int) -> None:
            events.append((done, total))

        pull_hf("user/myadapter", tmp_path / "out.pack", progress=_record)

        # The stubbed download yields b"downloaded bytes" (16 bytes); the only
        # callback expected is the end-of-download one.
        expected_size = len(b"downloaded bytes")
        assert events == [(expected_size, expected_size)]
234
235
class TestRenderReadme:
    """Shape checks on the README body produced by `_render_readme`."""

    def test_shape_contains_core_fields(self) -> None:
        """Front matter, repo id, and every supplied field appear in the output."""
        fields = {
            "dlm_id": "01K...",
            "base_model": "qwen2.5-1.5b",
            "adapter_version": "3",
            "license": "Apache-2.0",
        }
        rendered = _render_readme("alice/cool-adapter", fields)

        # The body opens with a front-matter fence.
        assert rendered.startswith("---\n")
        expected_fragments = (
            "library_name: dlm",
            "alice/cool-adapter",
            "`01K...`",
            "`qwen2.5-1.5b`",
            "`3`",
            "Apache-2.0",
        )
        for fragment in expected_fragments:
            assert fragment in rendered

    def test_unknown_fields_placeholder(self) -> None:
        """With no fields given, placeholder text is rendered instead."""
        rendered = _render_readme("bob/minimal", {})
        # Absent values show up as `(unknown)`; the license falls back to a pointer.
        for fragment in ("(unknown)", "See the base model's license."):
            assert fragment in rendered

    def test_install_block_references_repo(self) -> None:
        """The install snippet names the repo in a `dlm pull hf:` command."""
        rendered = _render_readme("team/pkg", {})
        assert "dlm pull hf:team/pkg" in rendered
264
265
class TestDeferredImportFallback:
    """Behavior of `push_hf` when `huggingface_hub` cannot be imported."""

    def test_push_raises_sink_error_when_hub_missing(
        self, pack: Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """With huggingface_hub unimportable, push_hf must fail loudly.

        The sink is described as having a pragma'd fallback that catches
        the ImportError from its deferred import and re-raises it as
        SinkError. The missing package is simulated by planting a
        ``None`` entry in ``sys.modules`` before calling through.
        """
        import sys

        # A None entry in sys.modules makes importing that name raise ImportError.
        monkeypatch.setitem(sys.modules, "huggingface_hub", None)
        _ = hf_sink  # silence unused-import lint

        # NOTE(review): a raw ImportError is tolerated here as well, which is
        # looser than the test name implies — confirm against the sink whether
        # this can be narrowed to SinkError alone.
        acceptable = (SinkError, ImportError)
        with pytest.raises(acceptable):
            push_hf(pack, "user/myadapter")