Python · 3477 bytes Raw Blame History
1 """Tiny-model fixture for integration tests.
2
3 Mirrors ``dlm.tests.fixtures.tiny_model``: session-scoped snapshot of
4 SmolLM2-135M-Instruct, reused across the whole test run. The model is
5 small enough (~280 MB on disk, ~600 MB in fp32 VRAM) to make integration
6 tests feasible in CI.
7
8 Tests using this fixture must carry ``@pytest.mark.slow`` and
9 ``@pytest.mark.online`` — the default test selection excludes both.
10 """
11
12 from __future__ import annotations
13
14 import os
15 from collections.abc import Iterator
16 from pathlib import Path
17
18 import pytest
19
# Hugging Face Hub repository id of the tiny model used by integration tests.
TINY_MODEL_HF_ID = "HuggingFaceTB/SmolLM2-135M-Instruct"
# Hub revision to fetch; overridable through the environment, falling back
# to the ``main`` branch when the variable is not set.
TINY_MODEL_REVISION = os.getenv("DLM_SWAY_TINY_MODEL_REVISION", "main")
22
23
def _offline_mode() -> bool:
    """Return ``True`` only when ``SWAY_OFFLINE`` is set to exactly ``"1"``.

    An unset variable is treated as ``"0"``; any other value (including
    ``"true"`` or ``"yes"``) also counts as *not* offline.
    """
    flag = os.getenv("SWAY_OFFLINE", "0")
    return flag == "1"
26
27
def _snapshot_download_with_retry(**kwargs: object) -> str:
    """Call ``snapshot_download`` under a bounded retry policy.

    F03 (Audit 03) observed an integration-lane macOS run that hung
    20+ minutes inside ``snapshot_download``'s cache-resolution path
    after HF Hub connectivity briefly dropped. A silent stall is the
    worst UX: the job times out with zero test output and no
    actionable error. This wrapper turns a transient network blip into
    a short back-off followed by a failure that surfaces cleanly.

    Policy, as implemented here:

    * ``etag_timeout`` defaults to 10 s; a value supplied by the caller
      takes precedence.
    * ``OSError`` and ``RuntimeError`` are retried, for at most 3
      attempts in total. Waits between attempts come from
      ``wait_exponential(multiplier=5, min=5, max=30)`` (on the order of
      5 s, then 10 s). No jitter is applied.
    * When ``local_files_only`` is truthy only a single attempt is made:
      no network is involved, so a cache miss will not heal by waiting.
    * ``reraise=True`` makes the last underlying exception propagate
      unchanged instead of being wrapped in ``tenacity.RetryError``.

    NOTE: there is no overall per-attempt timeout here. ``etag_timeout``
    only bounds the metadata probe, not the file transfers themselves.

    Args:
        **kwargs: Forwarded verbatim to
            ``huggingface_hub.snapshot_download``.

    Returns:
        The local snapshot directory path reported by
        ``snapshot_download``.

    Raises:
        OSError, RuntimeError: The error from the final attempt once the
            retry budget is exhausted.
        Exception: Any other exception type propagates immediately
            without a retry.
    """
    from huggingface_hub import snapshot_download
    from tenacity import (
        Retrying,
        retry_if_exception_type,
        stop_after_attempt,
        wait_exponential,
    )

    # Merge instead of passing ``etag_timeout=10`` next to ``**kwargs``:
    # a caller that supplies its own ``etag_timeout`` would otherwise hit
    # "got multiple values for keyword argument". The bound is pinned
    # explicitly so it does not depend on the library's default.
    call_kwargs: dict[str, object] = {"etag_timeout": 10, **kwargs}

    # Cache-only lookups are deterministic; sleeping between retries
    # would only delay the inevitable "not cached" error.
    max_attempts = 1 if call_kwargs.get("local_files_only") else 3

    retry_types: tuple[type[BaseException], ...] = (OSError, RuntimeError)
    for attempt in Retrying(
        stop=stop_after_attempt(max_attempts),
        wait=wait_exponential(multiplier=5, min=5, max=30),
        retry=retry_if_exception_type(retry_types),
        reraise=True,
    ):
        with attempt:
            result: str = snapshot_download(**call_kwargs)  # type: ignore[arg-type]
            return result
    # ``reraise=True`` means the Retrying loop always either returns
    # (above) or propagates the last exception — this line is
    # unreachable, but keeps mypy happy with a pointed sentinel.
    raise RuntimeError("snapshot_download retry loop exhausted without a return")
68
69
@pytest.fixture(scope="session")
def tiny_model_dir(tmp_path_factory: pytest.TempPathFactory) -> Iterator[Path]:
    """Yield the local snapshot directory of the tiny model.

    The Hugging Face offline guard variables (set by the unit-test
    autouse fixture) are removed from the environment before the
    download so that ``snapshot_download`` is allowed to fetch. Tests
    opt in via ``@pytest.mark.online``.

    Variables that had a value are put back when the fixture is torn
    down, or immediately if the download raises; variables that were
    not set beforehand are left alone. ``tmp_path_factory`` is requested
    but not used by the body.
    """
    offline_guards = ("HF_HUB_OFFLINE", "TRANSFORMERS_OFFLINE", "HF_DATASETS_OFFLINE")

    # Remember each guard's previous value (``None`` when it was unset)
    # while removing it from the environment.
    saved: dict[str, str | None] = {}
    for name in offline_guards:
        saved[name] = os.environ.pop(name, None)

    try:
        snapshot = _snapshot_download_with_retry(
            repo_id=TINY_MODEL_HF_ID,
            revision=TINY_MODEL_REVISION,
            local_files_only=_offline_mode(),
        )
        yield Path(snapshot)
    finally:
        # Restore only the guards that actually had a value before.
        os.environ.update(
            {name: value for name, value in saved.items() if value is not None}
        )
93 os.environ[k] = v