tenseleyflow/sway / e79f3d7

Browse files

tests/fixtures/tiny_model: tenacity-backed snapshot_download retry (F03)

Authored by espadonne
SHA
e79f3d701c7a6fb1efe6e5462c8b28e3b7da1a0d
Parents
3134089
Tree
d8c2c79

1 changed file

StatusFile+-
M tests/fixtures/tiny_model.py 43 3
tests/fixtures/tiny_model.pymodified
@@ -25,6 +25,48 @@ def _offline_mode() -> bool:
25
     return os.environ.get("SWAY_OFFLINE", "0") == "1"
25
     return os.environ.get("SWAY_OFFLINE", "0") == "1"
26
 
26
 
27
 
27
 
28
def _snapshot_download_with_retry(**kwargs: object) -> str:
    """Call ``snapshot_download`` with a bounded exponential-backoff retry.

    F03 (Audit 03) observed an integration-lane macOS run that hung
    20+ minutes inside ``snapshot_download``'s cache-resolution path
    after HF Hub connectivity briefly dropped. A silent stall is the
    worst UX: the job times out with zero test output and no
    actionable error. The retry wrapper turns a transient network
    blip into a short back-off and a final failure that surfaces
    cleanly.

    Policy:

    * At most 3 attempts, so at most two waits between them
      (exponential: 5 s, then 10 s; capped at 30 s). No jitter is
      applied.
    * Only ``OSError`` and ``RuntimeError`` (and subclasses) are
      retried; anything else propagates immediately.
    * When ``local_files_only`` is truthy the call is attempted exactly
      once: no network is involved, so a failure is not a transient
      blip and sleeping through the back-off would only delay the
      error.
    * ``etag_timeout`` defaults to 10 s but a caller-supplied value
      wins. This bounds the per-file head/etag probe only; there is no
      overall per-attempt wall-clock timeout here.

    Args:
        **kwargs: Forwarded verbatim to
            ``huggingface_hub.snapshot_download``.

    Returns:
        The local snapshot directory path returned by
        ``snapshot_download``.

    Raises:
        OSError, RuntimeError: The last error once attempts are
            exhausted (``reraise=True`` surfaces the original
            exception rather than tenacity's ``RetryError``).
    """
    from huggingface_hub import snapshot_download
    from tenacity import (
        Retrying,
        retry_if_exception_type,
        stop_after_attempt,
        wait_exponential,
    )

    # Merge rather than pass ``etag_timeout=10`` next to ``**kwargs``:
    # the latter raises ``TypeError`` (duplicate keyword) as soon as a
    # caller supplies its own ``etag_timeout``.
    # NOTE(review): the previous comment cited a 120 s library default
    # for ``etag_timeout`` -- confirm against the installed
    # huggingface_hub version.
    call_kwargs: dict[str, object] = {"etag_timeout": 10, **kwargs}

    # Offline lookups cannot be rescued by waiting; fail fast.
    max_attempts = 1 if kwargs.get("local_files_only") else 3

    retry_types: tuple[type[BaseException], ...] = (OSError, RuntimeError)
    for attempt in Retrying(
        stop=stop_after_attempt(max_attempts),
        wait=wait_exponential(multiplier=5, min=5, max=30),
        retry=retry_if_exception_type(retry_types),
        reraise=True,
    ):
        with attempt:
            result: str = snapshot_download(**call_kwargs)  # type: ignore[arg-type]
            return result
    # ``reraise=True`` means the Retrying loop always either returns
    # (above) or propagates the last exception -- this line is
    # unreachable, but keeps mypy happy with a pointed sentinel.
    raise RuntimeError("snapshot_download retry loop exhausted without a return")
68
+
69
+
28
 @pytest.fixture(scope="session")
70
 @pytest.fixture(scope="session")
29
 def tiny_model_dir(tmp_path_factory: pytest.TempPathFactory) -> Iterator[Path]:
71
 def tiny_model_dir(tmp_path_factory: pytest.TempPathFactory) -> Iterator[Path]:
30
     """Download (or reuse) the tiny model; yield the cached directory.
72
     """Download (or reuse) the tiny model; yield the cached directory.
@@ -33,15 +75,13 @@ def tiny_model_dir(tmp_path_factory: pytest.TempPathFactory) -> Iterator[Path]:
33
     env vars are cleared inside this fixture so ``snapshot_download``
75
     env vars are cleared inside this fixture so ``snapshot_download``
34
     actually fetches.
76
     actually fetches.
35
     """
77
     """
36
-    from huggingface_hub import snapshot_download
37
-
38
     # Clear offline env guards (set by the unit-test autouse fixture).
78
     # Clear offline env guards (set by the unit-test autouse fixture).
39
     prior = {
79
     prior = {
40
         k: os.environ.pop(k, None)
80
         k: os.environ.pop(k, None)
41
         for k in ("HF_HUB_OFFLINE", "TRANSFORMERS_OFFLINE", "HF_DATASETS_OFFLINE")
81
         for k in ("HF_HUB_OFFLINE", "TRANSFORMERS_OFFLINE", "HF_DATASETS_OFFLINE")
42
     }
82
     }
43
     try:
83
     try:
44
-        path = snapshot_download(
84
+        path = _snapshot_download_with_retry(
45
             repo_id=TINY_MODEL_HF_ID,
85
             repo_id=TINY_MODEL_HF_ID,
46
             revision=TINY_MODEL_REVISION,
86
             revision=TINY_MODEL_REVISION,
47
             local_files_only=_offline_mode(),
87
             local_files_only=_offline_mode(),