tenseleyflow/documentlanguagemodel / 8a9ac8c

Browse files

test: slow+online tiny-model smoke so CI slow-tests job runs real work (audit-02 M3)

Authored by espadonne
SHA
8a9ac8c51a7fb9a645fee906e47001ceb5be66b0
Parents
d05d987
Tree
74a9d3c

1 changed file

Status File + -
A tests/integration/test_tiny_model_smoke.py 46 0
tests/integration/test_tiny_model_smoke.py added
@@ -0,0 +1,46 @@
1
+"""Minimal `slow` + `online` sanity check on the tiny-model fixture.
2
+
3
+Placeholder until Sprint 09 lands the real LoRA-loop assertions (audit 02
4
+M3). What this guards today:
5
+
6
+- The cache-and-pre-warm CI step actually downloads SmolLM2-135M.
7
+- The downloaded cache contains a tokenizer with a chat template.
8
+- Subsequent `tiny_model_dir` invocations are a no-op (cache hit).
9
+
10
+Scope is deliberately narrow so the slow-tests CI job has *something*
11
+non-trivial to exercise.
12
+"""
13
+
14
+from __future__ import annotations
15
+
16
+from pathlib import Path
17
+
18
+import pytest
19
+
20
+
21
@pytest.mark.slow
@pytest.mark.online
def test_tiny_model_dir_yields_usable_tokenizer(tiny_model_dir: Path) -> None:
    """Load the tokenizer from the fixture directory and sanity-check it.

    Two things are verified: the tokenizer carries a chat template, and a
    plain-text encode/decode roundtrip reproduces the input string.
    """
    # Deferred import: keeps collection cheap for the fast subset, since
    # importing transformers takes on the order of seconds.
    from transformers import AutoTokenizer

    sample_text = "hello world"
    missing_template_msg = (
        "SmolLM2-135M-Instruct must ship a chat template out of the box; "
        "absence here signals a broken cache or an upstream revision drift"
    )

    tok = AutoTokenizer.from_pretrained(str(tiny_model_dir))
    assert tok.chat_template, missing_template_msg

    # Vocab sanity only: the chat template is not applied and no special
    # tokens are added, so the decoded text should match the input.
    token_ids = tok.encode(sample_text, add_special_tokens=False)
    assert token_ids
    decoded = tok.decode(token_ids)
    assert decoded.strip() == sample_text
37
+
38
+
39
@pytest.mark.slow
@pytest.mark.online
def test_tiny_model_dir_cache_hit_is_fast(tiny_model_dir: Path) -> None:
    """Resolving the tiny model again must return the fixture's path.

    NOTE(review): despite the test name, no timing is measured here; the
    only check is that a fresh ``tiny_model_path()`` call equals the path
    the ``tiny_model_dir`` fixture supplied.
    """
    from tests.fixtures.tiny_model import tiny_model_path

    resolved_again = tiny_model_path()
    assert resolved_again == tiny_model_dir