tenseleyflow/sway / c8e3bf3

Browse files

tests: subprocess determinism test — pins F08 + stronger-test #12

Authored by espadonne
SHA
c8e3bf3834a88ed3b2713c6844e5d7f1d66879e8
Parents
1f7bf7c
Tree
03a5d41

1 changed file

Status | File | + | -
A tests/unit/test_cross_process_determinism.py 71 0
tests/unit/test_cross_process_determinism.py (added)
@@ -0,0 +1,71 @@
1
+"""F08 + audit stronger-test #12: cross-process determinism.
2
+
3
+The ``_NullView`` RNG used ``hash(prompt)`` as part of its seed. Python
4
+salts the built-in ``hash()`` per interpreter invocation via
5
+``PYTHONHASHSEED``, which meant the null-calibration output was
6
+process-salted even for a fixed ``(seed, prompt)`` pair. The dummy
7
+backend's null-stats disk cache could serve stale values on a restart
8
+after any CI run that set a different ``PYTHONHASHSEED``.
9
+
10
+This test runs the same null-calibration call in two subprocesses with
11
+explicitly different ``PYTHONHASHSEED`` values and asserts the
12
+resulting TokenDist logprobs are byte-identical — the stable-seed fix
13
+in concrete form. Before the fix this test fails in seconds; after, it
14
+passes.
15
+"""
16
+
17
+from __future__ import annotations
18
+
19
+import json
20
+import os
21
+import subprocess
22
+import sys
23
+from pathlib import Path
24
+
25
# Repository root. This file lives at tests/unit/<name>.py, so parents[0] is
# tests/unit, parents[1] is tests, and parents[2] is the checkout root.
REPO_ROOT = Path(__file__).resolve().parents[2]


# Source of the script executed in each child interpreter via ``python -c``.
# It builds a ``_NullView`` with fixed arguments (seed=42), requests the
# next-token distribution for one fixed prompt, and writes the logprobs
# (converted to float64, then to a plain list) as JSON on stdout.
# The text is passed to the child verbatim, so do not add comments inside the
# string unless you intend them to be part of the executed script.
_WORKER = r"""
import json
import sys

import numpy as np

from dlm_sway.backends.dummy import DummyResponses, _NullView

view = _NullView(
    base_responses=DummyResponses(),
    seed=42,
    init_scale=0.02,
    rank_scale=1.0,
)
dist = view.next_token_dist("cross-process-determinism")
payload = {"logprobs": dist.logprobs.astype(np.float64).tolist()}
json.dump(payload, sys.stdout)
"""
46
+
47
+
48
def _run_worker(pythonhashseed: str) -> list[float]:
    """Run the ``_WORKER`` script in a fresh interpreter and return its logprobs.

    Args:
        pythonhashseed: Value exported to the child as ``PYTHONHASHSEED``; the
            rest of the environment is inherited from the current process.

    Returns:
        The ``"logprobs"`` list decoded from the JSON payload the worker
        writes to stdout.

    Raises:
        subprocess.CalledProcessError: If the child exits with a non-zero
            status. The child's captured stderr is echoed to this process's
            stderr first, so the test report shows why the worker failed.
    """
    env = os.environ.copy()
    env["PYTHONHASHSEED"] = pythonhashseed
    try:
        result = subprocess.run(  # noqa: S603 — controlled local invocation
            [sys.executable, "-c", _WORKER],
            check=True,
            capture_output=True,
            text=True,
            env=env,
            cwd=REPO_ROOT,
        )
    except subprocess.CalledProcessError as exc:
        # The exception message only carries the command and exit status; the
        # captured traceback of the child would otherwise be lost. Echo it so
        # pytest's captured-stderr section shows the real failure.
        if exc.stderr:
            sys.stderr.write(exc.stderr)
        raise
    payload = json.loads(result.stdout)
    return list(payload["logprobs"])
61
+
62
+
63
def test_null_view_is_invariant_under_pythonhashseed() -> None:
    """Logprobs for a fixed seed and prompt must not depend on PYTHONHASHSEED.

    The worker is run twice, once per hash seed, and the two decoded logprob
    lists are required to compare equal.
    """
    # Evaluated left to right: the "1" run happens before the "99991" run.
    first, second = (_run_worker(pythonhashseed=salt) for salt in ("1", "99991"))
    drift_message = (
        "dummy _NullView RNG stream drifted across PYTHONHASHSEED values — "
        "the stable-seed fix in backends/dummy.py (F08) regressed."
    )
    assert first == second, drift_message