Python · 2181 bytes Raw Blame History
1 """F08 + audit stronger-test #12: cross-process determinism.
2
3 The ``_NullView`` RNG used ``hash(prompt)`` as part of its seed. Python
4 salts the built-in ``hash()`` per interpreter invocation via
5 ``PYTHONHASHSEED``, which meant the null-calibration output was
6 process-salted even for a fixed ``(seed, prompt)`` pair. The dummy
7 backend's null-stats disk cache could serve stale values on a restart
8 after any CI run that set a different ``PYTHONHASHSEED``.
9
10 This test runs the same null-calibration call in two subprocesses with
11 explicitly different ``PYTHONHASHSEED`` values and asserts the
12 resulting TokenDist logprobs are byte-identical — the stable-seed fix
13 in concrete form. Before the fix this test fails in seconds; after, it
14 passes.
15 """
16
17 from __future__ import annotations
18
19 import json
20 import os
21 import subprocess
22 import sys
23 from pathlib import Path
24
# Directory two levels above the one containing this file; passed as the
# working directory for every worker subprocess. Presumably the repository
# root, as the name suggests — confirm against the test layout.
REPO_ROOT = Path(__file__).resolve().parents[2]
26
27
# Source of the script that each subprocess executes via ``python -c``.
# It builds a ``_NullView`` with fixed constructor arguments, requests the
# next-token distribution for one fixed prompt, and writes the logprobs
# (cast to float64) to stdout as a JSON object under the "logprobs" key.
# Everything is held constant here so that PYTHONHASHSEED is the only thing
# that differs between runs.
_WORKER = r"""
import json
import sys

import numpy as np

from dlm_sway.backends.dummy import DummyResponses, _NullView

view = _NullView(
    base_responses=DummyResponses(),
    seed=42,
    init_scale=0.02,
    rank_scale=1.0,
)
dist = view.next_token_dist("cross-process-determinism")
payload = {"logprobs": dist.logprobs.astype(np.float64).tolist()}
json.dump(payload, sys.stdout)
"""
46
47
def _run_worker(pythonhashseed: str) -> list[float]:
    """Run ``_WORKER`` in a fresh interpreter and return its logprobs.

    Args:
        pythonhashseed: Value exported to the child interpreter as
            ``PYTHONHASHSEED``; the rest of the environment is inherited
            from the current process.

    Returns:
        The ``"logprobs"`` list decoded from the JSON document the worker
        writes to stdout.

    Raises:
        subprocess.CalledProcessError: If the worker exits with a non-zero
            status. The worker's captured stderr is echoed to this
            process's stderr before the exception propagates.
    """
    env = os.environ.copy()
    env["PYTHONHASHSEED"] = pythonhashseed
    try:
        result = subprocess.run(  # noqa: S603 — controlled local invocation
            [sys.executable, "-c", _WORKER],
            check=True,
            capture_output=True,
            text=True,
            env=env,
            cwd=REPO_ROOT,
        )
    except subprocess.CalledProcessError as exc:
        # CalledProcessError's message only names the command and exit
        # status; the captured stderr (e.g. the child's traceback) would
        # otherwise be lost. Echo it so the test report shows the real
        # cause, then let the original exception propagate unchanged.
        if exc.stderr:
            sys.stderr.write(exc.stderr)
        raise
    payload = json.loads(result.stdout)
    return list(payload["logprobs"])
61
62
def test_null_view_is_invariant_under_pythonhashseed() -> None:
    """Logprobs for a fixed seed and prompt must not depend on
    ``PYTHONHASHSEED``.

    Two worker subprocesses are launched with different hash seeds and the
    logprob lists they report are required to compare equal.
    """
    baseline, reseeded = (
        _run_worker(pythonhashseed=hash_seed) for hash_seed in ("1", "99991")
    )
    drift_message = (
        "dummy _NullView RNG stream drifted across PYTHONHASHSEED values — "
        "the stable-seed fix in backends/dummy.py (F08) regressed."
    )
    assert baseline == reseeded, drift_message