@@ -0,0 +1,71 @@ |
| 1 | +"""F08 + audit stronger-test #12: cross-process determinism. |
| 2 | + |
| 3 | +The ``_NullView`` RNG used ``hash(prompt)`` as part of its seed. Python |
| 4 | +salts the built-in ``hash()`` per interpreter invocation via |
| 5 | +``PYTHONHASHSEED``, which meant the null-calibration output was |
| 6 | +process-salted even for a fixed ``(seed, prompt)`` pair. The dummy |
| 7 | +backend's null-stats disk cache could serve stale values on a restart |
| 8 | +after any CI run that set a different ``PYTHONHASHSEED``. |
| 9 | + |
| 10 | +This test runs the same null-calibration call in two subprocesses with |
| 11 | +explicitly different ``PYTHONHASHSEED`` values and asserts the |
| 12 | +resulting TokenDist logprobs are byte-identical — the stable-seed fix |
| 13 | +in concrete form. Before the fix this test fails in seconds; after, it |
| 14 | +passes. |
| 15 | +""" |
| 16 | + |
| 17 | +from __future__ import annotations |
| 18 | + |
| 19 | +import json |
| 20 | +import os |
| 21 | +import subprocess |
| 22 | +import sys |
| 23 | +from pathlib import Path |
| 24 | + |
# Third ancestor directory of this file (``parents[0]`` is the directory that
# contains it). Passed as ``cwd`` to the worker subprocess in ``_run_worker``.
# NOTE(review): presumably the repository root, which assumes this file sits
# two directories below it — confirm if the file is ever moved.
REPO_ROOT = Path(__file__).resolve().parents[2]
| 26 | + |
| 27 | + |
# Source of the script that ``_run_worker`` executes in a child interpreter
# via ``python -c``. The script builds a ``_NullView`` with fixed constructor
# arguments (``seed=42``), requests the next-token distribution for one fixed
# prompt, casts the logprobs to float64 and writes them to stdout as a JSON
# object under the ``"logprobs"`` key for the parent process to parse.
_WORKER = r"""
import json
import sys

import numpy as np

from dlm_sway.backends.dummy import DummyResponses, _NullView

view = _NullView(
 base_responses=DummyResponses(),
 seed=42,
 init_scale=0.02,
 rank_scale=1.0,
)
dist = view.next_token_dist("cross-process-determinism")
payload = {"logprobs": dist.logprobs.astype(np.float64).tolist()}
json.dump(payload, sys.stdout)
"""
| 46 | + |
| 47 | + |
def _run_worker(pythonhashseed: str) -> list[float]:
    """Execute ``_WORKER`` in a fresh interpreter and return its logprobs.

    The child process inherits the current environment with
    ``PYTHONHASHSEED`` overridden, runs with ``REPO_ROOT`` as its working
    directory, and is expected to print a JSON object with a
    ``"logprobs"`` key on stdout.

    Args:
        pythonhashseed: Value to place in the child's ``PYTHONHASHSEED``
            environment variable.

    Returns:
        The ``"logprobs"`` entry of the child's JSON output, as a list.

    Raises:
        subprocess.CalledProcessError: If the child exits with a non-zero
            status (``check=True``).
    """
    # Copy of the parent's environment with only the hash seed replaced.
    child_env = {**os.environ, "PYTHONHASHSEED": pythonhashseed}
    command = [sys.executable, "-c", _WORKER]
    completed = subprocess.run(  # noqa: S603 — controlled local invocation
        command,
        cwd=REPO_ROOT,
        env=child_env,
        text=True,
        capture_output=True,
        check=True,
    )
    return list(json.loads(completed.stdout)["logprobs"])
| 61 | + |
| 62 | + |
def test_null_view_is_invariant_under_pythonhashseed() -> None:
    """Check that the worker's logprobs do not depend on PYTHONHASHSEED.

    The worker script is run twice, once with ``PYTHONHASHSEED=1`` and once
    with ``PYTHONHASHSEED=99991``; both runs use the same seed and prompt,
    so the two returned logprob lists must compare equal.
    """
    hash_seeds = ("1", "99991")
    # Runs happen sequentially, in the order the seeds are listed.
    outputs = [_run_worker(pythonhashseed=value) for value in hash_seeds]
    baseline, other = outputs
    failure_message = (
        "dummy _NullView RNG stream drifted across PYTHONHASHSEED values — "
        "the stable-seed fix in backends/dummy.py (F08) regressed."
    )
    assert baseline == other, failure_message