| 1 | """F08 + audit stronger-test #12: cross-process determinism. |
| 2 | |
| 3 | The ``_NullView`` RNG used ``hash(prompt)`` as part of its seed. Python |
| 4 | salts the built-in ``hash()`` per interpreter invocation via |
| 5 | ``PYTHONHASHSEED``, which meant the null-calibration output was |
| 6 | process-salted even for a fixed ``(seed, prompt)`` pair. The dummy |
| 7 | backend's null-stats disk cache could serve stale values on a restart |
| 8 | after any CI run that set a different ``PYTHONHASHSEED``. |
| 9 | |
| 10 | This test runs the same null-calibration call in two subprocesses with |
| 11 | explicitly different ``PYTHONHASHSEED`` values and asserts the |
| 12 | resulting TokenDist logprobs are byte-identical — the stable-seed fix |
| 13 | in concrete form. Before the fix this test fails in seconds; after, it |
| 14 | passes. |
| 15 | """ |
| 16 | |
| 17 | from __future__ import annotations |
| 18 | |
| 19 | import json |
| 20 | import os |
| 21 | import subprocess |
| 22 | import sys |
| 23 | from pathlib import Path |
| 24 | |
| 25 | REPO_ROOT = Path(__file__).resolve().parents[2] |
| 26 | |
| 27 | |
# Source of the script that each subprocess executes via ``python -c``.
# It builds a ``_NullView`` from fixed arguments (seed=42), asks for the
# next-token distribution of one fixed prompt, and writes the logprobs to
# stdout as a JSON object so the parent process can compare runs.
# NOTE(review): the float64 cast is presumably there so the values serialize
# identically whatever dtype ``logprobs`` has natively — confirm.
_WORKER = r"""
import json
import sys

import numpy as np

from dlm_sway.backends.dummy import DummyResponses, _NullView

view = _NullView(
    base_responses=DummyResponses(),
    seed=42,
    init_scale=0.02,
    rank_scale=1.0,
)
dist = view.next_token_dist("cross-process-determinism")
payload = {"logprobs": dist.logprobs.astype(np.float64).tolist()}
json.dump(payload, sys.stdout)
"""
| 46 | |
| 47 | |
def _run_worker(pythonhashseed: str) -> list[float]:
    """Run ``_WORKER`` in a fresh interpreter and return its logprobs.

    The child inherits the current environment, with ``PYTHONHASHSEED``
    overridden by *pythonhashseed*, and runs with ``REPO_ROOT`` as its
    working directory.

    Returns the ``"logprobs"`` list parsed from the JSON object the worker
    prints on stdout.

    Raises ``subprocess.CalledProcessError`` when the worker exits non-zero;
    the worker's captured stderr is echoed to this process's stderr first.
    """
    env = os.environ.copy()
    env["PYTHONHASHSEED"] = pythonhashseed
    try:
        result = subprocess.run(  # noqa: S603 — controlled local invocation
            [sys.executable, "-c", _WORKER],
            check=True,
            capture_output=True,
            text=True,
            env=env,
            cwd=REPO_ROOT,
        )
    except subprocess.CalledProcessError as exc:
        # capture_output=True keeps the child's traceback out of the
        # exception message; replay it so a crashing worker is diagnosable
        # from the test report instead of a bare exit status.
        sys.stderr.write(exc.stderr or "")
        raise
    payload = json.loads(result.stdout)
    return list(payload["logprobs"])
| 61 | |
| 62 | |
def test_null_view_is_invariant_under_pythonhashseed() -> None:
    """Workers that differ only in PYTHONHASHSEED must emit equal logprobs."""
    # Two arbitrary, distinct hash seeds; the worker output must not depend
    # on which one the interpreter was started with.
    baseline, reseeded = (
        _run_worker(pythonhashseed=salt) for salt in ("1", "99991")
    )
    regression_message = (
        "dummy _NullView RNG stream drifted across PYTHONHASHSEED values — "
        "the stable-seed fix in backends/dummy.py (F08) regressed."
    )
    assert baseline == reseeded, regression_message