@@ -123,6 +123,53 @@ class TestPromptCollapse: |
| 123 | 123 | # Fast decay → short half-life → fail against 500-token threshold. |
| 124 | 124 | assert result.verdict == Verdict.FAIL |
| 125 | 125 | |
| 126 | + def test_tokenizer_aware_stuffing_uses_pad_token(self) -> None: |
| 127 | + """B13: when a tokenizer is supplied, the stuffing is built from |
| 128 | + the model's pad/unk token, not the hardcoded English string.""" |
| 129 | + from dlm_sway.probes.prompt_collapse import _stuffing |
| 130 | + |
| 131 | + class _FakeTokenizer: |
| 132 | + pad_token = "<pad>" |
| 133 | + |
| 134 | + def encode(self, text: str) -> list[int]: |
| 135 | + # 1 id per character of text — simple enough to verify length. |
| 136 | + return [1] * len(text) |
| 137 | + |
| 138 | + def decode(self, ids: list[int], *, skip_special_tokens: bool = False) -> str: |
| 139 | + del skip_special_tokens |
| 140 | + return "<pad>" * len(ids) |
| 141 | + |
| 142 | + out = _stuffing(50, tokenizer=_FakeTokenizer()) |
| 143 | + # No English noise from the legacy fallback. |
| 144 | + assert "archived for historical record" not in out |
| 145 | + assert "<pad>" in out |
| 146 | + |
| 147 | + def test_legacy_path_used_when_no_tokenizer(self) -> None: |
| 148 | + """The default ``_stuffing(n)`` (no tokenizer) returns the legacy English string."""
| 149 | + from dlm_sway.probes.prompt_collapse import _stuffing |
| 150 | + |
| 151 | + out = _stuffing(50) |
| 152 | + assert "archived for historical record" in out |
| 153 | + |
| 154 | + def test_legacy_stuffing_spec_field_forces_english(self) -> None: |
| 155 | + """``legacy_stuffing=True`` opts out of the tokenizer path.""" |
| 156 | + backend = _programmed_backend(0.001) |
| 157 | + probe, spec = build_probe( |
| 158 | + { |
| 159 | + "name": "pc", |
| 160 | + "kind": "prompt_collapse", |
| 161 | + "prompts": ["q1"], |
| 162 | + "context_lengths": [0, 256], |
| 163 | + "assert_half_life_tokens": 0, |
| 164 | + "legacy_stuffing": True, |
| 165 | + } |
| 166 | + ) |
| 167 | + # Even if the dummy backend grew a tokenizer, this spec wouldn't |
| 168 | + # use it. Smoke test only: checks that the probe runs end-to-end.
| 169 | + ctx = RunContext(backend=backend) |
| 170 | + result = probe.run(spec, ctx) |
| 171 | + assert result.verdict in (Verdict.PASS, Verdict.FAIL) |
| 172 | + |
| 126 | 173 | def test_error_on_empty_prompts(self) -> None: |
| 127 | 174 | probe, spec = build_probe( |
| 128 | 175 | { |