@@ -38,15 +38,25 @@ def _dist_from_probs(probs: list[float]) -> TokenDist: |
| 38 | 38 | |
| 39 | 39 | class TestMineOutliers: |
| 40 | 40 | def test_ranks_prompts_by_per_prompt_divergence(self) -> None: |
| 41 | | - """Three prompts with planted divergences: ``hi`` has the |
| 42 | | - biggest gap, ``lo`` the smallest. Top-1 = hi, bottom-1 = lo.""" |
| 41 | + """Six prompts with planted divergences: ``hi*`` have the biggest |
| 42 | + gap, ``lo*`` the smallest, ``mid*`` in between. Top-K = hi rows, |
| 43 | + bottom-K = lo rows.""" |
| 43 | 44 | base = _dist_from_probs([0.92, 0.02, 0.02, 0.02, 0.02]) |
| 44 | 45 | ft_flat = _dist_from_probs([0.25, 0.20, 0.20, 0.20, 0.15]) # big KL |
| 45 | 46 | ft_mild = _dist_from_probs([0.70, 0.10, 0.10, 0.05, 0.05]) # mid KL |
| 46 | 47 | ft_same = base # zero KL |
| 47 | 48 | |
| 48 | | - base_dists = {"hi": base, "mid": base, "lo": base} |
| 49 | | - ft_dists = {"hi": ft_flat, "mid": ft_mild, "lo": ft_same} |
| 49 | + # F04 — need ≥ 2·top_k=4 distinct prompts to clear the guard. |
| 50 | + prompts = ["hi1", "hi2", "mid1", "mid2", "lo1", "lo2"] |
| 51 | + base_dists = dict.fromkeys(prompts, base) |
| 52 | + ft_dists = { |
| 53 | + "hi1": ft_flat, |
| 54 | + "hi2": ft_flat, |
| 55 | + "mid1": ft_mild, |
| 56 | + "mid2": ft_mild, |
| 57 | + "lo1": ft_same, |
| 58 | + "lo2": ft_same, |
| 59 | + } |
| 50 | 60 | backend = DummyDifferentialBackend( |
| 51 | 61 | base=DummyResponses(token_dists=base_dists), |
| 52 | 62 | ft=DummyResponses(token_dists=ft_dists), |
@@ -54,37 +64,64 @@ class TestMineOutliers: |
| 54 | 64 | |
| 55 | 65 | result = mine_outliers( |
| 56 | 66 | probe_kind="delta_kl", |
| 57 | | - candidate_prompts=["hi", "mid", "lo"], |
| 67 | + candidate_prompts=prompts, |
| 58 | 68 | backend=backend, |
| 59 | | - top_k=3, |
| 69 | + top_k=2, |
| 60 | 70 | ) |
| 61 | 71 | |
| 62 | 72 | assert isinstance(result, OutlierResult) |
| 63 | 73 | assert result.probe_kind == "delta_kl" |
| 64 | | - # Top is ordered most-positive first. |
| 65 | | - assert [c.prompt for c in result.top] == ["hi", "mid", "lo"] |
| 66 | | - # Bottom is ordered least-positive first. |
| 67 | | - assert [c.prompt for c in result.bottom] == ["lo", "mid", "hi"] |
| 74 | + # Top is most-positive first; bottom is least-positive first (hi/lo pairs tie, so compare as sets). |
| 75 | + top_prompts = {c.prompt for c in result.top} |
| 76 | + bottom_prompts = {c.prompt for c in result.bottom} |
| 77 | + assert top_prompts == {"hi1", "hi2"} |
| 78 | + assert bottom_prompts == {"lo1", "lo2"} |
| 68 | 79 | # Raw values are finite and positive (JS divergence ≥ 0). |
| 69 | 80 | for c in result.top: |
| 70 | 81 | assert math.isfinite(c.raw) |
| 71 | 82 | assert c.raw >= 0.0 |
| 72 | 83 | |
| 73 | | - def test_top_k_clipped_to_pool_size(self) -> None: |
| 84 | + def test_small_pool_raises_f04_guard(self) -> None: |
| 85 | + """F04 (Audit 03) — pool below ``2·top_k`` distinct prompts |
| 86 | + raises SwayError with an actionable hint. Replaces pre-F04 |
| 87 | + 'test_top_k_clipped_to_pool_size' which relied on the same |
| 88 | + degenerate single-prompt case the audit flagged as producing |
| 89 | + top=[p], bottom=[p] — identical lists.""" |
| 90 | + from dlm_sway.core.errors import SwayError |
| 91 | + |
| 74 | 92 | base = _dist_from_probs([0.92, 0.02, 0.02, 0.02, 0.02]) |
| 75 | 93 | ft = _dist_from_probs([0.25, 0.20, 0.20, 0.20, 0.15]) |
| 76 | 94 | backend = DummyDifferentialBackend( |
| 77 | 95 | base=DummyResponses(token_dists={"p": base}), |
| 78 | 96 | ft=DummyResponses(token_dists={"p": ft}), |
| 79 | 97 | ) |
| 80 | | - result = mine_outliers( |
| 81 | | - probe_kind="delta_kl", |
| 82 | | - candidate_prompts=["p"], |
| 83 | | - backend=backend, |
| 84 | | - top_k=10, |
| 98 | + with pytest.raises(SwayError, match="below the 2·top_k"): |
| 99 | + mine_outliers( |
| 100 | + probe_kind="delta_kl", |
| 101 | + candidate_prompts=["p"], |
| 102 | + backend=backend, |
| 103 | + top_k=10, |
| 104 | + ) |
| 105 | + |
| 106 | + def test_small_pool_error_suggests_smaller_top_k(self) -> None: |
| 107 | + """The error message includes a concrete ``--top-k N`` hint the |
| 108 | + user can copy into their CLI invocation.""" |
| 109 | + from dlm_sway.core.errors import SwayError |
| 110 | + |
| 111 | + base = _dist_from_probs([0.92, 0.02, 0.02, 0.02, 0.02]) |
| 112 | + ft = _dist_from_probs([0.25, 0.20, 0.20, 0.20, 0.15]) |
| 113 | + prompts = ["p1", "p2", "p3"] |
| 114 | + backend = DummyDifferentialBackend( |
| 115 | + base=DummyResponses(token_dists=dict.fromkeys(prompts, base)), |
| 116 | + ft=DummyResponses(token_dists=dict.fromkeys(prompts, ft)), |
| 85 | 117 | ) |
| 86 | | - assert len(result.top) == 1 |
| 87 | | - assert len(result.bottom) == 1 |
| 118 | + with pytest.raises(SwayError, match="Pass --top-k 1"): |
| 119 | + mine_outliers( |
| 120 | + probe_kind="delta_kl", |
| 121 | + candidate_prompts=prompts, |
| 122 | + backend=backend, |
| 123 | + top_k=5, |
| 124 | + ) |
| 88 | 125 | |
| 89 | 126 | def test_empty_pool_returns_empty_result(self) -> None: |
| 90 | 127 | backend = DummyDifferentialBackend(base=DummyResponses(), ft=DummyResponses()) |
@@ -99,8 +136,9 @@ class TestMineOutliers: |
| 99 | 136 | |
| 100 | 137 | def test_unsupported_probe_kind_returns_empty(self) -> None: |
| 101 | 138 | """Probes that need a non-``prompts`` spec (leakage, etc.) skip |
| 102 | | - every candidate silently. S17 scope is delta_kl; other probes |
| 103 | | - are documented as future work.""" |
| 139 | + every candidate silently. The F04 floor doesn't fire in that |
| 140 | + case because the scored list is empty, so the empty-result |
| 141 | + path is preserved for the unsupported-kind UX.""" |
| 104 | 142 | base = _dist_from_probs([0.92, 0.02, 0.02, 0.02, 0.02]) |
| 105 | 143 | ft = _dist_from_probs([0.25, 0.20, 0.20, 0.20, 0.15]) |
| 106 | 144 | backend = DummyDifferentialBackend( |