sway Public

Watch 0 Fork 0 Star 0

Python · 12027 bytes Raw Blame History

  
        1
        """Quality-of-output tests for the autogen YAML.
      
        2
        
        3
        The audit's B8 finding was that ``style_fingerprint`` got the leading
      
        4
        sentence of a prose section as its prompt — which elicits doc
      
        5
        *continuation* (a content probe), not stylistic voice. Sprint 05
      
        6
        replaces that with a fixed list of stylistic-elicitation prompts. This
      
        7
        file pins the new contract.
      
        8
        """
      
        9
        
        10
        from __future__ import annotations
      
        11
        
        12
        from pathlib import Path
      
        13
        
        14
        from dlm_sway.core.sections import Section
      
        15
        from dlm_sway.integrations.dlm.autogen import (
      
        16
            _STYLE_ELICITATION_PROMPTS,
      
        17
            build_spec_dict,
      
        18
        )
      
        19
        from dlm_sway.integrations.dlm.resolver import DlmHandle
      
        20
        
        21
        
        22
        def _handle_with_prose_first_sentence() -> DlmHandle:
            """Build a one-section handle whose prose opens with a distinctive line.

            The opener is deliberately strong and doc-specific: it is the kind of
            sentence that, under the old heuristic, would have leaked into the
            style probe, so tests can look for its vocabulary in generated prompts.
            """
            # Adjacent string literals are joined into a single paragraph.
            prose_body = (
                "The mitochondrion is the powerhouse of the cell. "
                "It generates ATP via oxidative phosphorylation. "
                "Inner-membrane folds called cristae increase surface area."
            )
            only_section = Section(id="s1", kind="prose", content=prose_body)
            return DlmHandle(
                dlm_id="x",
                base_model="HuggingFaceTB/SmolLM2-135M-Instruct",
                adapter_path=Path("/tmp/adapter"),
                sections=(only_section,),
                doc_text="whole document",
            )
      
        44
        
        45
        
        46
        def test_style_prompts_use_elicitation_set_not_doc_content() -> None:
            """B8: the style_fingerprint entry draws only on the fixed elicitation set."""
            suite = build_spec_dict(_handle_with_prose_first_sentence())["suite"]
            # Take the first style_fingerprint entry, or None when none was emitted.
            fingerprint = None
            for candidate in suite:
                if candidate["kind"] == "style_fingerprint":
                    fingerprint = candidate
                    break
            assert fingerprint is not None, "autogen should emit a style_fingerprint entry"
            prompts = fingerprint["prompts"]
            # Every emitted prompt must be a member of the elicitation constant.
            assert set(prompts).issubset(set(_STYLE_ELICITATION_PROMPTS))
            # Vocabulary from the fixture's leading prose sentence must not appear.
            for leaked_word in ("mitochondrion", "powerhouse"):
                assert not any(leaked_word in text.lower() for text in prompts)
      
        57
        
        58
        
        59
        def test_style_prompts_nonempty_even_without_prose() -> None:
            """With no prose section at all, the style probe still has prompts to ask."""
            lone_instruction = Section(
                id="i1",
                kind="instruction",
                content="What is X? X is Y.",
                probes=(),
            )
            handle = DlmHandle(
                dlm_id="x",
                base_model="b",
                adapter_path=Path("/tmp/a"),
                sections=(lone_instruction,),
                doc_text=None,
            )
            suite = build_spec_dict(handle)["suite"]
            fingerprint = next(filter(lambda item: item["kind"] == "style_fingerprint", suite), None)
            assert fingerprint is not None
            # The contract pinned here is a floor of four prompts.
            prompt_count = len(fingerprint["prompts"])
            assert prompt_count >= 4
      
        73
        
        74
        
        75
        def test_elicitation_prompts_are_open_ended() -> None:
            """Sanity-check the elicitation constant itself.

            Each prompt should invite prose rather than a single-token
            completion: it must be at least 30 characters long and end with a
            period.
            """
            # Without this guard the loop below would pass vacuously on an
            # empty constant and the test would assert nothing.
            assert _STYLE_ELICITATION_PROMPTS, "elicitation prompt set must not be empty"
            for prompt in _STYLE_ELICITATION_PROMPTS:
                assert len(prompt) >= 30, f"prompt too short to elicit prose: {prompt!r}"
                assert prompt.endswith("."), f"prompt should end with a period: {prompt!r}"
      
        81
        
        82
        
        83
        def _handle_with_many_instruction_probes(n: int) -> DlmHandle:
            """Build a handle carrying ``n`` distinct instruction probes plus one prose section.

            Used to push the prompt pool past ``cluster_kl``'s 20-prompt floor
            (or to stay deliberately below it).
            """
            from dlm_sway.core.sections import SectionProbe

            # The index is embedded in every prompt so that all of them differ.
            generated = []
            for idx in range(n):
                generated.append(SectionProbe(prompt=f"Q{idx}: what is topic {idx}?", gold=f"A{idx}"))
            instruction = Section(id="i1", kind="instruction", content="…", probes=tuple(generated))
            prose = Section(
                id="p1",
                kind="prose",
                content="Prose sentence one. Prose sentence two. Prose sentence three.",
            )
            return DlmHandle(
                dlm_id="x",
                base_model="b",
                adapter_path=Path("/tmp/a"),
                sections=(instruction, prose),
                doc_text=None,
            )
      
        104
        
        105
        
        106
        class TestSkippedProbesRollup:
            """F07 (Audit 03): the ``# skipped: <probe> (<reason>)`` header block.

            ``_render_annotated_yaml`` prepends these comment lines so users see
            which probes the autogen intentionally omitted, without diffing this
            module's docstring.
            """

            def test_prose_only_handle_omits_instruction_heavy_probes(self) -> None:
                """A prose-only ``.dlm`` reports the instruction-dependent probes as skipped.

                Expected skips: adapter_revert, paraphrase_invariance,
                preference_flip and (with a single section)
                section_internalization.
                """
                from dlm_sway.integrations.dlm.autogen import collect_skipped_probe_reasons

                prose = Section(
                    id="s1",
                    kind="prose",
                    content="One paragraph of prose. Second sentence.",
                )
                handle = DlmHandle(
                    dlm_id="x",
                    base_model="b",
                    adapter_path=Path("/tmp/a"),
                    sections=(prose,),
                    doc_text="doc",
                )
                omitted = {kind for kind, _reason in collect_skipped_probe_reasons(handle)}
                for expected in (
                    "adapter_revert",
                    "paraphrase_invariance",
                    "preference_flip",
                    "section_internalization",
                ):
                    assert expected in omitted
                # delta_kl should NOT be skipped — prose provides a fallback
                # prompt pool.
                assert "delta_kl" not in omitted

            def test_instruction_only_handle_omits_prose_heavy_probes(self) -> None:
                """An instruction-only doc reports external_perplexity and leakage as skipped."""
                from dlm_sway.core.sections import SectionProbe
                from dlm_sway.integrations.dlm.autogen import collect_skipped_probe_reasons

                single_probe = SectionProbe(prompt="Q?", gold="A")
                instruction = Section(
                    id="i1",
                    kind="instruction",
                    content="Q/A",
                    probes=(single_probe,),
                )
                handle = DlmHandle(
                    dlm_id="x",
                    base_model="b",
                    adapter_path=Path("/tmp/a"),
                    sections=(instruction,),
                    doc_text=None,
                )
                omitted = {kind for kind, _reason in collect_skipped_probe_reasons(handle)}
                for expected in ("external_perplexity", "leakage"):
                    assert expected in omitted

            def test_rendered_yaml_carries_skipped_block(self, tmp_path: Path) -> None:
                """End-to-end: a minimal prose-only ``.dlm`` renders ``# skipped:`` header lines."""
                from dlm_sway.integrations.dlm.autogen import (
                    _render_annotated_yaml,
                    build_spec_dict,
                    collect_skipped_probe_reasons,
                )

                short_prose = Section(id="s1", kind="prose", content="Short prose.")
                handle = DlmHandle(
                    dlm_id="x",
                    base_model="b",
                    adapter_path=Path("/tmp/a"),
                    sections=(short_prose,),
                    doc_text="doc",
                )
                source_file = tmp_path / "demo.dlm"
                source_file.write_text("# empty")
                rendered = _render_annotated_yaml(
                    build_spec_dict(handle, dlm_source="demo.dlm"),
                    handle,
                    source_file,
                    skipped=collect_skipped_probe_reasons(handle),
                )
                for marker in ("# skipped: adapter_revert", "# skipped: preference_flip"):
                    assert marker in rendered
                assert "(no " in rendered  # reasons start with "no ..."

            def test_rendered_yaml_omits_skipped_block_when_all_probes_fit(self) -> None:
                """A heavily-populated doc that triggers every probe renders no ``# skipped:`` lines."""
                from dlm_sway.core.sections import SectionPreference, SectionProbe
                from dlm_sway.integrations.dlm.autogen import (
                    _render_annotated_yaml,
                    build_spec_dict,
                    collect_skipped_probe_reasons,
                )

                # Twenty-five probes, one prose section and one preference pair.
                question_probes = tuple(
                    [SectionProbe(prompt=f"Q{num}?", gold=f"A{num}") for num in range(25)]
                )
                preference_pairs = (SectionPreference(prompt="P1", chosen="good", rejected="bad"),)
                instruction = Section(id="i1", kind="instruction", content="Q/A", probes=question_probes)
                prose = Section(
                    id="p1",
                    kind="prose",
                    content="A first prose sentence. A second. A third.",
                )
                preference = Section(
                    id="pref1",
                    kind="preference",
                    content="pref",
                    preferences=preference_pairs,
                )
                handle = DlmHandle(
                    dlm_id="x",
                    base_model="b",
                    adapter_path=Path("/tmp/a"),
                    sections=(instruction, prose, preference),
                    doc_text="doc",
                )
                rendered = _render_annotated_yaml(
                    build_spec_dict(handle),
                    handle,
                    Path("/tmp/demo.dlm"),
                    skipped=collect_skipped_probe_reasons(handle),
                )
                assert "# skipped:" not in rendered
      
        226
        
        227
        
        228
        class TestPortableDlmSource:
            """F09 (Audit 03): path portability of ``_portable_dlm_source``.

            A ``.dlm`` that lives inside the cwd is emitted as a cwd-relative
            path (so it survives a CI checkout); one that lives elsewhere is
            emitted as an absolute path.
            """

            def test_cwd_relative_when_inside(self, tmp_path: Path, monkeypatch) -> None:  # type: ignore[no-untyped-def]
                """A ``.dlm`` one directory below the cwd comes back as a relative path."""
                from dlm_sway.integrations.dlm.autogen import _portable_dlm_source

                # Create <tmp_path>/src/demo.dlm, then make tmp_path the cwd.
                target = tmp_path / "src" / "demo.dlm"
                target.parent.mkdir()
                target.write_text("# empty\n")
                monkeypatch.chdir(tmp_path)
                emitted = _portable_dlm_source(target)
                # NOTE(review): the expected literal uses "/" — presumably the
                # helper emits POSIX-style separators; confirm if Windows matters.
                assert emitted == "src/demo.dlm"
                # Not an absolute path — the whole point of F09.
                assert not Path(emitted).is_absolute()

            def test_absolute_when_outside(self, tmp_path: Path, monkeypatch) -> None:  # type: ignore[no-untyped-def]
                """A ``.dlm`` outside the cwd falls back to its absolute path.

                Relative-ization would point at a nonexistent parent directory
                on a fresh checkout.
                """
                from dlm_sway.integrations.dlm.autogen import _portable_dlm_source

                # The cwd and the .dlm's directory are siblings under tmp_path.
                workdir = tmp_path / "cwd"
                elsewhere = tmp_path / "other"
                for folder in (workdir, elsewhere):
                    folder.mkdir()
                target = elsewhere / "demo.dlm"
                target.write_text("# empty\n")
                monkeypatch.chdir(workdir)
                emitted = _portable_dlm_source(target)
                assert Path(emitted).is_absolute()
                assert emitted == str(target.resolve())
      
        265
        
        266
        
        267
        class TestAutogenClusterKL:
            """F07: ``cluster_kl`` is emitted only for sufficiently large prompt pools.

            The entry appears when the pool has enough entries to clear S16's
            ``min_prompts=20`` floor, and is omitted otherwise.
            """

            def test_emits_cluster_kl_when_prompt_pool_is_large(self) -> None:
                """A 25-probe handle yields a ``cluster_kl`` entry with the pinned settings."""
                suite = build_spec_dict(_handle_with_many_instruction_probes(25))["suite"]
                cluster = next(filter(lambda item: item["kind"] == "cluster_kl", suite), None)
                assert cluster is not None, "autogen should emit cluster_kl on large prompt pools"
                assert cluster["num_clusters"] == 5
                assert cluster["min_prompts"] == 20
                assert len(cluster["prompts"]) >= 20
                # Cap at 64 so a doc with hundreds of probes doesn't explode
                # the cluster runtime.
                # NOTE(review): a 25-probe fixture presumably cannot approach the
                # cap, so this bound is not really stressed here — confirm.
                assert len(cluster["prompts"]) <= 64

            def test_omits_cluster_kl_on_small_prompt_pool(self) -> None:
                """Under 20 prompts, the entry is omitted.

                The probe would SKIP anyway; skipping emission keeps the
                autogen'd YAML tidy.
                """
                cluster = None
                for item in build_spec_dict(_handle_with_many_instruction_probes(5))["suite"]:
                    if item["kind"] == "cluster_kl":
                        cluster = item
                        break
                assert cluster is None

            def test_prompts_deduplicated(self) -> None:
                """The emitted prompt list has no verbatim repeats.

                Instruction prompts and prose leading sentences are merged, but
                no entry may appear twice.
                """
                suite = build_spec_dict(_handle_with_many_instruction_probes(30))["suite"]
                cluster = next(filter(lambda item: item["kind"] == "cluster_kl", suite), None)
                assert cluster is not None
                prompts = cluster["prompts"]
                unique_prompts = set(prompts)
                assert len(prompts) == len(unique_prompts)

1	"""Quality-of-output tests for the autogen YAML.
2
3	The audit's B8 finding was that ``style_fingerprint`` got the leading
4	sentence of a prose section as its prompt — which elicits doc
5	continuation (a content probe), not stylistic voice. Sprint 05
6	replaces that with a fixed list of stylistic-elicitation prompts. This
7	file pins the new contract.
8	"""
9
10	from __future__ import annotations
11
12	from pathlib import Path
13
14	from dlm_sway.core.sections import Section
15	from dlm_sway.integrations.dlm.autogen import (
16	_STYLE_ELICITATION_PROMPTS,
17	build_spec_dict,
18	)
19	from dlm_sway.integrations.dlm.resolver import DlmHandle
20
21
def _handle_with_prose_first_sentence() -> DlmHandle:
    """Build a one-section handle whose prose opens with a distinctive line.

    The opener is deliberately strong and doc-specific: it is the kind of
    sentence that, under the old heuristic, would have leaked into the
    style probe, so tests can look for its vocabulary in generated prompts.
    """
    # Adjacent string literals are joined into a single paragraph.
    prose_body = (
        "The mitochondrion is the powerhouse of the cell. "
        "It generates ATP via oxidative phosphorylation. "
        "Inner-membrane folds called cristae increase surface area."
    )
    only_section = Section(id="s1", kind="prose", content=prose_body)
    return DlmHandle(
        dlm_id="x",
        base_model="HuggingFaceTB/SmolLM2-135M-Instruct",
        adapter_path=Path("/tmp/adapter"),
        sections=(only_section,),
        doc_text="whole document",
    )
44
45
def test_style_prompts_use_elicitation_set_not_doc_content() -> None:
    """B8: the style_fingerprint entry draws only on the fixed elicitation set."""
    suite = build_spec_dict(_handle_with_prose_first_sentence())["suite"]
    # Take the first style_fingerprint entry, or None when none was emitted.
    fingerprint = None
    for candidate in suite:
        if candidate["kind"] == "style_fingerprint":
            fingerprint = candidate
            break
    assert fingerprint is not None, "autogen should emit a style_fingerprint entry"
    prompts = fingerprint["prompts"]
    # Every emitted prompt must be a member of the elicitation constant.
    assert set(prompts).issubset(set(_STYLE_ELICITATION_PROMPTS))
    # Vocabulary from the fixture's leading prose sentence must not appear.
    for leaked_word in ("mitochondrion", "powerhouse"):
        assert not any(leaked_word in text.lower() for text in prompts)
57
58
def test_style_prompts_nonempty_even_without_prose() -> None:
    """With no prose section at all, the style probe still has prompts to ask."""
    lone_instruction = Section(
        id="i1",
        kind="instruction",
        content="What is X? X is Y.",
        probes=(),
    )
    handle = DlmHandle(
        dlm_id="x",
        base_model="b",
        adapter_path=Path("/tmp/a"),
        sections=(lone_instruction,),
        doc_text=None,
    )
    suite = build_spec_dict(handle)["suite"]
    fingerprint = next(filter(lambda item: item["kind"] == "style_fingerprint", suite), None)
    assert fingerprint is not None
    # The contract pinned here is a floor of four prompts.
    prompt_count = len(fingerprint["prompts"])
    assert prompt_count >= 4
73
74
def test_elicitation_prompts_are_open_ended() -> None:
    """Sanity-check the elicitation constant itself.

    Each prompt should invite prose rather than a single-token completion:
    it must be at least 30 characters long and end with a period.
    """
    # Without this guard the loop below would pass vacuously on an empty
    # constant and the test would assert nothing.
    assert _STYLE_ELICITATION_PROMPTS, "elicitation prompt set must not be empty"
    for prompt in _STYLE_ELICITATION_PROMPTS:
        assert len(prompt) >= 30, f"prompt too short to elicit prose: {prompt!r}"
        assert prompt.endswith("."), f"prompt should end with a period: {prompt!r}"
81
82
def _handle_with_many_instruction_probes(n: int) -> DlmHandle:
    """Build a handle carrying ``n`` distinct instruction probes plus one prose section.

    Used to push the prompt pool past ``cluster_kl``'s 20-prompt floor (or
    to stay deliberately below it).
    """
    from dlm_sway.core.sections import SectionProbe

    # The index is embedded in every prompt so that all of them differ.
    generated = []
    for idx in range(n):
        generated.append(SectionProbe(prompt=f"Q{idx}: what is topic {idx}?", gold=f"A{idx}"))
    instruction = Section(id="i1", kind="instruction", content="…", probes=tuple(generated))
    prose = Section(
        id="p1",
        kind="prose",
        content="Prose sentence one. Prose sentence two. Prose sentence three.",
    )
    return DlmHandle(
        dlm_id="x",
        base_model="b",
        adapter_path=Path("/tmp/a"),
        sections=(instruction, prose),
        doc_text=None,
    )
104
105
class TestSkippedProbesRollup:
    """F07 (Audit 03): the ``# skipped: <probe> (<reason>)`` header block.

    ``_render_annotated_yaml`` prepends these comment lines so users see
    which probes the autogen intentionally omitted, without diffing this
    module's docstring.
    """

    def test_prose_only_handle_omits_instruction_heavy_probes(self) -> None:
        """A prose-only ``.dlm`` reports the instruction-dependent probes as skipped.

        Expected skips: adapter_revert, paraphrase_invariance,
        preference_flip and (with a single section)
        section_internalization.
        """
        from dlm_sway.integrations.dlm.autogen import collect_skipped_probe_reasons

        prose = Section(
            id="s1",
            kind="prose",
            content="One paragraph of prose. Second sentence.",
        )
        handle = DlmHandle(
            dlm_id="x",
            base_model="b",
            adapter_path=Path("/tmp/a"),
            sections=(prose,),
            doc_text="doc",
        )
        omitted = {kind for kind, _reason in collect_skipped_probe_reasons(handle)}
        for expected in (
            "adapter_revert",
            "paraphrase_invariance",
            "preference_flip",
            "section_internalization",
        ):
            assert expected in omitted
        # delta_kl should NOT be skipped — prose provides a fallback
        # prompt pool.
        assert "delta_kl" not in omitted

    def test_instruction_only_handle_omits_prose_heavy_probes(self) -> None:
        """An instruction-only doc reports external_perplexity and leakage as skipped."""
        from dlm_sway.core.sections import SectionProbe
        from dlm_sway.integrations.dlm.autogen import collect_skipped_probe_reasons

        single_probe = SectionProbe(prompt="Q?", gold="A")
        instruction = Section(
            id="i1",
            kind="instruction",
            content="Q/A",
            probes=(single_probe,),
        )
        handle = DlmHandle(
            dlm_id="x",
            base_model="b",
            adapter_path=Path("/tmp/a"),
            sections=(instruction,),
            doc_text=None,
        )
        omitted = {kind for kind, _reason in collect_skipped_probe_reasons(handle)}
        for expected in ("external_perplexity", "leakage"):
            assert expected in omitted

    def test_rendered_yaml_carries_skipped_block(self, tmp_path: Path) -> None:
        """End-to-end: a minimal prose-only ``.dlm`` renders ``# skipped:`` header lines."""
        from dlm_sway.integrations.dlm.autogen import (
            _render_annotated_yaml,
            build_spec_dict,
            collect_skipped_probe_reasons,
        )

        short_prose = Section(id="s1", kind="prose", content="Short prose.")
        handle = DlmHandle(
            dlm_id="x",
            base_model="b",
            adapter_path=Path("/tmp/a"),
            sections=(short_prose,),
            doc_text="doc",
        )
        source_file = tmp_path / "demo.dlm"
        source_file.write_text("# empty")
        rendered = _render_annotated_yaml(
            build_spec_dict(handle, dlm_source="demo.dlm"),
            handle,
            source_file,
            skipped=collect_skipped_probe_reasons(handle),
        )
        for marker in ("# skipped: adapter_revert", "# skipped: preference_flip"):
            assert marker in rendered
        assert "(no " in rendered  # reasons start with "no ..."

    def test_rendered_yaml_omits_skipped_block_when_all_probes_fit(self) -> None:
        """A heavily-populated doc that triggers every probe renders no ``# skipped:`` lines."""
        from dlm_sway.core.sections import SectionPreference, SectionProbe
        from dlm_sway.integrations.dlm.autogen import (
            _render_annotated_yaml,
            build_spec_dict,
            collect_skipped_probe_reasons,
        )

        # Twenty-five probes, one prose section and one preference pair.
        question_probes = tuple(
            [SectionProbe(prompt=f"Q{num}?", gold=f"A{num}") for num in range(25)]
        )
        preference_pairs = (SectionPreference(prompt="P1", chosen="good", rejected="bad"),)
        instruction = Section(id="i1", kind="instruction", content="Q/A", probes=question_probes)
        prose = Section(
            id="p1",
            kind="prose",
            content="A first prose sentence. A second. A third.",
        )
        preference = Section(
            id="pref1",
            kind="preference",
            content="pref",
            preferences=preference_pairs,
        )
        handle = DlmHandle(
            dlm_id="x",
            base_model="b",
            adapter_path=Path("/tmp/a"),
            sections=(instruction, prose, preference),
            doc_text="doc",
        )
        rendered = _render_annotated_yaml(
            build_spec_dict(handle),
            handle,
            Path("/tmp/demo.dlm"),
            skipped=collect_skipped_probe_reasons(handle),
        )
        assert "# skipped:" not in rendered
226
227
class TestPortableDlmSource:
    """F09 (Audit 03): path portability of ``_portable_dlm_source``.

    A ``.dlm`` that lives inside the cwd is emitted as a cwd-relative path
    (so it survives a CI checkout); one that lives elsewhere is emitted as
    an absolute path.
    """

    def test_cwd_relative_when_inside(self, tmp_path: Path, monkeypatch) -> None:  # type: ignore[no-untyped-def]
        """A ``.dlm`` one directory below the cwd comes back as a relative path."""
        from dlm_sway.integrations.dlm.autogen import _portable_dlm_source

        # Create <tmp_path>/src/demo.dlm, then make tmp_path the cwd.
        target = tmp_path / "src" / "demo.dlm"
        target.parent.mkdir()
        target.write_text("# empty\n")
        monkeypatch.chdir(tmp_path)
        emitted = _portable_dlm_source(target)
        # NOTE(review): the expected literal uses "/" — presumably the helper
        # emits POSIX-style separators; confirm if Windows matters.
        assert emitted == "src/demo.dlm"
        # Not an absolute path — the whole point of F09.
        assert not Path(emitted).is_absolute()

    def test_absolute_when_outside(self, tmp_path: Path, monkeypatch) -> None:  # type: ignore[no-untyped-def]
        """A ``.dlm`` outside the cwd falls back to its absolute path.

        Relative-ization would point at a nonexistent parent directory on a
        fresh checkout.
        """
        from dlm_sway.integrations.dlm.autogen import _portable_dlm_source

        # The cwd and the .dlm's directory are siblings under tmp_path.
        workdir = tmp_path / "cwd"
        elsewhere = tmp_path / "other"
        for folder in (workdir, elsewhere):
            folder.mkdir()
        target = elsewhere / "demo.dlm"
        target.write_text("# empty\n")
        monkeypatch.chdir(workdir)
        emitted = _portable_dlm_source(target)
        assert Path(emitted).is_absolute()
        assert emitted == str(target.resolve())
265
266
class TestAutogenClusterKL:
    """F07: ``cluster_kl`` is emitted only for sufficiently large prompt pools.

    The entry appears when the pool has enough entries to clear S16's
    ``min_prompts=20`` floor, and is omitted otherwise.
    """

    def test_emits_cluster_kl_when_prompt_pool_is_large(self) -> None:
        """A 25-probe handle yields a ``cluster_kl`` entry with the pinned settings."""
        suite = build_spec_dict(_handle_with_many_instruction_probes(25))["suite"]
        cluster = next(filter(lambda item: item["kind"] == "cluster_kl", suite), None)
        assert cluster is not None, "autogen should emit cluster_kl on large prompt pools"
        assert cluster["num_clusters"] == 5
        assert cluster["min_prompts"] == 20
        assert len(cluster["prompts"]) >= 20
        # Cap at 64 so a doc with hundreds of probes doesn't explode
        # the cluster runtime.
        # NOTE(review): a 25-probe fixture presumably cannot approach the
        # cap, so this bound is not really stressed here — confirm.
        assert len(cluster["prompts"]) <= 64

    def test_omits_cluster_kl_on_small_prompt_pool(self) -> None:
        """Under 20 prompts, the entry is omitted.

        The probe would SKIP anyway; skipping emission keeps the autogen'd
        YAML tidy.
        """
        cluster = None
        for item in build_spec_dict(_handle_with_many_instruction_probes(5))["suite"]:
            if item["kind"] == "cluster_kl":
                cluster = item
                break
        assert cluster is None

    def test_prompts_deduplicated(self) -> None:
        """The emitted prompt list has no verbatim repeats.

        Instruction prompts and prose leading sentences are merged, but no
        entry may appear twice.
        """
        suite = build_spec_dict(_handle_with_many_instruction_probes(30))["suite"]
        cluster = next(filter(lambda item: item["kind"] == "cluster_kl", suite), None)
        assert cluster is not None
        prompts = cluster["prompts"]
        unique_prompts = set(prompts)
        assert len(prompts) == len(unique_prompts)