@@ -103,7 +103,69 @@ def write_sway_yaml(dlm_path: Path, out: Path) -> None: |
| 103 | 103 | "train the document with `dlm train` before generating a sway suite." |
| 104 | 104 | ) |
| 105 | 105 | spec = build_spec_dict(handle, dlm_source=_portable_dlm_source(dlm_path)) |
| 106 | | - out.write_text(_render_annotated_yaml(spec, handle, dlm_path), encoding="utf-8") |
| 106 | + skipped = collect_skipped_probe_reasons(handle) |
| 107 | + out.write_text( |
| 108 | + _render_annotated_yaml(spec, handle, dlm_path, skipped=skipped), |
| 109 | + encoding="utf-8", |
| 110 | + ) |
| 111 | + |
| 112 | + |
| 113 | +def collect_skipped_probe_reasons(handle: DlmHandle) -> list[tuple[str, str]]: |
| 114 | + """Return ``(probe_kind, reason)`` tuples for every probe |
| 115 | + ``_build_suite`` intentionally omits for this ``.dlm``. |
| 116 | + |
| 117 | + F07 (Audit 03) — the emitted YAML previously had no record of |
| 118 | + which probes were skipped and why. Users previously had to diff the |
| 119 | + autogen output against the intent docstring to find out. This is the |
| 120 | + input to the YAML-comment block the renderer prepends. |
| 121 | + |
| 122 | + Mirrors the conditional logic inside :func:`_build_suite` — any |
| 123 | + change to that function's gating must be mirrored here as well. |
| 124 | + """ |
| 125 | + sections = handle.sections |
| 126 | + instruction_probes = [ |
| 127 | + (p.prompt, p.gold) for s in sections if s.kind == "instruction" for p in s.probes |
| 128 | + ] |
| 129 | + prose_prompts = [ |
| 130 | + s.content.split(".")[0].strip() |
| 131 | + for s in sections |
| 132 | + if s.kind == "prose" and s.content.strip() and s.content.split(".")[0].strip() |
| 133 | + ] |
| 134 | + has_instruction_probes = bool(instruction_probes) |
| 135 | + has_prose = any(s.kind == "prose" for s in sections) |
| 136 | + has_preferences = any(s.kind == "preference" and s.preferences for s in sections) |
| 137 | + |
| 138 | + kl_prompts = [q for q, _ in instruction_probes][:16] or prose_prompts[:16] |
| 139 | + all_instruction_prompts = [q for q, _ in instruction_probes] |
| 140 | + cluster_pool_size = len({*all_instruction_prompts, *prose_prompts}) |
| 141 | + |
| 142 | + skipped: list[tuple[str, str]] = [] |
| 143 | + if not kl_prompts: |
| 144 | + skipped.append(("delta_kl", "no instruction probes or prose sections")) |
| 145 | + if not has_instruction_probes: |
| 146 | + skipped.append(("adapter_revert", "no !probe markers in INSTRUCTION sections")) |
| 147 | + skipped.append(("paraphrase_invariance", "no !probe markers in INSTRUCTION sections")) |
| 148 | + if not kl_prompts: |
| 149 | + skipped.append(("prompt_collapse", "no prompts available to score")) |
| 150 | + if len(sections) < 2: |
| 151 | + skipped.append(("section_internalization", "document has fewer than 2 sections")) |
| 152 | + if not has_preferences: |
| 153 | + skipped.append(("preference_flip", "no PREFERENCE sections with populated triples")) |
| 154 | + if not has_prose: |
| 155 | + skipped.append( |
| 156 | + ("external_perplexity", "no PROSE sections to measure external-corpus drift against") |
| 157 | + ) |
| 158 | + skipped.append(("leakage", "no PROSE sections to extract prefix/continuation windows from")) |
| 159 | + if cluster_pool_size < 20: |
| 160 | + skipped.append( |
| 161 | + ( |
| 162 | + "cluster_kl", |
| 163 | + f"only {cluster_pool_size} distinct prompts in pool (need ≥ 20 for stable clustering)", |
| 164 | + ) |
| 165 | + ) |
| 166 | + if not kl_prompts: |
| 167 | + skipped.append(("adapter_ablation", "no prompts available to score")) |
| 168 | + return skipped |
| 107 | 169 | |
| 108 | 170 | |
| 109 | 171 | def _portable_dlm_source(dlm_path: Path) -> str: |
@@ -130,7 +192,13 @@ def _portable_dlm_source(dlm_path: Path) -> str: |
| 130 | 192 | return str(abs_path) |
| 131 | 193 | |
| 132 | 194 | |
| 133 | | -def _render_annotated_yaml(spec: dict[str, Any], handle: DlmHandle, dlm_path: Path) -> str: |
| 195 | +def _render_annotated_yaml( |
| 196 | + spec: dict[str, Any], |
| 197 | + handle: DlmHandle, |
| 198 | + dlm_path: Path, |
| 199 | + *, |
| 200 | + skipped: list[tuple[str, str]] | None = None, |
| 201 | +) -> str: |
| 134 | 202 | """Render the spec as YAML with a provenance header + per-probe intent lines (D5). |
| 135 | 203 | |
| 136 | 204 | Uses pyyaml (already a hard dep) and post-processes the output to |
@@ -138,6 +206,11 @@ def _render_annotated_yaml(spec: dict[str, Any], handle: DlmHandle, dlm_path: Pa |
| 138 | 206 | ``ruamel.yaml`` dep the sprint contemplated — the annotation here |
| 139 | 207 | is structural (position-based), not round-trippable, so the lighter |
| 140 | 208 | approach is sufficient. |
| 209 | + |
| 210 | + F07 (Audit 03) — when ``skipped`` is non-empty, the header gains a |
| 211 | + ``# skipped: <probe> (<reason>)`` block so users see which probes |
| 212 | + the autogen intentionally omitted, without diffing the autogen |
| 213 | + module's docstring. |
| 141 | 214 | """ |
| 142 | 215 | import datetime as _dt |
| 143 | 216 | |
@@ -158,8 +231,17 @@ def _render_annotated_yaml(spec: dict[str, Any], handle: DlmHandle, dlm_path: Pa |
| 158 | 231 | "# Edit freely — this file is your checked-in contract. Re-running", |
| 159 | 232 | "# `sway autogen` overwrites it; commit the generated file so your", |
| 160 | 233 | "# test suite is diffable in PRs.", |
| 161 | | - "", |
| 162 | 234 | ] |
| 235 | + if skipped: |
| 236 | + header_lines.extend( |
| 237 | + [ |
| 238 | + "#", |
| 239 | + f"# {len(skipped)} probe(s) intentionally omitted for this document:", |
| 240 | + *[f"# skipped: {kind} ({reason})" for kind, reason in skipped], |
| 241 | + "# (sway gate will still pass — missing probes don't fail the gate.)", |
| 242 | + ] |
| 243 | + ) |
| 244 | + header_lines.append("") |
| 163 | 245 | return "\n".join(header_lines) + annotated |
| 164 | 246 | |
| 165 | 247 | |