"""Quality-of-output tests for the autogen YAML.

The audit's B8 finding was that ``style_fingerprint`` got the leading
sentence of a prose section as its prompt — which elicits doc
*continuation* (a content probe), not stylistic voice. Sprint 05
replaces that with a fixed list of stylistic-elicitation prompts. This
file pins the new contract.
"""

from __future__ import annotations

from pathlib import Path

from dlm_sway.core.sections import Section
from dlm_sway.integrations.dlm.autogen import (
    _STYLE_ELICITATION_PROMPTS,
    build_spec_dict,
)
from dlm_sway.integrations.dlm.resolver import DlmHandle


def _handle_with_prose_first_sentence() -> DlmHandle:
    """Build a handle with a single prose section and a distinctive opener.

    The section's first sentence is strongly doc-specific, which makes it
    easy to detect by keyword if leading prose ever ends up inside a
    style prompt again (the behaviour of the old heuristic).
    """
    prose_text = (
        "The mitochondrion is the powerhouse of the cell. "
        "It generates ATP via oxidative phosphorylation. "
        "Inner-membrane folds called cristae increase surface area."
    )
    prose_section = Section(id="s1", kind="prose", content=prose_text)
    return DlmHandle(
        dlm_id="x",
        base_model="HuggingFaceTB/SmolLM2-135M-Instruct",
        adapter_path=Path("/tmp/adapter"),
        sections=(prose_section,),
        doc_text="whole document",
    )


def test_style_prompts_use_elicitation_set_not_doc_content() -> None:
    """B8: the ``style_fingerprint`` prompts are drawn from the fixed set.

    Checks that every emitted prompt is a member of
    ``_STYLE_ELICITATION_PROMPTS`` and that none of them contains the
    keywords of the fixture's leading prose sentence.
    """
    # Scan the suite in order and stop at the first style_fingerprint entry.
    fingerprint = None
    for entry in build_spec_dict(_handle_with_prose_first_sentence())["suite"]:
        if entry["kind"] == "style_fingerprint":
            fingerprint = entry
            break
    assert fingerprint is not None, "autogen should emit a style_fingerprint entry"

    prompts = fingerprint["prompts"]
    # Membership: nothing outside the elicitation set may appear.
    assert set(prompts).issubset(set(_STYLE_ELICITATION_PROMPTS))
    # Leakage: the doc-specific opener must not show up in any prompt.
    for keyword in ("mitochondrion", "powerhouse"):
        assert all(keyword not in p.lower() for p in prompts)


def test_style_prompts_nonempty_even_without_prose() -> None:
    """An instruction-only handle still gets a populated style probe.

    Since the prompts come from a fixed list rather than from prose
    content, the ``style_fingerprint`` entry must be present and hold at
    least four prompts even though the handle has no prose section.
    """
    instruction_only = Section(
        id="i1",
        kind="instruction",
        content="What is X? X is Y.",
        probes=(),
    )
    handle = DlmHandle(
        dlm_id="x",
        base_model="b",
        adapter_path=Path("/tmp/a"),
        sections=(instruction_only,),
        doc_text=None,
    )

    # Take the first style_fingerprint entry of the suite, if any.
    fingerprint = None
    for entry in build_spec_dict(handle)["suite"]:
        if entry["kind"] == "style_fingerprint":
            fingerprint = entry
            break
    assert fingerprint is not None
    prompt_count = len(fingerprint["prompts"])
    assert prompt_count >= 4


def test_elicitation_prompts_are_open_ended() -> None:
    """Sanity-check the constant: each prompt should invite prose.

    Every prompt must be at least 30 characters long and end with a
    full stop, ruling out prompts that beg a single-token completion.
    """
    shortest_allowed = 30
    for prompt in _STYLE_ELICITATION_PROMPTS:
        long_enough = len(prompt) >= shortest_allowed
        assert long_enough, f"prompt too short to elicit prose: {prompt!r}"
        # A one-character tail slice is safe on any string, even an empty one.
        assert prompt[-1:] == "."


def _handle_with_many_instruction_probes(n: int) -> DlmHandle:
    """Build a handle with ``n`` distinct instruction probes and one prose section.

    Used by the ``cluster_kl`` tests to land on either side of that
    probe's 20-prompt floor. Each probe prompt embeds its index, so all
    ``n`` prompts are different from one another.
    """
    from dlm_sway.core.sections import SectionProbe

    qa_probes = []
    for i in range(n):
        qa_probes.append(SectionProbe(prompt=f"Q{i}: what is topic {i}?", gold=f"A{i}"))

    instruction_section = Section(
        id="i1",
        kind="instruction",
        content="…",
        probes=tuple(qa_probes),
    )
    prose_section = Section(
        id="p1",
        kind="prose",
        content="Prose sentence one. Prose sentence two. Prose sentence three.",
    )
    return DlmHandle(
        dlm_id="x",
        base_model="b",
        adapter_path=Path("/tmp/a"),
        sections=(instruction_section, prose_section),
        doc_text=None,
    )


class TestSkippedProbesRollup:
    """F07 (Audit 03) — the skipped-probe rollup in the rendered YAML.

    ``_render_annotated_yaml`` prepends ``# skipped: <probe> (<reason>)``
    lines so that users can see which probes the autogen left out on
    purpose, instead of having to diff against the autogen module's
    docstring. These tests pin both the collected reasons and the
    rendered header.
    """

    def test_prose_only_handle_omits_instruction_heavy_probes(self) -> None:
        """A prose-only handle with one section skips four probes.

        Expected skips: adapter_revert, paraphrase_invariance,
        preference_flip and section_internalization. ``delta_kl`` must
        not be among them.
        """
        from dlm_sway.integrations.dlm.autogen import collect_skipped_probe_reasons

        lone_prose = Section(
            id="s1",
            kind="prose",
            content="One paragraph of prose. Second sentence.",
        )
        handle = DlmHandle(
            dlm_id="x",
            base_model="b",
            adapter_path=Path("/tmp/a"),
            sections=(lone_prose,),
            doc_text="doc",
        )

        # Reduce the (kind, reason) pairs to the set of skipped kinds.
        skipped_kinds = set()
        for kind, _reason in collect_skipped_probe_reasons(handle):
            skipped_kinds.add(kind)

        for expected in (
            "adapter_revert",
            "paraphrase_invariance",
            "preference_flip",
            "section_internalization",
        ):
            assert expected in skipped_kinds
        # delta_kl should NOT be skipped — prose provides a fallback
        # prompt pool.
        assert "delta_kl" not in skipped_kinds

    def test_instruction_only_handle_omits_prose_heavy_probes(self) -> None:
        """An instruction-only handle skips external_perplexity and leakage."""
        from dlm_sway.core.sections import SectionProbe
        from dlm_sway.integrations.dlm.autogen import collect_skipped_probe_reasons

        single_probe = SectionProbe(prompt="Q?", gold="A")
        instruction_section = Section(
            id="i1",
            kind="instruction",
            content="Q/A",
            probes=(single_probe,),
        )
        handle = DlmHandle(
            dlm_id="x",
            base_model="b",
            adapter_path=Path("/tmp/a"),
            sections=(instruction_section,),
            doc_text=None,
        )

        # Reduce the (kind, reason) pairs to the set of skipped kinds.
        skipped_kinds = set()
        for kind, _reason in collect_skipped_probe_reasons(handle):
            skipped_kinds.add(kind)

        for expected in ("external_perplexity", "leakage"):
            assert expected in skipped_kinds

    def test_rendered_yaml_carries_skipped_block(self, tmp_path: Path) -> None:
        """End-to-end: a minimal prose-only handle renders ``# skipped:`` lines."""
        from dlm_sway.integrations.dlm.autogen import (
            _render_annotated_yaml,
            build_spec_dict,
            collect_skipped_probe_reasons,
        )

        short_prose = Section(id="s1", kind="prose", content="Short prose.")
        handle = DlmHandle(
            dlm_id="x",
            base_model="b",
            adapter_path=Path("/tmp/a"),
            sections=(short_prose,),
            doc_text="doc",
        )
        dlm_path = tmp_path / "demo.dlm"
        dlm_path.write_text("# empty")

        rendered = _render_annotated_yaml(
            build_spec_dict(handle, dlm_source="demo.dlm"),
            handle,
            dlm_path,
            skipped=collect_skipped_probe_reasons(handle),
        )

        expected_fragments = (
            "# skipped: adapter_revert",
            "# skipped: preference_flip",
            "(no ",  # reasons start with "no ..."
        )
        for fragment in expected_fragments:
            assert fragment in rendered

    def test_rendered_yaml_omits_skipped_block_when_all_probes_fit(self) -> None:
        """A handle rich enough to trigger every probe renders no ``# skipped:`` line.

        The handle combines 25 instruction probes, a prose section and a
        preference section.
        """
        from dlm_sway.core.sections import SectionPreference, SectionProbe
        from dlm_sway.integrations.dlm.autogen import (
            _render_annotated_yaml,
            build_spec_dict,
            collect_skipped_probe_reasons,
        )

        qa_probes = []
        for i in range(25):
            qa_probes.append(SectionProbe(prompt=f"Q{i}?", gold=f"A{i}"))
        preference = SectionPreference(prompt="P1", chosen="good", rejected="bad")

        instruction_section = Section(
            id="i1",
            kind="instruction",
            content="Q/A",
            probes=tuple(qa_probes),
        )
        prose_section = Section(
            id="p1",
            kind="prose",
            content="A first prose sentence. A second. A third.",
        )
        preference_section = Section(
            id="pref1",
            kind="preference",
            content="pref",
            preferences=(preference,),
        )
        handle = DlmHandle(
            dlm_id="x",
            base_model="b",
            adapter_path=Path("/tmp/a"),
            sections=(instruction_section, prose_section, preference_section),
            doc_text="doc",
        )

        rendered = _render_annotated_yaml(
            build_spec_dict(handle),
            handle,
            Path("/tmp/demo.dlm"),
            skipped=collect_skipped_probe_reasons(handle),
        )
        assert "# skipped:" not in rendered


class TestPortableDlmSource:
    """F09 (Audit 03) — path portability of ``_portable_dlm_source``.

    The helper emits a cwd-relative path when the ``.dlm`` sits inside
    the current working directory (so the path survives a CI checkout)
    and an absolute path when the file lives elsewhere.
    """

    def test_cwd_relative_when_inside(self, tmp_path: Path, monkeypatch) -> None:  # type: ignore[no-untyped-def]
        """A ``.dlm`` below the cwd is reported relative to the cwd."""
        from dlm_sway.integrations.dlm.autogen import _portable_dlm_source

        # Place the .dlm one directory below what will become the cwd.
        dlm_file = tmp_path / "src" / "demo.dlm"
        dlm_file.parent.mkdir()
        dlm_file.write_text("# empty\n")
        monkeypatch.chdir(tmp_path)

        source = _portable_dlm_source(dlm_file)

        assert source == "src/demo.dlm"
        # Not an absolute path — the whole point of F09.
        assert not Path(source).is_absolute()

    def test_absolute_when_outside(self, tmp_path: Path, monkeypatch) -> None:  # type: ignore[no-untyped-def]
        """A ``.dlm`` outside the cwd is reported by its absolute path.

        Relative-izing it would point at a parent directory that does
        not exist on a fresh checkout.
        """
        from dlm_sway.integrations.dlm.autogen import _portable_dlm_source

        # Two sibling trees under tmp_path: one is the cwd, the other
        # holds the .dlm.
        working_dir = tmp_path / "cwd"
        other_tree = tmp_path / "other"
        for directory in (working_dir, other_tree):
            directory.mkdir()
        dlm_file = other_tree / "demo.dlm"
        dlm_file.write_text("# empty\n")
        monkeypatch.chdir(working_dir)

        source = _portable_dlm_source(dlm_file)

        assert Path(source).is_absolute()
        assert source == str(dlm_file.resolve())


class TestAutogenClusterKL:
    """F07 — when autogen emits a ``cluster_kl`` entry.

    The entry is emitted once the prompt pool is large enough to clear
    S16's ``min_prompts=20`` floor, and left out otherwise.
    """

    def test_emits_cluster_kl_when_prompt_pool_is_large(self) -> None:
        """25 instruction probes yield a ``cluster_kl`` entry with pinned settings."""
        suite = build_spec_dict(_handle_with_many_instruction_probes(25))["suite"]
        for candidate in suite:
            if candidate["kind"] == "cluster_kl":
                cluster_entry = candidate
                break
        else:
            cluster_entry = None
        assert cluster_entry is not None, "autogen should emit cluster_kl on large prompt pools"

        assert cluster_entry["num_clusters"] == 5
        assert cluster_entry["min_prompts"] == 20
        # Lower bound is the floor; the upper bound is a cap at 64 so a
        # doc with hundreds of probes doesn't explode the cluster runtime.
        assert 20 <= len(cluster_entry["prompts"]) <= 64

    def test_omits_cluster_kl_on_small_prompt_pool(self) -> None:
        """Under 20 prompts the entry is omitted entirely.

        The probe would SKIP anyway; leaving it out keeps the autogen'd
        YAML tidy.
        """
        suite = build_spec_dict(_handle_with_many_instruction_probes(5))["suite"]
        assert all(candidate["kind"] != "cluster_kl" for candidate in suite)

    def test_prompts_deduplicated(self) -> None:
        """The emitted prompt list has no verbatim repeats.

        Instruction prompts and prose leading sentences are merged into
        one pool, and that pool must not contain duplicates.
        """
        suite = build_spec_dict(_handle_with_many_instruction_probes(30))["suite"]
        for candidate in suite:
            if candidate["kind"] == "cluster_kl":
                cluster_entry = candidate
                break
        else:
            cluster_entry = None
        assert cluster_entry is not None

        prompts = cluster_entry["prompts"]
        assert len(prompts) == len(set(prompts))