"""`BaseModelSpec` — curated metadata for a single pretrained base model.

Every spec is strict: unknown fields are rejected (`extra="forbid"`),
instances are frozen, and all values are validated on instantiation. A
spec packs everything the rest of the project needs to know about a base,
so HF metadata does not have to be re-fetched at every decision point:

- `revision`: 40-char commit SHA. Enforced non-None so retrains under the
  same spec pin at exactly the same weights.
- `target_modules`: per-architecture LoRA target list (see findings §8;
  `"all-linear"` is avoided because it bloats small models).
- `template`: the chat-template dialect used by the Go-template
  registry for Modelfile generation.
- `gguf_arch` / `tokenizer_pre`: identifiers the llama.cpp converter
  matches against; export preflight uses them.
- `reasoning_tuned` / `context_length_effective`: additive registry
  hints for prompt defaults and realistic doctor estimates. The
  effective length defaults to the nominal context window when unset.
- `refresh_check_hf_gating` / `provenance_url` /
  `provenance_match_text`: live-registry refresh hints for entries
  whose fetch mirror and first-party provenance page are not the same
  system.
- License / gating: separate fields for SPDX, acceptance gating, and
  re-distribution — each consumed by a different policy gate (license
  acceptance, pack `--include-base`, share-protocol refusal).
"""

from __future__ import annotations

import re
from typing import Final, Literal

from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator

# Exactly 40 lowercase hex characters. `BaseModelSpec` checks `revision`
# against this pattern.
_SHA_RE: Final[re.Pattern[str]] = re.compile(r"^[0-9a-f]{40}$")
# Temperature returned by `BaseModelSpec.suggested_prompt_temperature` for
# bases that are not flagged `reasoning_tuned`.
DEFAULT_PROMPT_TEMPERATURE: Final[float] = 0.7
# Slightly cooler temperature returned for `reasoning_tuned` bases.
DEFAULT_REASONING_PROMPT_TEMPERATURE: Final[float] = 0.6


class VlPreprocessorPlan(BaseModel):
    """Expected vision-preprocessing shape for one base model.

    The values are fixed when the registry is built, which keeps
    `dlm export` and the VL cache key stable from run to run. At
    runtime HF's `AutoProcessor` remains authoritative; this plan only
    records the shape that preflight checks and cache keying *expect*.

    Fields:
        target_size: `(height, width)` in pixels; both must be positive.
        resize_policy: `"fixed"` (the default, matching what the current
            launch registry ships) or `"dynamic"`.
        image_token: Textual placeholder written into prompts; the
            processor later expands it into `num_image_tokens` copies.
        num_image_tokens: Number of tokens one image consumes.
    """

    model_config = ConfigDict(extra="forbid", frozen=True)

    target_size: tuple[int, int] = Field(
        ...,
        description="(height, width) in pixels",
    )
    resize_policy: Literal["fixed", "dynamic"] = "fixed"
    image_token: str = Field(
        ...,
        min_length=1,
        description="Placeholder token string",
    )
    num_image_tokens: int = Field(
        ...,
        gt=0,
        description="Tokens consumed per image",
    )

    @field_validator("target_size")
    @classmethod
    def _validate_target_size(cls, value: tuple[int, int]) -> tuple[int, int]:
        """Reject a `target_size` whose height or width is not positive."""
        height, width = value
        # The smaller dimension decides: if it is positive, both are.
        if min(height, width) <= 0:
            raise ValueError(f"target_size must be positive, got {value!r}")
        return value


class AudioPreprocessorPlan(BaseModel):
    """Expected audio-preprocessing shape for one base model.

    The audio counterpart of `VlPreprocessorPlan`: values are fixed
    when the registry is built so the audio cache key does not drift.
    Current releases refuse audio whose rate differs from
    `sample_rate`; resampling is planned as a follow-up.

    Fields:
        sample_rate: The model's training rate in Hz (Qwen2-Audio uses
            16000).
        max_length_seconds: Upper bound on the clip duration the
            processor sees. Longer clips are truncated, which is the
            processor's built-in policy.
        audio_token: Textual placeholder that expands into the model's
            fixed audio-token window.
        num_audio_tokens: Number of tokens reserved for each clip.
    """

    model_config = ConfigDict(extra="forbid", frozen=True)

    sample_rate: int = Field(
        ...,
        gt=0,
        description="Hz — refuse on mismatch",
    )
    max_length_seconds: float = Field(
        ...,
        gt=0.0,
    )
    audio_token: str = Field(
        ...,
        min_length=1,
        description="Placeholder token string",
    )
    num_audio_tokens: int = Field(
        ...,
        gt=0,
        description="Tokens reserved per clip",
    )


class BaseModelSpec(BaseModel):
    """Curated registry metadata for one base model.

    Instances are frozen and reject unknown fields. On top of the
    per-field constraints, validation enforces that:

    - `revision` is a 40-char lowercase hex SHA;
    - `hf_id` has the shape `org/name` with exactly one `/`;
    - `target_modules` is non-empty and contains no empty strings;
    - the preprocessor plans agree with `modality`;
    - `context_length_effective`, when set, does not exceed
      `context_length`;
    - `provenance_url` and `provenance_match_text` are non-empty, set
      together, and both present when `refresh_check_hf_gating` is
      False.

    Any violation raises `ValueError` inside the validator, which
    pydantic reports as a validation error at construction time.
    """

    model_config = ConfigDict(extra="forbid", frozen=True)

    # Identity + architecture.
    key: str = Field(..., min_length=1, description="Registry slug (e.g. `qwen2.5-1.5b`).")
    hf_id: str = Field(
        ..., min_length=1, description="HuggingFace id, e.g. `Qwen/Qwen2.5-1.5B-Instruct`."
    )
    revision: str = Field(..., description="40-char commit SHA; never a branch.")
    # Non-empty like every other identifier string on the spec.
    architecture: str = Field(
        ..., min_length=1, description="transformers `config.architectures[0]` value."
    )
    params: int = Field(..., gt=0, description="Parameter count; drives hardware doctor.")
    # NOTE(review): `frozen=True` blocks attribute reassignment, but this is
    # still a mutable list — callers should treat it as read-only.
    target_modules: list[str] = Field(..., min_length=1)
    template: Literal[
        "chatml",
        "qwen3thinking",
        "gemma2",
        "smollm3",
        "olmo2",
        "llama3",
        "phi3",
        "phi4mini",
        "mistral",
        "paligemma",
        "qwen2-audio",
        "qwen2-vl",
        "internvl2",
    ]
    gguf_arch: str = Field(..., min_length=1, description="Name llama.cpp's converter uses.")
    tokenizer_pre: str = Field(..., min_length=1, description="Pre-tokenizer label.")

    # License + acceptance.
    license_spdx: str = Field(..., min_length=1)
    license_url: str | None = None
    requires_acceptance: bool = False
    redistributable: bool = Field(
        ...,
        description="True iff the license allows bundling the base inside a .dlm.pack.",
    )
    # trust_remote_code: `True` for bases whose HF class lives in the
    # model's own repo (custom `modeling_*.py` files) rather than in
    # transformers. Picking such a base as `base_model:` in a .dlm is
    # the user's informed acknowledgment — the registry entry carries
    # a docstring caveat, vl-memory.md + the cookbook flag it, and the
    # loader only passes `trust_remote_code=True` to HF when this is
    # `True` on the spec. Defaults to False so non-custom bases can
    # never accidentally opt into remote code.
    trust_remote_code: bool = False

    # Size + context hints.
    size_gb_fp16: float = Field(..., gt=0)
    context_length: int = Field(..., gt=0)
    # Optional practical ceiling; bounded by `context_length` below.
    context_length_effective: int | None = Field(None, gt=0)
    recommended_seq_len: int = Field(..., gt=0)
    reasoning_tuned: bool = False

    # Live-registry refresh hints. The two provenance fields form one
    # probe and must be set together. They are required to be non-empty:
    # an empty string is "not None", so it would otherwise pass the
    # completeness check below without describing a usable probe.
    refresh_check_hf_gating: bool = True
    provenance_url: str | None = Field(None, min_length=1)
    provenance_match_text: str | None = Field(None, min_length=1)

    # Modality + multi-modal preprocessing (schema v10 + v11, plus the
    # additive `text-moe` discriminator).
    # Text-family bases (`modality in {"text", "text-moe"}`) leave both
    # plans None; `modality="vision-language"` requires a
    # `vl_preprocessor_plan` and rejects an audio plan;
    # `modality="audio-language"` requires an `audio_preprocessor_plan`
    # and rejects a vl plan. Every invariant is enforced below at
    # validate time.
    modality: Literal["text", "text-moe", "vision-language", "audio-language"] = "text"
    vl_preprocessor_plan: VlPreprocessorPlan | None = None
    audio_preprocessor_plan: AudioPreprocessorPlan | None = None

    @model_validator(mode="after")
    def _modality_matches_plan(self) -> BaseModelSpec:
        """Require exactly the preprocessor plan that `modality` calls for.

        Raises:
            ValueError: If the plan matching the modality is missing, or
                a plan belonging to a different modality is present.
        """
        if self.modality == "vision-language":
            if self.vl_preprocessor_plan is None:
                raise ValueError(
                    f"base {self.key!r}: modality='vision-language' requires "
                    "a vl_preprocessor_plan (pinned image size + token shape)"
                )
            if self.audio_preprocessor_plan is not None:
                raise ValueError(
                    f"base {self.key!r}: audio_preprocessor_plan is invalid "
                    "on a vision-language base"
                )
        elif self.modality == "audio-language":
            if self.audio_preprocessor_plan is None:
                raise ValueError(
                    f"base {self.key!r}: modality='audio-language' requires "
                    "an audio_preprocessor_plan (pinned sample_rate + token shape)"
                )
            if self.vl_preprocessor_plan is not None:
                raise ValueError(
                    f"base {self.key!r}: vl_preprocessor_plan is invalid on an audio-language base"
                )
        else:  # "text" or "text-moe"
            if self.vl_preprocessor_plan is not None:
                raise ValueError(
                    f"base {self.key!r}: vl_preprocessor_plan only valid with "
                    "modality='vision-language'"
                )
            if self.audio_preprocessor_plan is not None:
                raise ValueError(
                    f"base {self.key!r}: audio_preprocessor_plan only valid "
                    "with modality='audio-language'"
                )
        return self

    @model_validator(mode="after")
    def _effective_context_length_is_bounded(self) -> BaseModelSpec:
        """Reject an effective context length above the nominal one.

        Raises:
            ValueError: If `context_length_effective` is set and exceeds
                `context_length`.
        """
        if (
            self.context_length_effective is not None
            and self.context_length_effective > self.context_length
        ):
            raise ValueError(
                f"base {self.key!r}: context_length_effective={self.context_length_effective} "
                f"cannot exceed context_length={self.context_length}"
            )
        return self

    @model_validator(mode="after")
    def _provenance_probe_is_complete(self) -> BaseModelSpec:
        """Check the provenance probe is all-or-nothing and present when needed.

        Raises:
            ValueError: If only one of `provenance_url` /
                `provenance_match_text` is set, or if
                `refresh_check_hf_gating` is False without a probe.
        """
        url_set = self.provenance_url is not None
        text_set = self.provenance_match_text is not None
        if url_set != text_set:
            raise ValueError(
                f"base {self.key!r}: provenance_url and provenance_match_text must be set together"
            )
        # Past the check above url_set == text_set, so testing one suffices.
        if not self.refresh_check_hf_gating and not url_set:
            raise ValueError(
                f"base {self.key!r}: refresh_check_hf_gating=False requires a "
                "first-party provenance_url + provenance_match_text"
            )
        return self

    @property
    def suggested_prompt_temperature(self) -> float:
        """Default sampling temperature for `dlm prompt`.

        Most instruct bases keep the long-standing 0.7 default.
        Reasoning-tuned bases run slightly cooler by default so the
        chain-of-thought control tokens they were tuned around stay
        stable when the user omits `--temp`.
        """
        if self.reasoning_tuned:
            return DEFAULT_REASONING_PROMPT_TEMPERATURE
        return DEFAULT_PROMPT_TEMPERATURE

    @property
    def effective_context_length(self) -> int:
        """Context window `dlm doctor` should estimate against.

        Registry rows can pin a lower practical ceiling than the
        model's advertised nominal context length. When no such hint is
        set, the nominal context window remains the source of truth.
        """
        # Explicit None check: "unset" is the only case that falls back.
        if self.context_length_effective is not None:
            return self.context_length_effective
        return self.context_length

    @field_validator("revision")
    @classmethod
    def _validate_revision(cls, value: str) -> str:
        """Accept only a 40-char lowercase hex commit SHA.

        Raises:
            ValueError: If `value` does not match `_SHA_RE`.
        """
        if not _SHA_RE.fullmatch(value):
            raise ValueError(f"revision must be a 40-char lowercase hex SHA, got {value!r}")
        return value

    @field_validator("hf_id")
    @classmethod
    def _validate_hf_id(cls, value: str) -> str:
        """Accept only ids shaped `org/name` with a single `/`.

        Raises:
            ValueError: If the separator is missing, leads or trails the
                id, or appears more than once.
        """
        if "/" not in value or value.startswith("/") or value.endswith("/"):
            raise ValueError(f"hf_id must be 'org/name', got {value!r}")
        # With leading/trailing slashes excluded above, exactly one
        # separator guarantees both `org` and `name` are non-empty.
        if value.count("/") != 1:
            raise ValueError(f"hf_id must be 'org/name' (single `/`), got {value!r}")
        return value

    @field_validator("target_modules")
    @classmethod
    def _validate_target_modules(cls, value: list[str]) -> list[str]:
        """Reject a target list that contains an empty module name.

        Raises:
            ValueError: If any entry of `value` is the empty string.
        """
        if any(not m for m in value):
            raise ValueError("target_modules must not contain empty strings")
        return value