"""Shared Modelfile directive builders. Used by both `modelfile.py` (text-only path) and `vl_modelfile.py` (vision-language path). Hoisted out so the VL renderer doesn't reach across `_`-prefixed names into its sibling module and no longer needs a ModelfileContext adapter bridge. All public helpers here take the minimal field set they read. Both call sites pack their own context shape and hand field values in — the renderers keep their own frozen dataclasses as the authoritative input type; the helpers stay input-shape-agnostic. Security note: `build_system_line` + `build_license_line` pass their strings through `json.dumps`. Ollama's Modelfile grammar accepts JSON string-literal escapes verbatim (`\\"`, `\\n`, `\\\\`), so a hostile prompt surfaces as content rather than metaparse. See `modelfile.py` module docstring for the full rationale. """ from __future__ import annotations import json from datetime import UTC, datetime from pathlib import Path from typing import TYPE_CHECKING from dlm.export.ollama.errors import ModelfileError if TYPE_CHECKING: from dlm.base_models import BaseModelSpec from dlm.export.ollama.template_registry import DialectTemplate def build_header( *, dlm_version: str, dlm_id: str, adapter_version: int, base_key: str, base_revision: str, quant: str, merged: bool, source_dlm_path: Path | None = None, ) -> str: """Top-of-file `# Generated by dlm …` comment block.""" now = datetime.now(UTC).replace(tzinfo=None, microsecond=0).isoformat() lines = [f"# Generated by dlm {dlm_version} on {now}"] if source_dlm_path is not None: lines.append(f"# Source: {source_dlm_path}") lines.extend( [ f"# dlm_id: {dlm_id}", f"# adapter_version: {adapter_version}", f"# base_model: {base_key} (revision {base_revision})", f"# quant: {quant}", f"# merged: {merged}", ] ) return "\n".join(lines) def resolve_stops(adapter_dir: Path, template_row: DialectTemplate) -> tuple[str, ...]: """Union of dialect defaults + EOS/added-tokens from the adapter tokenizer. Per the tokenizer contract: added special tokens from the adapter tokenizer become additional stops. Without this, a pad-token-grown model emits `<|pad|>` indefinitely. """ merged: list[str] = list(template_row.default_stops) merged.extend(template_row.extra_stop_hints) adapter_stops = _read_adapter_stops(adapter_dir) for tok in adapter_stops: if tok and tok not in merged: merged.append(tok) seen: set[str] = set() unique: list[str] = [] for tok in merged: if tok not in seen: seen.add(tok) unique.append(tok) return tuple(unique) def _read_adapter_stops(adapter_dir: Path) -> list[str]: """Pull `eos_token` + added-tokens from the adapter tokenizer config.""" cfg_path = adapter_dir / "tokenizer_config.json" if not cfg_path.exists(): return [] try: cfg = json.loads(cfg_path.read_text(encoding="utf-8")) except (OSError, json.JSONDecodeError) as exc: raise ModelfileError( f"adapter tokenizer_config.json at {cfg_path} is unreadable: {exc}" ) from exc stops: list[str] = [] eos = cfg.get("eos_token") if isinstance(eos, str) and eos: stops.append(eos) elif isinstance(eos, dict) and isinstance(eos.get("content"), str): stops.append(eos["content"]) added = cfg.get("added_tokens_decoder") or {} if isinstance(added, dict): for entry in added.values(): if isinstance(entry, dict) and entry.get("special") is True: content = entry.get("content") if isinstance(content, str): stops.append(content) return stops def build_param_lines( *, stops: tuple[str, ...], temperature: float, top_p: float, num_ctx: int | None, draft_model: str | None, ) -> list[str]: """Emit the `PARAMETER stop …` + sampling defaults block.""" lines: list[str] = [] for stop in stops: lines.append(f"PARAMETER stop {json.dumps(stop)}") lines.append(f"PARAMETER temperature {temperature}") lines.append(f"PARAMETER top_p {top_p}") if num_ctx is not None: lines.append(f"PARAMETER num_ctx {num_ctx}") if draft_model is not None: lines.append(f"# Speculative decoding: `ollama pull {draft_model}` first.") lines.append(f"PARAMETER draft_model {draft_model}") return lines def resolve_num_ctx( training_sequence_len: int | None, spec_context_length: int, ) -> int | None: """Cap `training_sequence_len` at the base spec's `context_length`. Returns `None` when the document didn't pin a length — Ollama's 2048 default applies. Otherwise the capped length so a document trained at 8192 gets the window it expects without exceeding the base model's positional-embedding table. """ if training_sequence_len is None: return None return min(training_sequence_len, spec_context_length) def build_system_line(system_prompt: str | None) -> str | None: """JSON-escaped `SYSTEM "…"` directive, or `None` when no prompt.""" if system_prompt is None: return None stripped = system_prompt.strip() if not stripped: return None return f"SYSTEM {json.dumps(stripped)}" def build_license_line(spec: BaseModelSpec) -> str | None: """`LICENSE "…"` directive from the base model's SPDX id.""" if not spec.license_spdx: return None return f"LICENSE {json.dumps(spec.license_spdx)}"