| 1 |
"""License acceptance records for gated base models. |
| 2 |
|
| 3 |
The `BaseModelSpec` schema already carries `requires_acceptance`, |
| 4 |
`redistributable`, `license_spdx`, and `license_url`. This module adds |
| 5 |
the *acceptance record* — a small Pydantic model that stores "user X |
| 6 |
accepted license Y at time T via path Z", plus a helper that validates |
| 7 |
an `accept_license` flag against the spec. |
| 8 |
|
| 9 |
`LicenseAcceptance` rides on two load-bearing files: |
| 10 |
|
| 11 |
- `manifest.json.license_acceptance`: the per-store durable record; |
| 12 |
read on every subsequent `dlm train` to verify the acceptance |
| 13 |
fingerprint is still present. |
| 14 |
- Per-store `dlm.lock.license_acceptance`: the determinism-contract |
| 15 |
mirror; divergence between the two triggers a lock re-check. |
| 16 |
|
| 17 |
The interactive prompt in `dlm init` lives in the CLI layer; this
module ships the data types and helpers that the prompt calls.
| 19 |
""" |
| 20 |
|
| 21 |
from __future__ import annotations |
| 22 |
|
| 23 |
from datetime import UTC, datetime |
| 24 |
from typing import Literal |
| 25 |
|
| 26 |
from pydantic import BaseModel, ConfigDict, Field |
| 27 |
|
| 28 |
from dlm.base_models.errors import GatedModelError |
| 29 |
from dlm.base_models.schema import BaseModelSpec |
| 30 |
|
| 31 |
# Closed set of channels through which a license acceptance can be captured;
# used as the type of `LicenseAcceptance.via` and of the `via` argument to
# `require_acceptance`.
AcceptanceVia = Literal[
    "cli_flag",
    "interactive",
    "frontmatter",
]
| 32 |
|
| 33 |
|
| 34 |
class LicenseAcceptance(BaseModel):
    """Immutable record that a gated base model's license was accepted.

    Instances are frozen and reject unknown fields (see `model_config`).

    The `via` field says which channel captured the acceptance:

    - `"cli_flag"` — `--i-accept-license` on init/train (explicit).
    - `"interactive"` — `y/N` prompt.
    - `"frontmatter"` — persisted in `.dlm` frontmatter.

    `license_url` is stored as it was at acceptance time, so that a later
    upstream URL change stays auditable: the recorded URL remains the
    user's contract and drift is visible without rewriting history.
    """

    model_config = ConfigDict(frozen=True, extra="forbid")

    # When the license was accepted (`require_acceptance` stamps naive UTC).
    accepted_at: datetime
    # License URL as seen at acceptance time; required, must be non-empty.
    license_url: str = Field(min_length=1)
    # SPDX identifier of the accepted license; required, must be non-empty.
    license_spdx: str = Field(min_length=1)
    # Channel through which the acceptance was captured.
    via: AcceptanceVia
| 54 |
|
| 55 |
|
| 56 |
def _utcnow() -> datetime: |
| 57 |
"""Tz-naive UTC, microseconds zeroed — matches Manifest's clock.""" |
| 58 |
return datetime.now(UTC).replace(tzinfo=None, microsecond=0) |
| 59 |
|
| 60 |
|
| 61 |
def is_gated(spec: BaseModelSpec) -> bool:
    """Tell whether `spec` needs an explicit license acceptance.

    This simply forwards `spec.requires_acceptance`. It exists so callers
    have a single named entry point for the "is this model gated?"
    question, mirroring `require_acceptance`.
    """
    needs_acceptance = spec.requires_acceptance
    return needs_acceptance
| 69 |
|
| 70 |
|
| 71 |
def require_acceptance(
    spec: BaseModelSpec,
    *,
    accept_license: bool,
    via: AcceptanceVia,
) -> LicenseAcceptance | None:
    """Build the `LicenseAcceptance` for `spec`, or refuse with an error.

    Outcomes:

    - `spec` is not gated: returns `None`; no record is needed.
    - `spec` is gated and `accept_license` is true: returns a new record
      whose `accepted_at` comes from `_utcnow()` (naive UTC, whole-second
      resolution) and whose URL / SPDX id are copied from `spec`.
    - `spec` is gated and `accept_license` is false: raises
      `GatedModelError` carrying the model id and license URL, so the
      caller (CLI) can surface the URL and the flag instruction.

    Raises:
        GatedModelError: acceptance was not given, or the gated spec has
            no `license_url` at all.
    """
    gated = is_gated(spec)
    if not gated:
        return None

    if not accept_license:
        raise GatedModelError(spec.hf_id, spec.license_url)

    recorded_url = spec.license_url
    if recorded_url is None:
        # A gated spec without a license URL is a registry bug, not a user
        # error; fail loudly so the registry tests catch it.
        raise GatedModelError(spec.hf_id, license_url=None)

    stamped_at = _utcnow()
    return LicenseAcceptance(
        accepted_at=stamped_at,
        license_url=recorded_url,
        license_spdx=spec.license_spdx,
        via=via,
    )