Python · 3452 bytes Raw Blame History
1 """License acceptance records for gated base models.
2
3 The `BaseModelSpec` schema already carries `requires_acceptance`,
4 `redistributable`, `license_spdx`, and `license_url`. This module adds
5 the *acceptance record* — a small Pydantic model that stores "user X
6 accepted license Y at time T via path Z", plus a helper that validates
7 an `accept_license` flag against the spec.
8
9 `LicenseAcceptance` rides on two load-bearing files:
10
11 - `manifest.json.license_acceptance`: the per-store durable record;
12 read on every subsequent `dlm train` to verify the acceptance
13 fingerprint is still present.
14 - Per-store `dlm.lock.license_acceptance`: the determinism-contract
15 mirror; divergence between the two triggers a lock re-check.
16
17 The interactive prompt in `dlm init` lives in the CLI layer; this
18 module ships the data types + helpers that prompt calls.
19 """
20
21 from __future__ import annotations
22
23 from datetime import UTC, datetime
24 from typing import Literal
25
26 from pydantic import BaseModel, ConfigDict, Field
27
28 from dlm.base_models.errors import GatedModelError
29 from dlm.base_models.schema import BaseModelSpec
30
# Closed set of channels through which a license acceptance can be captured;
# this is the type of `LicenseAcceptance.via`.
AcceptanceVia = Literal[
    "cli_flag",
    "interactive",
    "frontmatter",
]
32
33
class LicenseAcceptance(BaseModel):
    """Immutable record that the license of a gated base was accepted.

    Captures when the acceptance happened, which license it covered
    (URL + SPDX identifier), and the channel it arrived through.

    `via` is one of:

    - `"cli_flag"` — explicit `--i-accept-license` on init/train.
    - `"interactive"` — answer to the `y/N` prompt.
    - `"frontmatter"` — persisted in `.dlm` frontmatter.

    `license_url` is snapshotted at acceptance time. If upstream later
    moves the license, the stored URL remains the user's contract and
    the drift is auditable without rewriting history.
    """

    # Reject unknown keys and make instances read-only once constructed.
    model_config = ConfigDict(frozen=True, extra="forbid")

    # Timestamp of the acceptance.
    accepted_at: datetime
    # Both license identifiers are required and must be non-empty.
    license_url: str = Field(min_length=1)
    license_spdx: str = Field(min_length=1)
    # Channel the acceptance came through (see the list above).
    via: AcceptanceVia
54
55
56 def _utcnow() -> datetime:
57 """Tz-naive UTC, microseconds zeroed — matches Manifest's clock."""
58 return datetime.now(UTC).replace(tzinfo=None, microsecond=0)
59
60
def is_gated(spec: BaseModelSpec) -> bool:
    """Tell whether `spec` needs an explicit license acceptance.

    This simply reports `spec.requires_acceptance`. It exists so callers
    have a single named entry point for the "is this model gated?"
    question, mirroring `require_acceptance` in this module.
    """
    gated = spec.requires_acceptance
    return gated
69
70
def require_acceptance(
    spec: BaseModelSpec,
    *,
    accept_license: bool,
    via: AcceptanceVia,
) -> LicenseAcceptance | None:
    """Build the acceptance record for `spec`, or refuse with `GatedModelError`.

    Outcomes, in the order they are checked:

    - `spec` is not gated: returns `None`, since no record is needed.
    - `spec` is gated and `accept_license` is false: raises
      `GatedModelError` carrying the model id and license URL, so the
      caller (CLI) can surface the license URL + flag instruction.
    - `spec` is gated and accepted but has no license URL: raises
      `GatedModelError` (see the comment on that guard).
    - Otherwise: returns a fresh `LicenseAcceptance` stamped with
      `_utcnow()` (tz-naive UTC, whole seconds) and copying the license
      URL and SPDX id from `spec`.

    `via` is stored on the record unchanged.
    """
    if not is_gated(spec):
        return None

    url = spec.license_url
    if not accept_license:
        raise GatedModelError(spec.hf_id, url)
    if url is None:
        # Defensive: a spec that requires acceptance but has no URL is
        # a registry bug. Fail loud so the registry tests catch it.
        raise GatedModelError(spec.hf_id, license_url=None)

    stamp = _utcnow()
    return LicenseAcceptance(
        accepted_at=stamp,
        license_url=url,
        license_spdx=spec.license_spdx,
        via=via,
    )