"""`export_manifest.json` schema + atomic I/O.

One file per `exports/<quant>/` directory. Records:

- export target (`ollama`, `vllm`, `llama-server`, `mlx-serve`)
- quant level
- checksums of emitted GGUF artifacts
- pinned llama.cpp tag (so a future upstream bump can detect drift)
- created_at + created_by (dlm version)
- path to the generated Modelfile
- `ollama_name` the user registered with (if provided)

Frozen + `extra="forbid"` per the project's strict-schema norm.
Byte-identical JSON round-trip via `dlm.io.atomic.write_text`.
"""
16
from __future__ import annotations

import hashlib
import json
from datetime import UTC, datetime
from pathlib import Path

from pydantic import BaseModel, ConfigDict, Field, ValidationError

from dlm.export.errors import ExportManifestError
from dlm.io.atomic import write_text
28
# Well-known manifest filename written into every export directory.
EXPORT_MANIFEST_FILENAME = "export_manifest.json"
30
31
class ExportArtifact(BaseModel):
    """One emitted file: path relative to the export dir + sha256.

    Attributes documented via comments (strict schema: unknown keys are
    rejected, instances are immutable).
    """

    model_config = ConfigDict(extra="forbid", frozen=True)

    # Location relative to the export directory.
    path: str = Field(..., min_length=1)
    # Fix: the length bounds alone accepted any 64-char string; the pattern
    # restricts the field to an actual lowercase hex digest, which is what
    # `compute_sha256` produces via `hexdigest()`.
    sha256: str = Field(..., min_length=64, max_length=64, pattern=r"^[0-9a-f]{64}$")
    # File size in bytes at the time the artifact was hashed.
    size_bytes: int = Field(..., ge=0)
40
41
class ExportManifest(BaseModel):
    """Self-describing record of one export run."""

    model_config = ConfigDict(extra="forbid", frozen=True)

    # Export destination (`ollama`, `vllm`, ...) and quant level of this run.
    target: str = Field(default="ollama", min_length=1)
    quant: str = Field(..., min_length=1)
    # Flags recorded for this run — presumably whether the adapter was merged
    # and whether weights were dequantized first; confirm against the runner.
    merged: bool = Field(default=False)
    dequantized: bool = Field(default=False)
    # Name the user registered with ollama, when one was supplied.
    ollama_name: str | None = Field(default=None)
    created_at: datetime
    created_by: str = Field(..., description="dlm version that wrote this manifest.")
    # Pinned llama.cpp tag so a future upstream bump can detect drift.
    llama_cpp_tag: str | None = Field(default=None)
    # Provenance of the base model this export derives from.
    base_model_hf_id: str
    base_model_revision: str
    adapter_version: int = Field(..., ge=1)
    # Checksummed files emitted by this run.
    artifacts: list[ExportArtifact] = Field(default_factory=list)
59
60
def compute_sha256(path: Path, *, chunk_size: int = 1 << 20) -> str:
    """Streaming sha256 of `path`. Reused by the runner for every artifact.

    Args:
        path: File to hash.
        chunk_size: Read granularity in bytes (default 1 MiB), so large
            GGUF artifacts are never held in memory at once.

    Returns:
        Lowercase hex digest of the file contents.
    """
    digest = hashlib.sha256()
    with path.open("rb") as fh:
        # Walrus form replaces the manual `while True: ... if not chunk:
        # break` loop; the empty read at EOF ends iteration.
        while chunk := fh.read(chunk_size):
            digest.update(chunk)
    return digest.hexdigest()
71
72
def build_artifact(export_dir: Path, artifact_path: Path) -> ExportArtifact:
    """Assemble an `ExportArtifact` entry from a path on disk."""
    # Stat first (raises FileNotFoundError for a missing file, matching the
    # original failure order), then relativize: both sides are resolved to
    # absolute paths, and `relative_to` raises ValueError if the artifact
    # lives outside the export directory.
    info = artifact_path.stat()
    rel = artifact_path.resolve().relative_to(export_dir.resolve())
    digest = compute_sha256(artifact_path)
    return ExportArtifact(path=str(rel), sha256=digest, size_bytes=info.st_size)
82
83
def save_export_manifest(export_dir: Path, manifest: ExportManifest) -> Path:
    """Atomic JSON write to `<export_dir>/export_manifest.json`."""
    destination = export_dir / EXPORT_MANIFEST_FILENAME
    # Sorted keys + fixed indent + trailing newline keep the on-disk JSON
    # byte-stable across runs for the same manifest.
    serialized = json.dumps(manifest.model_dump(mode="json"), indent=2, sort_keys=True)
    write_text(destination, serialized + "\n")
    return destination
91
92
def load_export_manifest(export_dir: Path) -> ExportManifest:
    """Read + validate the JSON manifest at `<export_dir>/export_manifest.json`.

    Raises:
        ExportManifestError: if the file is missing, unreadable, not valid
            JSON, or does not match the `ExportManifest` schema.
    """
    path = export_dir / EXPORT_MANIFEST_FILENAME
    if not path.exists():
        raise ExportManifestError(f"missing {path}")
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, json.JSONDecodeError) as exc:
        raise ExportManifestError(f"cannot parse {path}: {exc}") from exc
    try:
        return ExportManifest.model_validate(data)
    except ValidationError as exc:
        # Fix: catch only pydantic's ValidationError instead of a bare
        # `Exception`, so genuine programming errors are no longer silently
        # re-labelled as a manifest-shape problem.
        raise ExportManifestError(f"{path} has invalid shape: {exc}") from exc
106
107
108 def utc_now() -> datetime:
109 """Tz-naive UTC with microseconds zeroed — matches the project convention."""
110 return datetime.now(UTC).replace(tzinfo=None, microsecond=0)