Python · 5877 bytes Raw Blame History
1 """In-place frontmatter migration — the write path.
2
3 Complements `dlm.doc.versioned.validate_versioned` (the *read* path
4 used by `parse_file`). The read path migrates in memory and never
5 touches the source file; the write path is what flips a document's
6 on-disk `dlm_version` and rewrites the frontmatter.
7
8 `migrate_file(path, ...)` is the single entry point. The CLI shell
9 in `dlm.cli.commands.migrate_cmd` is a thin wrapper over this.
10
11 Flow:
12
13 1. Read raw text (UTF-8 strict, LF-normalized — the project-wide
14 contract from `dlm.io.text.read_text`).
15 2. Split frontmatter and body on the `---` delimiters.
16 3. YAML-parse the raw frontmatter into a dict.
17 4. Run `apply_pending` up to `CURRENT_SCHEMA_VERSION`.
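5. If nothing applied → return a `MigrationResult` with `applied=[]`
   and `wrote=False` (idempotent exit; nothing is written).
6. Otherwise: Pydantic-validate the migrated dict, serialize the new
   frontmatter, and join it with the original body text (body content is
   untouched; only its leading/trailing newlines are normalized). With
   `dry_run=True` stop here without writing; otherwise atomically
   replace `path` (after writing `<path>.bak` unless `no_backup=True`).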
22 """
23
24 from __future__ import annotations
25
26 import shutil
27 from dataclasses import dataclass
28 from pathlib import Path
29
30 import yaml
31
32 from dlm.doc.errors import FrontmatterError
33 from dlm.doc.migrations.dispatch import apply_pending
34 from dlm.doc.schema import CURRENT_SCHEMA_VERSION, DlmFrontmatter
35 from dlm.doc.sections import Section, SectionType
36 from dlm.doc.serializer import serialize
37 from dlm.io.atomic import write_text
38 from dlm.io.text import read_text
39
40 _FRONTMATTER_DELIM = "---"
41
42
@dataclass(frozen=True)
class MigrationResult:
    """Immutable report describing what a `migrate_file` call did."""

    # The document path that was handed to `migrate_file`.
    path: Path
    # Migration steps reported by `apply_pending`; empty when nothing ran.
    applied: list[int]
    # Schema version the migration was aiming for.
    target_version: int
    # Location of the `.bak` safety copy, or None when no copy was made.
    backup_path: Path | None
    # True only when `path` was actually rewritten on disk.
    wrote: bool
52
53
def migrate_file(
    path: Path,
    *,
    dry_run: bool = False,
    no_backup: bool = False,
) -> MigrationResult:
    """Migrate `path` up to `CURRENT_SCHEMA_VERSION`.

    - `dry_run=True` reports what *would* run without writing (neither
      the backup nor the document is touched).
    - `no_backup=True` skips the `<path>.bak` safety copy.

    Returns a `MigrationResult`. `applied=[]` means the document was
    already at or beyond `CURRENT_SCHEMA_VERSION` — a clean no-op.
    `wrote` is True only when `path` was rewritten.

    Raises `FrontmatterError` when the `---` delimiters are missing (via
    `_split_for_migrate`), when the frontmatter is not valid YAML, or when
    it does not parse to a mapping. Errors raised by
    `DlmFrontmatter.model_validate` are not caught here and propagate
    to the caller unchanged.
    """
    text = read_text(path)
    yaml_text, body_text = _split_for_migrate(text, path=path)

    # The frontmatter YAML starts on line 2 (line 1 is the opening `---`),
    # hence `line=2` in the errors below.
    try:
        # Blank frontmatter skips the YAML load entirely.
        raw = yaml.safe_load(yaml_text) if yaml_text.strip() else None
    except yaml.YAMLError as exc:
        raise FrontmatterError(
            f"invalid YAML: {exc}",
            path=path,
            line=2,
        ) from exc

    # `safe_load` returns None for an empty YAML document, which includes
    # frontmatter that holds only comments. Treat that the same as blank
    # frontmatter instead of failing with "got NoneType".
    if raw is None:
        raw = {}

    if not isinstance(raw, dict):
        raise FrontmatterError(
            f"frontmatter must be a mapping, got {type(raw).__name__}",
            path=path,
            line=2,
        )

    migrated, applied = apply_pending(raw, target_version=CURRENT_SCHEMA_VERSION)
    if not applied:
        # Nothing pending: leave the file untouched.
        return MigrationResult(
            path=path,
            applied=[],
            target_version=CURRENT_SCHEMA_VERSION,
            backup_path=None,
            wrote=False,
        )

    # Validate post-migration dict against the current schema so a bad
    # migrator can't silently smear garbage into the document. This runs
    # for dry runs too, so a dry run surfaces the same failures.
    fm = DlmFrontmatter.model_validate(migrated)
    new_text = _rejoin(fm, body_text)

    if dry_run:
        return MigrationResult(
            path=path,
            applied=applied,
            target_version=CURRENT_SCHEMA_VERSION,
            backup_path=None,
            wrote=False,
        )

    backup_path: Path | None = None
    if not no_backup:
        # Copy the still-unmodified on-disk file before it is replaced.
        backup_path = path.with_suffix(path.suffix + ".bak")
        shutil.copy2(path, backup_path)

    write_text(path, new_text)
    return MigrationResult(
        path=path,
        applied=applied,
        target_version=CURRENT_SCHEMA_VERSION,
        backup_path=backup_path,
        wrote=True,
    )
124
125
126 # --- internals ------------------------------------------------------------
127
128
def _split_for_migrate(text: str, *, path: Path) -> tuple[str, str]:
    """Split `text` into (frontmatter_yaml, body_text).

    Mirrors the parser's frontmatter split but does not track body line
    numbers — the body is returned verbatim for rewrite purposes.

    The first line must be exactly `---`; the frontmatter runs up to the
    next line that is exactly `---`. Neither delimiter line is included
    in the returned strings.

    Raises `FrontmatterError` when the opening or the closing delimiter
    is missing. `path` is only used to label those errors.
    """
    # `str.split` with an explicit separator always yields at least one
    # element (`"".split("\n") == [""]`), so `lines[0]` is always safe.
    lines = text.split("\n")
    if lines[0] != _FRONTMATTER_DELIM:
        raise FrontmatterError(
            "expected '---' on line 1 to open frontmatter",
            path=path,
            line=1,
            col=1,
        )

    try:
        # Start at index 1 so the opening delimiter is not matched again.
        closer = lines.index(_FRONTMATTER_DELIM, 1)
    except ValueError:
        raise FrontmatterError(
            "no closing '---' found for frontmatter block",
            path=path,
            line=1,
        ) from None

    yaml_text = "\n".join(lines[1:closer])
    body = "\n".join(lines[closer + 1 :])
    return yaml_text, body
153
154
def _rejoin(fm: DlmFrontmatter, body_text: str) -> str:
    """Build the full `.dlm` text from migrated frontmatter and the raw body.

    Only the frontmatter goes through the serializer; section content is
    never re-serialized. The body's leading and trailing newlines are
    trimmed so the output has one blank line between the header and the
    first body line, and ends with a single newline. An empty body yields
    the header alone.
    """
    # NOTE(review): function-scope import, presumably to avoid an import
    # cycle with the parser module — confirm before hoisting.
    from dlm.doc.parser import ParsedDlm

    # Serialize a document with no sections so that only the frontmatter
    # header is emitted; the raw body is appended by hand below.
    header_only = ParsedDlm(frontmatter=fm, sections=_empty_sections())
    header = serialize(header_only)  # expected to end with "\n"

    trimmed = body_text.strip("\n")
    if not trimmed:
        return header
    return f"{header}\n{trimmed}\n"
177
178
def _empty_sections() -> tuple[Section, ...]:
    """Return the empty sections tuple handed to the serializer.

    The caller splices the real body in as raw text afterwards.
    """
    # Touch `SectionType` so its module-level import counts as used.
    _ = SectionType
    no_sections: tuple[Section, ...] = ()
    return no_sections