"""In-place frontmatter migration — the write path.

Complements `dlm.doc.versioned.validate_versioned` (the *read* path
used by `parse_file`). The read path migrates in memory and never
touches the source file; the write path is what flips a document's
on-disk `dlm_version` and rewrites the frontmatter.

`migrate_file(path, ...)` is the single entry point. The CLI shell
in `dlm.cli.commands.migrate_cmd` is a thin wrapper over this.

Flow:

1. Read raw text (UTF-8 strict, LF-normalized — the project-wide
   contract from `dlm.io.text.read_text`).
2. Split frontmatter and body on the `---` delimiters.
3. YAML-parse the raw frontmatter into a dict.
4. Run `apply_pending` up to `CURRENT_SCHEMA_VERSION`.
5. If nothing applied → return a `MigrationResult` with `applied=[]`
   and `wrote=False` (idempotent exit).
6. Otherwise: Pydantic-validate the migrated dict, serialize the new
   frontmatter, join it with the original body text (content untouched;
   only leading and trailing newlines are normalized), and atomically
   replace `path` (after writing `<path>.bak` unless `no_backup=True`).
"""

from __future__ import annotations

import shutil
from dataclasses import dataclass
from pathlib import Path

import yaml

from dlm.doc.errors import FrontmatterError
from dlm.doc.migrations.dispatch import apply_pending
from dlm.doc.schema import CURRENT_SCHEMA_VERSION, DlmFrontmatter
from dlm.doc.sections import Section, SectionType
from dlm.doc.serializer import serialize
from dlm.io.atomic import write_text
from dlm.io.text import read_text

_FRONTMATTER_DELIM = "---" |
| 41 |
|
| 42 |
|
| 43 |
@dataclass(frozen=True) |
| 44 |
class MigrationResult: |
| 45 |
"""Outcome of a `migrate_file` call.""" |
| 46 |
|
| 47 |
path: Path |
| 48 |
applied: list[int] |
| 49 |
target_version: int |
| 50 |
backup_path: Path | None |
| 51 |
wrote: bool |
| 52 |
|
| 53 |
|
| 54 |
def migrate_file(
    path: Path,
    *,
    dry_run: bool = False,
    no_backup: bool = False,
) -> MigrationResult:
    """Migrate `path` up to `CURRENT_SCHEMA_VERSION`.

    Args:
        path: The document whose frontmatter is migrated in place.
        dry_run: Report what *would* run without writing. The migrated
            frontmatter is still validated and rendered, so a broken
            migrator surfaces in a dry run too.
        no_backup: Skip the `<path>.bak` safety copy.

    Returns:
        A `MigrationResult`. `applied=[]` means the document was already at
        or beyond `CURRENT_SCHEMA_VERSION` — a clean no-op (`wrote=False`,
        no backup).

    Raises:
        FrontmatterError: If the `---` delimiters are missing, the YAML
            does not parse, or the frontmatter is not a mapping.

    Whatever `DlmFrontmatter.model_validate` raises for a migrated dict that
    does not fit the current schema propagates unchanged.
    """
    text = read_text(path)
    yaml_text, body_text = _split_for_migrate(text, path=path)

    try:
        # A blank frontmatter block counts as an empty mapping; `safe_load`
        # would hand back None for it.
        raw = yaml.safe_load(yaml_text) if yaml_text.strip() else {}
    except yaml.YAMLError as exc:
        raise FrontmatterError(
            f"invalid YAML: {exc}",
            path=path,
            line=2,  # first line after the opening delimiter
        ) from exc

    if not isinstance(raw, dict):
        raise FrontmatterError(
            f"frontmatter must be a mapping, got {type(raw).__name__}",
            path=path,
            line=2,
        )

    migrated, applied = apply_pending(raw, target_version=CURRENT_SCHEMA_VERSION)
    if not applied:
        # Nothing pending: leave the file (and any existing backup) untouched.
        return MigrationResult(
            path=path,
            applied=[],
            target_version=CURRENT_SCHEMA_VERSION,
            backup_path=None,
            wrote=False,
        )

    # Validate post-migration dict against the current schema so a bad
    # migrator can't silently smear garbage into the document.
    fm = DlmFrontmatter.model_validate(migrated)
    # Rendered before the dry-run check on purpose: serialization problems
    # are reported even when nothing will be written.
    new_text = _rejoin(fm, body_text)

    if dry_run:
        return MigrationResult(
            path=path,
            applied=applied,
            target_version=CURRENT_SCHEMA_VERSION,
            backup_path=None,
            wrote=False,
        )

    backup_path: Path | None = None
    if not no_backup:
        # Append ".bak" to the full file name (e.g. "doc.dlm" -> "doc.dlm.bak").
        # The copy is taken before the rewrite so the original survives it.
        backup_path = path.with_name(path.name + ".bak")
        shutil.copy2(path, backup_path)

    write_text(path, new_text)
    return MigrationResult(
        path=path,
        applied=applied,
        target_version=CURRENT_SCHEMA_VERSION,
        backup_path=backup_path,
        wrote=True,
    )


# --- internals ------------------------------------------------------------


def _split_for_migrate(text: str, *, path: Path) -> tuple[str, str]:
    """Split `text` into `(frontmatter_yaml, body_text)`.

    Mirrors the parser's frontmatter split but does not track body line
    numbers — the body is returned verbatim for rewrite purposes.

    Args:
        text: Full document text, expected to use "\\n" line endings.
        path: Source path, used only to annotate raised errors.

    Returns:
        The text between the opening and closing `---` lines, and
        everything after the closing line. Neither includes the delimiters.

    Raises:
        FrontmatterError: If line 1 is not exactly `---`, or no later line
            is exactly `---`.
    """
    lines = text.split("\n")
    # `str.split` always yields at least one element, so indexing [0] is
    # safe even for an empty document.
    if lines[0] != _FRONTMATTER_DELIM:
        raise FrontmatterError(
            "expected '---' on line 1 to open frontmatter",
            path=path,
            line=1,
            col=1,
        )

    try:
        # Start at index 1 so the opening delimiter is not matched again.
        closer = lines.index(_FRONTMATTER_DELIM, 1)
    except ValueError:
        raise FrontmatterError(
            "no closing '---' found for frontmatter block",
            path=path,
            line=1,
        ) from None

    yaml_text = "\n".join(lines[1:closer])
    body = "\n".join(lines[closer + 1 :])
    return yaml_text, body


def _rejoin(fm: DlmFrontmatter, body_text: str) -> str:
    """Re-assemble a `.dlm` file from a migrated frontmatter + raw body.

    The body content is not re-serialized (migration never touches section
    content); the serializer is only invoked for the frontmatter header.
    Leading and trailing newlines of the body are normalized so the result
    has one blank line after the header and a single trailing newline.

    Args:
        fm: The validated, migrated frontmatter.
        body_text: Everything that followed the closing `---` in the source.

    Returns:
        The header followed by a blank line, the trimmed body and one
        newline — or the header alone when the body is empty after trimming.
    """
    # Imported inside the function rather than at module level; presumably
    # this avoids an import cycle with the parser — confirm before moving it.
    from dlm.doc.parser import ParsedDlm

    # ParsedDlm serializer emits frontmatter + "\n" + sections. We bypass
    # section serialization by handing an empty sections tuple and
    # concatenating the raw body manually.
    header_only = ParsedDlm(frontmatter=fm, sections=_empty_sections())
    header = serialize(header_only)  # expected to end with "\n"

    # Only newline characters are trimmed; other whitespace at the edges of
    # the body is kept as-is.
    body = body_text.strip("\n")
    if not body:
        return header
    return f"{header}\n{body}\n"


def _empty_sections() -> tuple[Section, ...]:
    """Return the empty sections tuple handed to the serializer.

    `_rejoin` splices the raw body in itself, so the serializer is given no
    sections to render.
    """
    # Keeps the module-level `SectionType` import referenced; the assignment
    # has no runtime effect.
    _ = SectionType
    return ()