Python · 4717 bytes Raw Blame History
1 """Walk a source tree and collect every `.dlm/` configuration.
2
3 One `DiscoveredConfig` per `.dlm/` directory found under the walk
4 root. Each config aggregates both `.dlm/training.yaml` (parsed as
5 `DlmTrainingConfig`) and `.dlm/ignore` (parsed as a tuple of
6 `IgnoreRule`). Either or both may be absent — the presence of the
7 `.dlm/` directory alone is enough to produce a record (useful when a
8 user wants a pure drive-by `.dlm/ignore` without writing YAML).
9
10 Results are sorted by anchor path length ascending, so parents
11 appear before descendants. This matches the resolution order in
12 `dlm.directives.merge.effective_config_for`.
13
14 Malformed YAML or broken lines in `.dlm/ignore` log + degrade — the
15 walk never fails. The CLI has no way to recover from a mid-train
16 discovery crash, so tolerance here is load-bearing.
17 """
18
19 from __future__ import annotations
20
21 import logging
22 from dataclasses import dataclass
23 from pathlib import Path
24
25 import yaml
26 from pydantic import ValidationError
27
28 from dlm.directives.ignore_parser import IgnoreRule, parse_ignore_file
29 from dlm.directives.schema import DlmTrainingConfig
30 from dlm.io.text import DlmEncodingError, read_text
31
# Module-level logger; the loaders below report every degraded read through it.
_LOG = logging.getLogger(__name__)

# Names of the two files looked up inside each discovered `.dlm/` directory.
_CONFIG_FILENAME = "training.yaml"
_IGNORE_FILENAME = "ignore"
36
37
@dataclass(frozen=True)
class DiscoveredConfig:
    """Immutable record of what one `.dlm/` directory contributes.

    Fields:
        anchor: The directory that *holds* the `.dlm/` dir (a repo root
            or a subtree root). Relative paths in `config.include` /
            `config.exclude`, as well as ignore rules, are resolved
            against it.
        config: The parsed `.dlm/training.yaml`, or None when there is
            no usable config for this anchor.
        ignore_rules: Rules parsed from `.dlm/ignore`; an empty tuple
            when there are none.

    A `.dlm/` directory with nothing inside is still represented — as a
    no-op record with `config=None` and no rules — so a subtree can be
    marked explicitly without any YAML being written.
    """

    anchor: Path
    config: DlmTrainingConfig | None
    ignore_rules: tuple[IgnoreRule, ...]
56
57
def discover_configs(root: Path) -> tuple[DiscoveredConfig, ...]:
    """Walk `root` top-down and return a `DiscoveredConfig` per `.dlm/`.

    `root` itself is included — if `<root>/.dlm/` exists, it becomes
    the first (shallowest) discovered config. Each deeper `.dlm/`
    dir produces an additional record. Entries named `.dlm` that are
    not directories are skipped.

    Args:
        root: Directory to search. If it is not a directory, nothing
            is discovered.

    Returns:
        The records ordered by anchor depth (number of path
        components) ascending, so callers iterating can apply parent
        rules before child rules. Anchors at the same depth keep the
        sorted-path order of the walk.
    """
    if not root.is_dir():
        return ()

    discovered = [
        DiscoveredConfig(
            anchor=dlm_dir.parent,
            config=_load_training_yaml(dlm_dir / _CONFIG_FILENAME),
            ignore_rules=_load_ignore(dlm_dir / _IGNORE_FILENAME),
        )
        # sorted() pins the enumeration order so the stable sort below
        # yields a deterministic order among same-depth anchors.
        for dlm_dir in sorted(root.rglob(".dlm"))
        if dlm_dir.is_dir()
    ]

    # Order by component count, not by the character length of the path
    # string: a deep path with short names must not sort ahead of a
    # shallow path that merely has a long directory name.
    discovered.sort(key=lambda d: len(d.anchor.parts))
    return tuple(discovered)
83
84
def _load_training_yaml(path: Path) -> DlmTrainingConfig | None:
    """Load + validate a `.dlm/training.yaml`. Missing file → None.

    Malformed YAML, schema violations, encoding errors, or a file that
    cannot be read at all log one warning and return None. The anchor
    still produces a DiscoveredConfig (just with `config=None`), so a
    neighboring `.dlm/ignore` at the same anchor keeps working.

    An empty or whitespace-only file, and a document that parses to
    null, are both treated as an empty mapping and validated as such.

    Args:
        path: Location of the candidate `training.yaml`.

    Returns:
        The validated config, or None when the file is absent or
        unusable.
    """
    if not path.is_file():
        return None

    try:
        text = read_text(path)
    except DlmEncodingError as exc:
        _LOG.warning("discovery: %s: not UTF-8 (%s); skipping config", path, exc)
        return None
    except OSError as exc:
        # The file can vanish or be unreadable (permissions, I/O error)
        # after the is_file() check; degrade instead of aborting the walk.
        _LOG.warning("discovery: %s: unreadable (%s); skipping config", path, exc)
        return None

    try:
        raw = yaml.safe_load(text) if text.strip() else {}
    except yaml.YAMLError as exc:
        _LOG.warning("discovery: %s: invalid YAML (%s); skipping config", path, exc)
        return None

    # A comment-only document parses to None; treat it like an empty file.
    if raw is None:
        raw = {}
    if not isinstance(raw, dict):
        _LOG.warning(
            "discovery: %s: top-level must be a mapping, got %s; skipping config",
            path,
            type(raw).__name__,
        )
        return None

    try:
        return DlmTrainingConfig.model_validate(raw)
    except ValidationError as exc:
        _LOG.warning("discovery: %s: schema violation (%s); skipping config", path, exc)
        return None
122
123
def _load_ignore(path: Path) -> tuple[IgnoreRule, ...]:
    """Load + parse a `.dlm/ignore`. Missing file → empty tuple.

    A file that cannot be decoded as UTF-8, or cannot be read at all,
    logs one warning and degrades to empty rules. Text that is read
    successfully is handed to `parse_ignore_file` unchanged.

    Args:
        path: Location of the candidate `ignore` file.

    Returns:
        The parsed rules, or an empty tuple when the file is absent or
        unreadable.
    """
    if not path.is_file():
        return ()

    try:
        text = read_text(path)
    except DlmEncodingError as exc:
        _LOG.warning("discovery: %s: not UTF-8 (%s); skipping ignore", path, exc)
        return ()
    except OSError as exc:
        # The file can vanish or be unreadable (permissions, I/O error)
        # after the is_file() check; degrade instead of aborting the walk.
        _LOG.warning("discovery: %s: unreadable (%s); skipping ignore", path, exc)
        return ()

    return parse_ignore_file(text)
138 return parse_ignore_file(text)