Python · 6472 bytes Raw Blame History
1 """Resolve per-file effective config from parent directive + discovered `.dlm/`.
2
3 Decision order (applied top-down; last match wins within the exclude
4 bucket, matching `.gitignore`'s semantics exactly):
5
6 1. Parent directive's `include` / `exclude` (outermost shell)
7 2. Default-exclude set, unless the nearest `training.yaml` sets
8 `exclude_defaults: false`
9 3. For each ancestor anchor (shallowest → deepest):
10 a. `training.yaml.exclude` patterns
11 b. `.dlm/ignore` rules (including `!negation`)
12 Later rules can un-exclude earlier ones via negation.
13 4. Include resolution: nearest-ancestor `training.yaml.include` if
14 non-empty, else parent-directive include.
15 5. Metadata: shallow-to-deep merge of every `training.yaml.metadata`
16 on the ancestor path; deeper keys overwrite shallower on collision.
17
18 `effective_config_for` returns an `EffectiveConfig` whose `included`
19 flag is False when the file is either not matched by any include
20 pattern OR matches a final exclude — the caller treats that as "skip this file".
21 """
22
23 from __future__ import annotations
24
25 from collections.abc import Mapping
26 from dataclasses import dataclass
27 from pathlib import Path
28
29 from dlm.directives.defaults import DEFAULT_EXCLUDES
30 from dlm.directives.discovery import DiscoveredConfig
31 from dlm.directives.ignore_parser import matches as ignore_matches
32 from dlm.directives.safety import _compile_glob
33 from dlm.doc.schema import SourceDirective
34
35
@dataclass(frozen=True)
class EffectiveConfig:
    """Outcome of rule resolution for a single file in a directive walk.

    Instances are immutable (frozen dataclass). The include and exclude
    decisions are collapsed into one flag, and the metadata gathered for
    the file is exposed as a read-only style mapping.
    """

    # Final verdict: True -> ingest the file, False -> skip it.
    included: bool
    # Merged metadata for the file; intended to flow onto the
    # synthesized Section.
    tags: Mapping[str, str]
47
48
def ancestors_of(
    file_path: Path, discovered: tuple[DiscoveredConfig, ...]
) -> tuple[DiscoveredConfig, ...]:
    """Return the discovered configs whose anchor contains `file_path`.

    Both `file_path` and every anchor are resolved before comparison. An
    anchor qualifies when it equals the resolved file path or is one of
    its parent directories, so the direct-parent anchor is included.

    The result is ordered shallowest → deepest. Depth is measured on the
    *resolved* anchor (number of path components), so anchors spelled
    with `..` segments, relative anchors, or symlinked anchors still sort
    by their true nesting level rather than by the length of the string
    they happened to be written as. Entries with the same resolved depth
    keep their order from `discovered` (the sort is stable).
    """
    abs_file = file_path.resolve()
    ranked = []
    for config in discovered:
        # Resolve once and reuse for both the containment test and the
        # depth key, so filter and sort agree on the same path.
        anchor = config.anchor.resolve()
        if anchor == abs_file or anchor in abs_file.parents:
            ranked.append((len(anchor.parts), config))
    # Key on depth only: the config objects themselves are not orderable.
    ranked.sort(key=lambda pair: pair[0])
    return tuple(config for _, config in ranked)
58
59
def _is_ancestor(anchor: Path, file_path: Path) -> bool:
    """Report whether `anchor` is `file_path` itself or one of its parents.

    The comparison is purely lexical (no filesystem access); callers are
    expected to pass already-resolved paths.
    """
    return anchor == file_path or anchor in file_path.parents
66
67
def effective_config_for(
    file_path: Path,
    *,
    source_root: Path,
    discovered: tuple[DiscoveredConfig, ...],
    parent_directive: SourceDirective,
    is_dir: bool = False,
) -> EffectiveConfig:
    """Work out whether `file_path` is ingested and which tags it carries.

    Args:
        file_path: Candidate path produced by the filesystem walk.
        source_root: Root of the directive; include patterns, the parent
            directive's excludes and the default excludes are matched
            against the path relative to this root.
        discovered: Every discovered config; only those anchored at an
            ancestor of `file_path` take part.
        parent_directive: Directive driving the walk; supplies the
            fallback `include` and the outermost `exclude` patterns.
        is_dir: Forwarded to the ignore-rule matcher for each rule.

    Returns:
        An `EffectiveConfig` whose `included` is True only when an
        include pattern matched and the final exclude verdict is False,
        and whose `tags` is the shallow-to-deep metadata merge.
    """
    chain = ancestors_of(file_path, discovered)
    root_relative = _relpath(file_path, source_root)

    # Ancestors that actually carry a training.yaml, shallowest first,
    # plus the same list viewed from the nearest ancestor outwards.
    configured = [entry for entry in chain if entry.config is not None]
    nearest_first = configured[::-1]

    # --- Include ---------------------------------------------------
    # The nearest training.yaml with a non-empty include replaces the
    # parent directive's include outright.
    # NOTE(review): these patterns are matched against the path relative
    # to `source_root`, whereas training.yaml excludes below are matched
    # relative to their own anchor — confirm this asymmetry is intended.
    include_patterns: tuple[str, ...] = next(
        (entry.config.include for entry in nearest_first if entry.config.include),
        parent_directive.include,
    )
    matched_include = _matches_any(root_relative, include_patterns)

    # --- Exclude ---------------------------------------------------
    # One running verdict; every later source may overwrite it, and only
    # ignore-file negations can switch it back to "not excluded".
    # NOTE(review): the module docstring describes excludes and ignore
    # rules as interleaved per anchor; here every training.yaml exclude
    # is applied before any ignore rule — confirm which order is meant.

    # Outer shell: the parent directive's own excludes.
    skip = _matches_any(root_relative, parent_directive.exclude)

    # Default excludes apply unless the nearest training.yaml opts out.
    use_defaults = nearest_first[0].config.exclude_defaults if nearest_first else True
    if use_defaults and _matches_any(root_relative, DEFAULT_EXCLUDES):
        skip = True

    # training.yaml excludes, each matched relative to its own anchor.
    for entry in configured:
        if _matches_any(_relpath(file_path, entry.anchor), entry.config.exclude):
            skip = True

    # Ignore-file rules, shallowest anchor first; the last matching rule
    # decides, and a negated rule re-includes the path.
    for entry in chain:
        rules = entry.ignore_rules
        if rules:
            anchor_relative = _relpath(file_path, entry.anchor)
            for rule in rules:
                if ignore_matches(rule, anchor_relative, is_dir=is_dir):
                    skip = not rule.negate

    # --- Metadata --------------------------------------------------
    # Deeper anchors overwrite shallower ones on key collisions.
    merged: dict[str, str] = {}
    for entry in configured:
        if entry.config.metadata:
            merged.update(entry.config.metadata)

    return EffectiveConfig(included=matched_include and not skip, tags=merged)
143
144
def _matches_any(rel_path: str, patterns: tuple[str, ...]) -> bool:
    """Tell whether `rel_path` fully matches at least one glob in `patterns`.

    Each pattern is compiled with the shared `_compile_glob` and must
    match the whole of `rel_path`. An empty tuple never matches; callers
    deal with the include case where empty means "inherit from parent".
    """
    for pattern in patterns:
        if _compile_glob(pattern).fullmatch(rel_path) is not None:
            return True
    return False
150
151
def _relpath(file_path: Path, anchor: Path) -> str:
    """Express `file_path` relative to `anchor` as a POSIX-style string.

    Both paths are resolved first. Two situations fall back to the bare
    file name (`file_path.name`) instead of a relative path:

    - `file_path` and `anchor` resolve to the same location (the
      single-file directive case), where a relative path would be `.`
      but the include/exclude globs expect a file name.
    - `anchor` is not an ancestor of `file_path`; a defensive fallback
      that keeps basename-style globs such as `*.py` usable.
    """
    target = file_path.resolve()
    base = anchor.resolve()
    if target != base:
        try:
            relative = target.relative_to(base)
        except ValueError:
            # Not under the anchor: fall through to the name fallback.
            pass
        else:
            return relative.as_posix()
    return file_path.name