| 1 |
"""Resolve per-file effective config from parent directive + discovered `.dlm/`. |
| 2 |
|
| 3 |
Decision order (applied top-down; last match wins within the exclude |
| 4 |
bucket, matching `.gitignore`'s semantics exactly): |
| 5 |
|
| 6 |
1. Parent directive's `include` / `exclude` (outermost shell) |
| 7 |
2. Default-exclude set, unless the nearest `training.yaml` sets |
| 8 |
`exclude_defaults: false` |
| 9 |
3. For each ancestor anchor (shallowest → deepest): |
| 10 |
a. `training.yaml.exclude` patterns |
| 11 |
b. `.dlm/ignore` rules (including `!negation`) |
| 12 |
Later rules can un-exclude earlier ones via negation. |
| 13 |
4. Include resolution: nearest-ancestor `training.yaml.include` if |
| 14 |
non-empty, else parent-directive include. |
| 15 |
5. Metadata: shallow-to-deep merge of every `training.yaml.metadata` |
| 16 |
on the ancestor path; deeper keys overwrite shallower on collision. |
| 17 |
|
| 18 |
`effective_config_for` reports `included=False` when the file is either not matched |
| 19 |
by any include pattern OR matches a final exclude — the caller treats |
| 20 |
that verdict as "skip this file". |
| 21 |
""" |
| 22 |
|
| 23 |
from __future__ import annotations |
| 24 |
|
| 25 |
from collections.abc import Mapping |
| 26 |
from dataclasses import dataclass |
| 27 |
from pathlib import Path |
| 28 |
|
| 29 |
from dlm.directives.defaults import DEFAULT_EXCLUDES |
| 30 |
from dlm.directives.discovery import DiscoveredConfig |
| 31 |
from dlm.directives.ignore_parser import matches as ignore_matches |
| 32 |
from dlm.directives.safety import _compile_glob |
| 33 |
from dlm.doc.schema import SourceDirective |
| 34 |
|
| 35 |
|
| 36 |
@dataclass(frozen=True)
class EffectiveConfig:
    """Outcome of rule resolution for a single file in a directive walk.

    Instances are immutable (frozen dataclass). The include and exclude
    decisions are folded into one boolean so callers only need to look
    at `included`.
    """

    # Final verdict: True -> ingest the file, False -> skip it.
    included: bool
    # Merged metadata mapping to flow onto the synthesized Section.
    tags: Mapping[str, str]
| 47 |
|
| 48 |
|
| 49 |
def ancestors_of(
    file_path: Path, discovered: tuple[DiscoveredConfig, ...]
) -> tuple[DiscoveredConfig, ...]:
    """Return the DiscoveredConfigs whose anchor contains `file_path`.

    The result is ordered shallowest → deepest and includes the
    direct-parent anchor (and an anchor equal to `file_path` itself).
    Entries whose anchors resolve to the same depth keep their relative
    order from `discovered`.

    Both the membership test and the ordering use the *resolved* anchor.
    Ordering by the unresolved spelling would let an anchor written with
    `..` segments, a symlink, or a relative prefix sort as if it were
    deeper (or shallower) than it really is.
    """
    abs_file = file_path.resolve()
    ranked = []
    for config in discovered:
        # Resolve once and reuse for both the filter and the sort key.
        anchor = config.anchor.resolve()
        if _is_ancestor(anchor, abs_file):
            # All matches lie on one ancestor chain of `abs_file`, so the
            # component count is a strict measure of depth.
            ranked.append((len(anchor.parts), config))
    # Key on depth only so the sort stays stable and never compares configs.
    ranked.sort(key=lambda pair: pair[0])
    return tuple(config for _, config in ranked)


def _is_ancestor(anchor: Path, file_path: Path) -> bool:
    """Return True when `file_path` equals `anchor` or lies beneath it.

    This is a purely lexical check via `Path.relative_to`; callers are
    expected to pass already-resolved paths.
    """
    try:
        file_path.relative_to(anchor)
    except ValueError:
        return False
    return True
| 66 |
|
| 67 |
|
| 68 |
def effective_config_for(
    file_path: Path,
    *,
    source_root: Path,
    discovered: tuple[DiscoveredConfig, ...],
    parent_directive: SourceDirective,
    is_dir: bool = False,
) -> EffectiveConfig:
    """Resolve the applicable rules for `file_path`.

    `file_path` is the candidate file (already resolved from the
    filesystem walk). `source_root` is the directive's resolved
    root — used to compute paths relative to the include/exclude
    glob space. `discovered` is the set of discovered configs; only
    those whose anchor is an ancestor of `file_path` participate.
    `parent_directive` is the frontmatter directive driving this walk;
    its `include`/`exclude` act as the outer shell. `is_dir` is passed
    through to the ignore-rule matcher.

    Returns an `EffectiveConfig` whose `included` is True only when the
    path matches the effective include patterns and the final exclude
    verdict is False, and whose `tags` is the shallow-to-deep merge of
    ancestor metadata.
    """
    ancestors = ancestors_of(file_path, discovered)
    rel_to_root = _relpath(file_path, source_root)
    # Every anchor-relative path costs two `Path.resolve()` calls, so compute
    # it once per ancestor and share it between the two exclude passes below.
    anchored = [(d, _relpath(file_path, d.anchor)) for d in ancestors]

    # --- Include resolution -------------------------------------------
    # Nearest-ancestor training.yaml with non-empty include wins;
    # otherwise fall back to the parent directive's include.
    # NOTE(review): include patterns are matched against the root-relative
    # path even when they come from a nested training.yaml, whereas nested
    # excludes are matched anchor-relative — confirm this is intended.
    effective_include: tuple[str, ...] = parent_directive.include
    for d in reversed(ancestors):
        if d.config is not None and d.config.include:
            effective_include = d.config.include
            break
    included_by_positive = _matches_any(rel_to_root, effective_include)

    # --- Exclude resolution (last-match-wins across all sources) ------
    # Track a single boolean; flip it as we encounter matches. A
    # `!negation` match flips it back to included. Final value after
    # walking all sources is the verdict.
    excluded = False

    # Layer 1: parent directive's explicit excludes.
    if _matches_any(rel_to_root, parent_directive.exclude):
        excluded = True

    # Layer 2: default excludes (unless opted out by nearest training.yaml).
    apply_defaults = True
    for d in reversed(ancestors):
        if d.config is not None:
            apply_defaults = d.config.exclude_defaults
            break
    if apply_defaults and _matches_any(rel_to_root, DEFAULT_EXCLUDES):
        excluded = True

    # Layer 3: per-anchor training.yaml excludes, shallowest → deepest.
    # NOTE(review): all training.yaml excludes run before any ignore rules,
    # so a shallow `!negation` can override a deeper training.yaml exclude;
    # the module docstring reads as per-ancestor interleaving — confirm.
    for d, rel_to_anchor in anchored:
        if d.config is not None and _matches_any(rel_to_anchor, d.config.exclude):
            excluded = True

    # Layer 4: per-anchor .dlm/ignore rules, shallowest → deepest.
    # Within a single file's rules, last match wins (including negation).
    for d, rel_to_anchor in anchored:
        if not d.ignore_rules:
            continue
        for rule in d.ignore_rules:
            if ignore_matches(rule, rel_to_anchor, is_dir=is_dir):
                excluded = not rule.negate

    # --- Metadata merge (shallow-to-deep) -----------------------------
    # Deeper anchors are applied later, so their keys overwrite shallower ones.
    tags: dict[str, str] = {}
    for d in ancestors:
        if d.config is not None and d.config.metadata:
            tags.update(d.config.metadata)

    return EffectiveConfig(
        included=(included_by_positive and not excluded),
        tags=tags,
    )
| 143 |
|
| 144 |
|
| 145 |
def _matches_any(rel_path: str, patterns: tuple[str, ...]) -> bool: |
| 146 |
"""Positive match against a tuple of globs using the shared |
| 147 |
`_compile_glob`. Empty tuple → False (callers handle the include |
| 148 |
case where empty means "inherit from parent").""" |
| 149 |
return any(_compile_glob(p).fullmatch(rel_path) is not None for p in patterns) |
| 150 |
|
| 151 |
|
| 152 |
def _relpath(file_path: Path, anchor: Path) -> str: |
| 153 |
"""POSIX-form relative path. Handles two edge cases: |
| 154 |
|
| 155 |
- Single-file directive: `file_path == anchor`. `relative_to` |
| 156 |
returns `.` in that case, but the include/exclude globs expect |
| 157 |
the filename, so we return `file_path.name`. |
| 158 |
- Non-ancestor anchor: defensive fallback to `file_path.name`. |
| 159 |
Shouldn't happen on the hot path, but returning the name keeps |
| 160 |
basename-style globs (`*.py`) working. |
| 161 |
""" |
| 162 |
resolved_file = file_path.resolve() |
| 163 |
resolved_anchor = anchor.resolve() |
| 164 |
if resolved_file == resolved_anchor: |
| 165 |
return file_path.name |
| 166 |
try: |
| 167 |
return resolved_file.relative_to(resolved_anchor).as_posix() |
| 168 |
except ValueError: |
| 169 |
return file_path.name |