Rust · 17076 bytes Raw Blame History
1 //! Glob pattern expansion
2 //!
3 //! Supports:
4 //! - Basic patterns: *, ?, [abc], [a-z]
5 //! - Recursive globbing: **
6 //! - Extended glob: !(pattern), ?(pattern), *(pattern), +(pattern), @(pattern)
7 //! - Dotfile handling
8
9 use std::fs;
10 use std::path::{Path, PathBuf};
11 use globset::{Glob, GlobBuilder, GlobMatcher};
12 use regex::Regex;
13
/// Options controlling how a glob pattern is expanded.
///
/// The [`Default`] implementation skips dotfiles, enables `**`, falls back
/// to the literal pattern when nothing matches, and leaves extended glob
/// syntax switched off.
///
/// The type is `Clone` and comparable so callers can derive a tweaked copy
/// from an existing set of options and compare configurations.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GlobOptions {
    /// Match dotfiles (entries whose name starts with `.`).
    pub match_dotfiles: bool,
    /// Allow recursive `**` patterns.
    pub globstar: bool,
    /// Return an empty list when nothing matches (instead of the literal pattern).
    pub nullglob: bool,
    /// Enable extended glob patterns: `!(pat)`, `?(pat)`, `*(pat)`, `+(pat)`, `@(pat)`.
    pub extglob: bool,
}

impl Default for GlobOptions {
    /// Returns the default option set: only `globstar` is enabled.
    fn default() -> Self {
        Self {
            match_dotfiles: false,
            globstar: true,
            nullglob: false,
            extglob: false,
        }
    }
}
36
37 /// Extended glob pattern representation
38 #[derive(Debug)]
39 enum ExtGlobPattern {
40 /// No extended glob - use standard matching
41 Standard(String),
42 /// Negation pattern - requires two-stage filtering
43 Negation {
44 base_pattern: String,
45 exclude_regex: Regex,
46 },
47 }
48
49 /// Expand a glob pattern into matching file paths
50 pub fn expand_glob(pattern: &str, options: &GlobOptions) -> Result<Vec<String>, String> {
51 // Check for extended glob patterns first (before metacharacter check)
52 let is_extglob = options.extglob && (
53 pattern.contains("!(") || pattern.contains("?(") || pattern.contains("*(")
54 || pattern.contains("+(") || pattern.contains("@(")
55 );
56
57 // Check if pattern contains glob metacharacters
58 // Skip this check for extglob patterns as they need processing even without metacharacters
59 if !is_extglob && !has_glob_chars(pattern) {
60 // No unescaped glob characters - strip backslash escapes and return as literal
61 return Ok(vec![strip_backslash_escapes(pattern)]);
62 }
63
64 // Parse and convert extended glob patterns (if extglob is enabled)
65 let extglob_result = parse_extglob(pattern, options.extglob)?;
66
67 match extglob_result {
68 ExtGlobPattern::Standard(base_pattern) => {
69 // Standard glob - no negation filtering needed
70 expand_standard_glob(&base_pattern, options)
71 }
72 ExtGlobPattern::Negation { base_pattern, exclude_regex } => {
73 // Two-stage filtering for negation patterns
74 // Stage 1: Expand base pattern to get candidates
75 let candidates = expand_standard_glob(&base_pattern, options)?;
76
77 // Stage 2: Filter out matches of the exclude pattern
78 let filtered: Vec<String> = candidates
79 .into_iter()
80 .filter(|path| {
81 // Extract just the filename for matching
82 let filename = Path::new(path)
83 .file_name()
84 .and_then(|n| n.to_str())
85 .unwrap_or(path);
86
87 // Keep if it does NOT match the exclude pattern
88 !exclude_regex.is_match(filename)
89 })
90 .collect();
91
92 // Handle no matches
93 if filtered.is_empty() && !options.nullglob {
94 Ok(vec![pattern.to_string()])
95 } else {
96 Ok(filtered)
97 }
98 }
99 }
100 }
101
102 /// Expand a standard (non-negated) glob pattern
103 fn expand_standard_glob(pattern: &str, options: &GlobOptions) -> Result<Vec<String>, String> {
104 // Build glob matcher
105 let glob = GlobBuilder::new(pattern)
106 .literal_separator(false) // Allow * to match /
107 .build()
108 .map_err(|e| format!("Invalid glob pattern: {}", e))?;
109
110 let matcher = glob.compile_matcher();
111
112 // Get base directory and relative pattern
113 let (base_dir, rel_pattern) = split_pattern(pattern);
114
115 // Expand the pattern
116 let mut matches = Vec::new();
117 expand_pattern(&base_dir, &rel_pattern, &matcher, options, &mut matches)?;
118
119 // Sort results for consistency
120 matches.sort();
121
122 // Handle no matches
123 if matches.is_empty() {
124 if options.nullglob {
125 Ok(vec![])
126 } else {
127 // Return literal pattern if no matches
128 Ok(vec![pattern.to_string()])
129 }
130 } else {
131 Ok(matches)
132 }
133 }
134
/// Report whether `s` should be treated as a glob pattern.
///
/// Returns `true` when `s` contains an unescaped `*`, `?`, `[` or `]`; a
/// character preceded by a backslash is skipped. Independently of escaping,
/// it also returns `true` when `s` contains any extended-glob opener
/// (`!(`, `?(`, `*(`, `+(`, `@(`) anywhere in the string.
fn has_glob_chars(s: &str) -> bool {
    let mut escaped = false;
    for c in s.chars() {
        if escaped {
            // This character was protected by the preceding backslash.
            escaped = false;
            continue;
        }
        match c {
            '\\' => escaped = true,
            '*' | '?' | '[' | ']' => return true,
            _ => {}
        }
    }

    // Extglob openers are detected on the raw text, so escaping does not
    // hide them.
    ["!(", "?(", "*(", "+(", "@("]
        .iter()
        .any(|opener| s.contains(*opener))
}
153
/// Remove the backslashes that escape glob metacharacters.
///
/// A backslash is dropped only when the next character is one of
/// `*`, `?`, `[`, `]` or `\`; that character is then copied verbatim.
/// Any other backslash (including one at the very end of the input) is
/// kept, so sequences such as `\n` or `\t` pass through untouched.
fn strip_backslash_escapes(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut rest = s.chars();

    while let Some(current) = rest.next() {
        if current != '\\' {
            out.push(current);
            continue;
        }

        // Look one character ahead without consuming it.
        match rest.clone().next() {
            Some(escaped) if matches!(escaped, '*' | '?' | '[' | ']' | '\\') => {
                rest.next();
                out.push(escaped);
            }
            // Not a glob escape (or nothing follows): keep the backslash.
            _ => out.push('\\'),
        }
    }

    out
}
179
180 /// Parse extended glob patterns and convert appropriately
181 fn parse_extglob(pattern: &str, extglob_enabled: bool) -> Result<ExtGlobPattern, String> {
182 // If extglob is disabled, treat everything as standard glob
183 if !extglob_enabled {
184 return Ok(ExtGlobPattern::Standard(pattern.to_string()));
185 }
186
187 // Check for extended glob patterns: !(pat), ?(pat), *(pat), +(pat), @(pat)
188 if !pattern.contains("!(") && !pattern.contains("?(") &&
189 !pattern.contains("*(") && !pattern.contains("+(") && !pattern.contains("@(") {
190 // No extended glob - return as standard
191 return Ok(ExtGlobPattern::Standard(pattern.to_string()));
192 }
193
194 // Convert extglob - check if it contains negation
195 let (converted_pattern, has_negation, exclude_pattern) = convert_extglob(pattern)?;
196
197 if has_negation {
198 // Build regex from the exclude pattern
199 let regex_pattern = glob_to_regex(&exclude_pattern)?;
200 let exclude_regex = Regex::new(&regex_pattern)
201 .map_err(|e| format!("Invalid regex pattern: {}", e))?;
202
203 Ok(ExtGlobPattern::Negation {
204 base_pattern: converted_pattern,
205 exclude_regex,
206 })
207 } else {
208 Ok(ExtGlobPattern::Standard(converted_pattern))
209 }
210 }
211
/// Convert a glob pattern list into an anchored regex source string.
///
/// The output has the shape `^(?:...)$`, so the regex must match the whole
/// candidate text. Translation rules:
///
/// * `*` becomes `.*` and `?` becomes `.`.
/// * `|` separates alternatives (as in an extglob pattern list such as
///   `!(a|b)`); the non-capturing group keeps the anchors applied to every
///   alternative.
/// * `[` opens a character class; a leading `!` or `^` negates it. `]` is
///   passed through.
/// * A backslash makes the following character literal (`\*` matches a
///   literal `*`). A trailing backslash matches a literal backslash.
/// * `.`, `(`, `)`, `{`, `}`, `+`, `^` and `$` are escaped so they match
///   themselves.
///
/// Characters inside a character class are otherwise translated by the same
/// rules as outside one; no separate class-level escaping is attempted.
///
/// # Errors
///
/// This implementation has no failure path and always returns `Ok`; the
/// `Result` is kept so the signature stays stable for callers.
fn glob_to_regex(pattern: &str) -> Result<String, String> {
    // Characters that need a backslash to be literal in the regex syntax.
    const REGEX_META: &str = r"\.+*?()|[]{}^$#&-~";

    let mut regex = String::with_capacity(pattern.len() + 8);
    regex.push_str("^(?:");

    let mut chars = pattern.chars().peekable();
    while let Some(ch) = chars.next() {
        match ch {
            '\\' => {
                // Escaped glob character: emit it as a regex literal.
                let literal = chars.next().unwrap_or('\\');
                if REGEX_META.contains(literal) {
                    regex.push('\\');
                }
                regex.push(literal);
            }
            '*' => regex.push_str(".*"),
            '?' => regex.push('.'),
            // Alternation between pattern-list entries.
            '|' => regex.push('|'),
            '[' => {
                regex.push('[');
                // Glob negation marker -> regex negation marker.
                if let Some(&next) = chars.peek() {
                    if next == '!' || next == '^' {
                        chars.next();
                        regex.push('^');
                    }
                }
            }
            ']' => regex.push(']'),
            '.' | '(' | ')' | '{' | '}' | '+' | '^' | '$' => {
                regex.push('\\');
                regex.push(ch);
            }
            _ => regex.push(ch),
        }
    }

    regex.push_str(")$");
    Ok(regex)
}
239
/// Rewrite extended glob groups into an ordinary glob.
///
/// Returns `(converted_pattern, has_negation, exclude_pattern)`.
///
/// Each `op(inner)` group, where `op` is one of `! ? * + @`, is replaced as
/// follows (nested parentheses inside `inner` are balanced and kept as part
/// of `inner`):
///
/// | group      | replacement | note                                          |
/// |------------|-------------|-----------------------------------------------|
/// | `!(inner)` | `*`         | sets `has_negation`; `inner` is the exclusion |
/// | `?(inner)` | `*`         | over-approximation of "zero or one"           |
/// | `*(inner)` | `*`         | over-approximation of "zero or more"          |
/// | `+(inner)` | `inner*`    | over-approximation of "one or more"           |
/// | `@(inner)` | `inner`     | exactly one                                   |
///
/// `?(inner)` is deliberately widened to `*`: plain glob syntax has no
/// "optional" construct, and `?` in a glob means "exactly one character", so
/// a rewrite such as `[x]?` would reject the very names the group is meant
/// to accept.
///
/// Limitations: only one `!(...)` is tracked; if several appear, the last
/// one supplies `exclude_pattern`. A group whose closing `)` is missing is
/// not an extglob group and is copied through literally.
///
/// # Errors
///
/// This implementation has no failure path and always returns `Ok`.
fn convert_extglob(pattern: &str) -> Result<(String, bool, String), String> {
    let mut result = String::with_capacity(pattern.len());
    let mut chars = pattern.chars().peekable();
    let mut has_negation = false;
    let mut exclude_pattern = String::new();

    while let Some(ch) = chars.next() {
        let is_operator = matches!(ch, '!' | '?' | '*' | '+' | '@');
        if !is_operator || chars.peek() != Some(&'(') {
            result.push(ch);
            continue;
        }
        chars.next(); // consume '('

        // Collect everything up to the matching ')'.
        let mut depth = 1usize;
        let mut inner = String::new();
        let mut closed = false;
        for c in chars.by_ref() {
            match c {
                '(' => depth += 1,
                ')' => {
                    depth -= 1;
                    if depth == 0 {
                        closed = true;
                        break;
                    }
                }
                _ => {}
            }
            inner.push(c);
        }

        if !closed {
            // Unterminated group: keep the text exactly as written. The
            // input is exhausted at this point, so we are done.
            result.push(ch);
            result.push('(');
            result.push_str(&inner);
            break;
        }

        match ch {
            '!' => {
                // Match everything now; the caller filters out `inner` later.
                result.push('*');
                has_negation = true;
                exclude_pattern = inner;
            }
            // Zero-or-one and zero-or-more are both widened to "anything".
            '?' | '*' => result.push('*'),
            '+' => {
                result.push_str(&inner);
                result.push('*');
            }
            '@' => result.push_str(&inner),
            _ => unreachable!("operator characters are checked above"),
        }
    }

    Ok((result, has_negation, exclude_pattern))
}
326
327 /// Split pattern into base directory and relative pattern
328 fn split_pattern(pattern: &str) -> (PathBuf, String) {
329 let path = Path::new(pattern);
330
331 // Find the first component with glob characters
332 let mut base = PathBuf::new();
333 let mut rel_parts = Vec::new();
334 let mut found_glob = false;
335
336 for component in path.components() {
337 let comp_str = component.as_os_str().to_string_lossy();
338 if !found_glob && !has_glob_chars(&comp_str) {
339 base.push(component);
340 } else {
341 found_glob = true;
342 rel_parts.push(comp_str.to_string());
343 }
344 }
345
346 let rel_pattern = if rel_parts.is_empty() {
347 pattern.to_string()
348 } else {
349 rel_parts.join("/")
350 };
351
352 if base.as_os_str().is_empty() {
353 base = PathBuf::from(".");
354 }
355
356 (base, rel_pattern)
357 }
358
359 /// Recursively expand a glob pattern
360 fn expand_pattern(
361 dir: &Path,
362 pattern: &str,
363 matcher: &GlobMatcher,
364 options: &GlobOptions,
365 results: &mut Vec<String>,
366 ) -> Result<(), String> {
367 // Handle ** recursive glob
368 if pattern.starts_with("**/") || pattern == "**" {
369 if options.globstar {
370 let rest = if pattern == "**" {
371 "*"
372 } else {
373 &pattern[3..]
374 };
375
376 // Recursively search all subdirectories
377 expand_recursive(dir, rest, matcher, options, results)?;
378 return Ok(());
379 }
380 }
381
382 // If pattern contains /, we need to handle directory traversal
383 if pattern.contains('/') {
384 let parts: Vec<&str> = pattern.split('/').collect();
385 if parts.len() > 1 {
386 let first_part = parts[0];
387 let rest = parts[1..].join("/");
388
389 // Read directory and match first part
390 let entries = fs::read_dir(dir)
391 .map_err(|e| format!("Failed to read directory {}: {}", dir.display(), e))?;
392
393 for entry in entries {
394 let entry = entry.map_err(|e| format!("Failed to read entry: {}", e))?;
395 let path = entry.path();
396 let file_name = entry.file_name();
397 let name = file_name.to_string_lossy();
398
399 // Skip dotfiles unless enabled
400 if !options.match_dotfiles && name.starts_with('.') {
401 continue;
402 }
403
404 // Match first part
405 if glob_match_simple(first_part, &name) && path.is_dir() {
406 expand_pattern(&path, &rest, matcher, options, results)?;
407 }
408 }
409 return Ok(());
410 }
411 }
412
413 // Simple case: pattern is just a filename pattern in current directory
414 let entries = fs::read_dir(dir)
415 .map_err(|e| format!("Failed to read directory {}: {}", dir.display(), e))?;
416
417 for entry in entries {
418 let entry = entry.map_err(|e| format!("Failed to read entry: {}", e))?;
419 let path = entry.path();
420 let file_name = entry.file_name();
421 let name = file_name.to_string_lossy();
422
423 // Skip dotfiles unless enabled
424 if !options.match_dotfiles && name.starts_with('.') {
425 continue;
426 }
427
428 // Match against pattern
429 if glob_match_simple(pattern, &name) {
430 results.push(path.to_string_lossy().to_string());
431 }
432 }
433
434 Ok(())
435 }
436
437 /// Recursively expand with ** pattern
438 fn expand_recursive(
439 dir: &Path,
440 pattern: &str,
441 matcher: &GlobMatcher,
442 options: &GlobOptions,
443 results: &mut Vec<String>,
444 ) -> Result<(), String> {
445 // Try to match in current directory
446 expand_pattern(dir, pattern, matcher, options, results)?;
447
448 // Recurse into subdirectories
449 let entries = fs::read_dir(dir)
450 .map_err(|e| format!("Failed to read directory {}: {}", dir.display(), e))?;
451
452 for entry in entries {
453 let entry = entry.map_err(|e| format!("Failed to read entry: {}", e))?;
454 let path = entry.path();
455
456 if path.is_dir() {
457 let file_name = entry.file_name();
458 let name = file_name.to_string_lossy();
459
460 // Skip dotfiles unless enabled
461 if !options.match_dotfiles && name.starts_with('.') {
462 continue;
463 }
464
465 // Recurse
466 expand_recursive(&path, pattern, matcher, options, results)?;
467 }
468 }
469
470 Ok(())
471 }
472
473 /// Simple glob matching for a single path component
474 fn glob_match_simple(pattern: &str, text: &str) -> bool {
475 // Handle simple cases
476 if pattern == "*" {
477 return true;
478 }
479
480 if pattern == text {
481 return true;
482 }
483
484 // Use globset for more complex patterns
485 if let Ok(glob) = Glob::new(pattern) {
486 glob.compile_matcher().is_match(text)
487 } else {
488 false
489 }
490 }
491
#[cfg(test)]
mod tests {
    use super::*;

    /// Patterns with an unescaped metacharacter are globs; plain names are not.
    #[test]
    fn test_has_glob_chars() {
        let globs = ["*.txt", "file?.rs", "[abc]"];
        for pattern in globs.iter() {
            assert!(has_glob_chars(pattern));
        }

        assert!(!has_glob_chars("plain.txt"));
    }

    /// Covers the `*` shortcut, the equality shortcut and a compiled glob.
    #[test]
    fn test_glob_match_simple() {
        let accepted = [("*", "anything"), ("*.txt", "file.txt"), ("file", "file")];
        for (pattern, text) in accepted.iter() {
            assert!(glob_match_simple(pattern, text));
        }

        assert!(!glob_match_simple("*.txt", "file.rs"));
    }
}
512