Fortran · 14868 bytes Raw Blame History
1 module ferp_dir
2 !> Directory operations for FERP using POSIX C interop
3 use ferp_kinds
4 use, intrinsic :: iso_c_binding
5 implicit none
6 private
7
8 public :: is_directory, is_regular_file, is_symlink, collect_files
9 public :: glob_match
10 public :: read_patterns_from_file, matches_any_pattern
11
12 ! C interfaces for POSIX directory functions
13 interface
14 function c_opendir(dirname) bind(C, name="opendir")
15 import :: c_ptr, c_char
16 character(kind=c_char), intent(in) :: dirname(*)
17 type(c_ptr) :: c_opendir
18 end function c_opendir
19
20 function c_readdir(dirp) bind(C, name="readdir")
21 import :: c_ptr
22 type(c_ptr), value :: dirp
23 type(c_ptr) :: c_readdir
24 end function c_readdir
25
26 function c_closedir(dirp) bind(C, name="closedir")
27 import :: c_ptr, c_int
28 type(c_ptr), value :: dirp
29 integer(c_int) :: c_closedir
30 end function c_closedir
31
32 function c_stat(pathname, statbuf) bind(C, name="stat")
33 import :: c_char, c_int, c_ptr
34 character(kind=c_char), intent(in) :: pathname(*)
35 type(c_ptr), value :: statbuf
36 integer(c_int) :: c_stat
37 end function c_stat
38
39 function c_lstat(pathname, statbuf) bind(C, name="lstat")
40 import :: c_char, c_int, c_ptr
41 character(kind=c_char), intent(in) :: pathname(*)
42 type(c_ptr), value :: statbuf
43 integer(c_int) :: c_lstat
44 end function c_lstat
45 end interface
46
47 ! Size of struct stat varies by platform, use generous size
48 integer, parameter :: STAT_BUF_SIZE = 256
49
50 contains
51
52 function is_directory(path) result(is_dir)
53 !> Check if path is a directory
54 character(len=*), intent(in) :: path
55 logical :: is_dir
56
57 type(c_ptr) :: dirp
58 character(len=max_path_len+1) :: c_path
59 integer(c_int) :: istat
60
61 is_dir = .false.
62
63 ! Try to open as directory
64 c_path = trim(path) // c_null_char
65 dirp = c_opendir(c_path)
66
67 if (c_associated(dirp)) then
68 is_dir = .true.
69 istat = c_closedir(dirp)
70 end if
71
72 end function is_directory
73
74 function is_regular_file(path) result(is_file)
75 !> Check if path is a regular file (not directory, symlink, etc.)
76 character(len=*), intent(in) :: path
77 logical :: is_file
78
79 logical :: exists
80
81 is_file = .false.
82
83 ! Use Fortran inquire
84 inquire(file=path, exist=exists)
85 if (.not. exists) return
86
87 ! If it's not a directory, treat as regular file
88 is_file = .not. is_directory(path)
89
90 end function is_regular_file
91
92 function is_symlink(path) result(is_link)
93 !> Check if path is a symbolic link using lstat
94 character(len=*), intent(in) :: path
95 logical :: is_link
96
97 character(len=max_path_len+1) :: c_path
98 character(len=STAT_BUF_SIZE), target :: statbuf
99 integer(c_int) :: istat
100 integer :: mode_offset, mode_val
101
102 is_link = .false.
103
104 c_path = trim(path) // c_null_char
105 istat = c_lstat(c_path, c_loc(statbuf))
106
107 if (istat /= 0) return
108
109 ! On Linux x86_64, st_mode is at offset 24 (bytes 25-28)
110 ! st_mode is typically uint32_t
111 mode_offset = 24
112 mode_val = transfer(statbuf(mode_offset+1:mode_offset+4), 0)
113
114 ! S_IFLNK = 0120000 (octal) = 40960 (decimal)
115 ! The file type is in bits 12-15 of mode
116 ! S_IFMT mask = 0170000 (octal) = 61440
117 is_link = iand(mode_val, 61440) == 40960
118
119 end function is_symlink
120
121 subroutine collect_files(start_path, file_list, num_files, recursive, &
122 follow_links, include_globs, num_include, &
123 exclude_globs, num_exclude, exclude_dirs, num_exclude_dirs)
124 !> Collect files from a path, optionally recursively
125 character(len=*), intent(in) :: start_path
126 character(len=max_path_len), intent(out) :: file_list(:)
127 integer, intent(out) :: num_files
128 logical, intent(in) :: recursive
129 logical, intent(in) :: follow_links
130 character(len=max_path_len), intent(in) :: include_globs(:)
131 integer, intent(in) :: num_include
132 character(len=max_path_len), intent(in) :: exclude_globs(:)
133 integer, intent(in) :: num_exclude
134 character(len=max_path_len), intent(in) :: exclude_dirs(:)
135 integer, intent(in) :: num_exclude_dirs
136
137 ! SAVE used for large array - safe since file collection runs before parallel section
138 ! MAX_DEPTH is the max number of directories that can be queued at once (not depth)
139 integer, parameter :: MAX_DEPTH = 10000
140 character(len=max_path_len), save :: dir_stack(MAX_DEPTH)
141 integer :: stack_top
142 character(len=max_path_len) :: current_dir, entry_path, entry_name
143 type(c_ptr) :: dirp, entry_ptr
144 integer(c_int) :: istat
145
146 num_files = 0
147
148 ! Check if start_path is a file or directory
149 if (.not. is_directory(start_path)) then
150 ! It's a file, just add it if it passes filters
151 if (should_include_file_multi(start_path, include_globs, num_include, &
152 exclude_globs, num_exclude)) then
153 num_files = 1
154 file_list(1) = start_path
155 end if
156 return
157 end if
158
159 ! Initialize directory stack
160 stack_top = 1
161 dir_stack(1) = start_path
162
163 do while (stack_top > 0 .and. num_files < size(file_list))
164 ! Pop directory from stack
165 current_dir = dir_stack(stack_top)
166 stack_top = stack_top - 1
167
168 ! Open directory
169 dirp = c_opendir(trim(current_dir) // c_null_char)
170 if (.not. c_associated(dirp)) cycle
171
172 ! Read directory entries
173 do
174 entry_ptr = c_readdir(dirp)
175 if (.not. c_associated(entry_ptr)) exit
176
177 ! Get entry name from dirent struct
178 call get_dirent_name(entry_ptr, entry_name)
179
180 ! Skip . and ..
181 if (trim(entry_name) == '.' .or. trim(entry_name) == '..') cycle
182 if (len_trim(entry_name) == 0) cycle
183
184 ! Build full path
185 if (current_dir(len_trim(current_dir):len_trim(current_dir)) == '/') then
186 entry_path = trim(current_dir) // trim(entry_name)
187 else
188 entry_path = trim(current_dir) // '/' // trim(entry_name)
189 end if
190
191 ! Skip symlinks if not following them (like grep -r vs grep -R)
192 if (.not. follow_links .and. is_symlink(entry_path)) cycle
193
194 ! Check if it's a directory
195 if (is_directory(entry_path)) then
196 if (recursive) then
197 ! Check exclude-dir patterns
198 if (num_exclude_dirs > 0) then
199 if (matches_any_pattern(trim(entry_name), exclude_dirs, num_exclude_dirs)) cycle
200 end if
201
202 ! Push to stack for later processing
203 if (stack_top < MAX_DEPTH) then
204 stack_top = stack_top + 1
205 dir_stack(stack_top) = entry_path
206 end if
207 end if
208 else
209 ! It's a file - check filters and add
210 if (should_include_file_multi(entry_path, include_globs, num_include, &
211 exclude_globs, num_exclude)) then
212 if (num_files < size(file_list)) then
213 num_files = num_files + 1
214 file_list(num_files) = entry_path
215 end if
216 end if
217 end if
218 end do
219
220 istat = c_closedir(dirp)
221 end do
222
223 end subroutine collect_files
224
225 subroutine get_dirent_name(entry_ptr, name)
226 !> Extract filename from dirent struct pointer
227 type(c_ptr), intent(in) :: entry_ptr
228 character(len=*), intent(out) :: name
229
230 ! dirent.d_name starts at offset after d_ino and d_off (platform dependent)
231 ! On most systems, d_name is at offset ~19-21 bytes
232 ! We'll use a more robust approach: scan for printable chars
233 character(len=256, kind=c_char), pointer :: raw_data
234 integer :: i, start_pos, name_len
235
236 name = ''
237 if (.not. c_associated(entry_ptr)) return
238
239 ! Map memory to character array
240 call c_f_pointer(entry_ptr, raw_data)
241
242 ! On macOS/Linux, d_name typically starts around byte 19-21
243 ! Find start of name by scanning for first printable character after struct header
244 ! Starting from offset 19 works on both platforms
245 start_pos = 19
246
247 ! Find actual start (first printable character after header)
248 do i = start_pos, min(40, len(raw_data))
249 if (ichar(raw_data(i:i)) >= 32 .and. ichar(raw_data(i:i)) < 127) then
250 start_pos = i
251 exit
252 end if
253 end do
254
255 ! Copy name until null terminator
256 name_len = 0
257 do i = start_pos, min(start_pos + max_path_len - 1, len(raw_data))
258 if (raw_data(i:i) == c_null_char) exit
259 if (ichar(raw_data(i:i)) < 32 .or. ichar(raw_data(i:i)) >= 127) exit
260 name_len = name_len + 1
261 name(name_len:name_len) = raw_data(i:i)
262 end do
263
264 end subroutine get_dirent_name
265
266 function should_include_file(filepath, include_glob, exclude_glob) result(include)
267 !> Check if file should be included based on glob patterns
268 character(len=*), intent(in) :: filepath
269 character(len=*), intent(in) :: include_glob
270 character(len=*), intent(in) :: exclude_glob
271 logical :: include
272
273 character(len=max_path_len) :: basename
274 integer :: i
275
276 include = .true.
277
278 ! Extract basename
279 basename = filepath
280 do i = len_trim(filepath), 1, -1
281 if (filepath(i:i) == '/') then
282 basename = filepath(i+1:)
283 exit
284 end if
285 end do
286
287 ! Check include pattern (if specified, file must match)
288 if (len_trim(include_glob) > 0) then
289 include = glob_match(trim(basename), trim(include_glob))
290 if (.not. include) return
291 end if
292
293 ! Check exclude pattern (if specified and matches, exclude)
294 if (len_trim(exclude_glob) > 0) then
295 if (glob_match(trim(basename), trim(exclude_glob))) then
296 include = .false.
297 end if
298 end if
299
300 end function should_include_file
301
302 function should_include_file_multi(filepath, include_globs, num_include, &
303 exclude_globs, num_exclude) result(include)
304 !> Check if file should be included based on multiple glob patterns
305 character(len=*), intent(in) :: filepath
306 character(len=max_path_len), intent(in) :: include_globs(:)
307 integer, intent(in) :: num_include
308 character(len=max_path_len), intent(in) :: exclude_globs(:)
309 integer, intent(in) :: num_exclude
310 logical :: include
311
312 character(len=max_path_len) :: basename
313 integer :: i
314
315 include = .true.
316
317 ! Extract basename
318 basename = filepath
319 do i = len_trim(filepath), 1, -1
320 if (filepath(i:i) == '/') then
321 basename = filepath(i+1:)
322 exit
323 end if
324 end do
325
326 ! Check include patterns (if any specified, file must match at least one)
327 if (num_include > 0) then
328 include = .false.
329 do i = 1, num_include
330 if (glob_match(trim(basename), trim(include_globs(i)))) then
331 include = .true.
332 exit
333 end if
334 end do
335 if (.not. include) return
336 end if
337
338 ! Check exclude patterns (if any match, exclude the file)
339 if (num_exclude > 0) then
340 do i = 1, num_exclude
341 if (glob_match(trim(basename), trim(exclude_globs(i)))) then
342 include = .false.
343 return
344 end if
345 end do
346 end if
347
348 end function should_include_file_multi
349
350 recursive function glob_match(str, pattern) result(matches)
351 !> Simple glob pattern matching (* and ? wildcards)
352 character(len=*), intent(in) :: str
353 character(len=*), intent(in) :: pattern
354 logical :: matches
355
356 integer :: s, p, str_len, pat_len
357
358 matches = .false.
359 str_len = len(str)
360 pat_len = len(pattern)
361
362 ! Handle empty pattern
363 if (pat_len == 0) then
364 matches = (str_len == 0)
365 return
366 end if
367
368 s = 1
369 p = 1
370
371 main_loop: do while (s <= str_len .and. p <= pat_len)
372 if (pattern(p:p) == '*') then
373 ! * matches zero or more characters
374 ! Skip consecutive stars
375 skip_stars: do while (p <= pat_len)
376 if (pattern(p:p) /= '*') exit skip_stars
377 p = p + 1
378 end do skip_stars
379
380 if (p > pat_len) then
381 ! Pattern ends with * - matches rest of string
382 matches = .true.
383 return
384 end if
385
386 ! Try matching from each position in str
387 try_positions: do while (s <= str_len)
388 if (glob_match(str(s:str_len), pattern(p:pat_len))) then
389 matches = .true.
390 return
391 end if
392 s = s + 1
393 end do try_positions
394
395 ! Also try empty match (s > str_len)
396 matches = glob_match('', pattern(p:pat_len))
397 return
398 else if (pattern(p:p) == '?') then
399 ! ? matches exactly one character
400 s = s + 1
401 p = p + 1
402 else if (pattern(p:p) == str(s:s)) then
403 ! Exact character match
404 s = s + 1
405 p = p + 1
406 else
407 ! No match
408 return
409 end if
410 end do main_loop
411
412 ! Skip trailing wildcards in pattern
413 trailing_stars: do while (p <= pat_len)
414 if (pattern(p:p) /= '*') exit trailing_stars
415 p = p + 1
416 end do trailing_stars
417
418 ! Match if both consumed
419 matches = (s > str_len .and. p > pat_len)
420
421 end function glob_match
422
423 subroutine read_patterns_from_file(filename, patterns, num_patterns, ierr)
424 !> Read glob patterns from a file, one per line
425 character(len=*), intent(in) :: filename
426 character(len=max_path_len), intent(out) :: patterns(:)
427 integer, intent(out) :: num_patterns
428 integer, intent(out) :: ierr
429
430 integer :: unit_num, ios
431 character(len=max_path_len) :: line
432
433 num_patterns = 0
434 ierr = 0
435
436 open(newunit=unit_num, file=filename, status='old', action='read', iostat=ios)
437 if (ios /= 0) then
438 ierr = 1
439 return
440 end if
441
442 do
443 read(unit_num, '(A)', iostat=ios) line
444 if (ios /= 0) exit
445
446 ! Skip empty lines and comments
447 if (len_trim(line) == 0) cycle
448 if (line(1:1) == '#') cycle
449
450 if (num_patterns < size(patterns)) then
451 num_patterns = num_patterns + 1
452 patterns(num_patterns) = trim(line)
453 end if
454 end do
455
456 close(unit_num)
457
458 end subroutine read_patterns_from_file
459
460 function matches_any_pattern(filename, patterns, num_patterns) result(matches)
461 !> Check if filename matches any pattern in the list
462 character(len=*), intent(in) :: filename
463 character(len=max_path_len), intent(in) :: patterns(:)
464 integer, intent(in) :: num_patterns
465 logical :: matches
466
467 character(len=max_path_len) :: basename
468 integer :: i
469
470 matches = .false.
471 if (num_patterns == 0) return
472
473 ! Extract basename
474 basename = filename
475 do i = len_trim(filename), 1, -1
476 if (filename(i:i) == '/') then
477 basename = filename(i+1:)
478 exit
479 end if
480 end do
481
482 ! Check against each pattern
483 do i = 1, num_patterns
484 if (glob_match(trim(basename), trim(patterns(i)))) then
485 matches = .true.
486 return
487 end if
488 end do
489
490 end function matches_any_pattern
491
492 end module ferp_dir
493