| 1 | module ferp_dir |
| 2 | !> Directory operations for FERP using POSIX C interop |
| 3 | use ferp_kinds |
| 4 | use, intrinsic :: iso_c_binding |
| 5 | implicit none |
| 6 | private |
| 7 | |
| 8 | public :: is_directory, is_regular_file, is_symlink, collect_files |
| 9 | public :: glob_match |
| 10 | public :: read_patterns_from_file, matches_any_pattern |
| 11 | |
| 12 | ! C interfaces for POSIX directory functions |
| 13 | interface |
| 14 | function c_opendir(dirname) bind(C, name="opendir") |
| 15 | import :: c_ptr, c_char |
| 16 | character(kind=c_char), intent(in) :: dirname(*) |
| 17 | type(c_ptr) :: c_opendir |
| 18 | end function c_opendir |
| 19 | |
| 20 | function c_readdir(dirp) bind(C, name="readdir") |
| 21 | import :: c_ptr |
| 22 | type(c_ptr), value :: dirp |
| 23 | type(c_ptr) :: c_readdir |
| 24 | end function c_readdir |
| 25 | |
| 26 | function c_closedir(dirp) bind(C, name="closedir") |
| 27 | import :: c_ptr, c_int |
| 28 | type(c_ptr), value :: dirp |
| 29 | integer(c_int) :: c_closedir |
| 30 | end function c_closedir |
| 31 | |
| 32 | function c_stat(pathname, statbuf) bind(C, name="stat") |
| 33 | import :: c_char, c_int, c_ptr |
| 34 | character(kind=c_char), intent(in) :: pathname(*) |
| 35 | type(c_ptr), value :: statbuf |
| 36 | integer(c_int) :: c_stat |
| 37 | end function c_stat |
| 38 | |
| 39 | function c_lstat(pathname, statbuf) bind(C, name="lstat") |
| 40 | import :: c_char, c_int, c_ptr |
| 41 | character(kind=c_char), intent(in) :: pathname(*) |
| 42 | type(c_ptr), value :: statbuf |
| 43 | integer(c_int) :: c_lstat |
| 44 | end function c_lstat |
| 45 | end interface |
| 46 | |
| 47 | ! Size of struct stat varies by platform, use generous size |
| 48 | integer, parameter :: STAT_BUF_SIZE = 256 |
| 49 | |
| 50 | contains |
| 51 | |
| 52 | function is_directory(path) result(is_dir) |
| 53 | !> Check if path is a directory |
| 54 | character(len=*), intent(in) :: path |
| 55 | logical :: is_dir |
| 56 | |
| 57 | type(c_ptr) :: dirp |
| 58 | character(len=max_path_len+1) :: c_path |
| 59 | integer(c_int) :: istat |
| 60 | |
| 61 | is_dir = .false. |
| 62 | |
| 63 | ! Try to open as directory |
| 64 | c_path = trim(path) // c_null_char |
| 65 | dirp = c_opendir(c_path) |
| 66 | |
| 67 | if (c_associated(dirp)) then |
| 68 | is_dir = .true. |
| 69 | istat = c_closedir(dirp) |
| 70 | end if |
| 71 | |
| 72 | end function is_directory |
| 73 | |
| 74 | function is_regular_file(path) result(is_file) |
| 75 | !> Check if path is a regular file (not directory, symlink, etc.) |
| 76 | character(len=*), intent(in) :: path |
| 77 | logical :: is_file |
| 78 | |
| 79 | logical :: exists |
| 80 | |
| 81 | is_file = .false. |
| 82 | |
| 83 | ! Use Fortran inquire |
| 84 | inquire(file=path, exist=exists) |
| 85 | if (.not. exists) return |
| 86 | |
| 87 | ! If it's not a directory, treat as regular file |
| 88 | is_file = .not. is_directory(path) |
| 89 | |
| 90 | end function is_regular_file |
| 91 | |
| 92 | function is_symlink(path) result(is_link) |
| 93 | !> Check if path is a symbolic link using lstat |
| 94 | character(len=*), intent(in) :: path |
| 95 | logical :: is_link |
| 96 | |
| 97 | character(len=max_path_len+1) :: c_path |
| 98 | character(len=STAT_BUF_SIZE), target :: statbuf |
| 99 | integer(c_int) :: istat |
| 100 | integer :: mode_offset, mode_val |
| 101 | |
| 102 | is_link = .false. |
| 103 | |
| 104 | c_path = trim(path) // c_null_char |
| 105 | istat = c_lstat(c_path, c_loc(statbuf)) |
| 106 | |
| 107 | if (istat /= 0) return |
| 108 | |
| 109 | ! On Linux x86_64, st_mode is at offset 24 (bytes 25-28) |
| 110 | ! st_mode is typically uint32_t |
| 111 | mode_offset = 24 |
| 112 | mode_val = transfer(statbuf(mode_offset+1:mode_offset+4), 0) |
| 113 | |
| 114 | ! S_IFLNK = 0120000 (octal) = 40960 (decimal) |
| 115 | ! The file type is in bits 12-15 of mode |
| 116 | ! S_IFMT mask = 0170000 (octal) = 61440 |
| 117 | is_link = iand(mode_val, 61440) == 40960 |
| 118 | |
| 119 | end function is_symlink |
| 120 | |
| 121 | subroutine collect_files(start_path, file_list, num_files, recursive, & |
| 122 | follow_links, include_globs, num_include, & |
| 123 | exclude_globs, num_exclude, exclude_dirs, num_exclude_dirs) |
| 124 | !> Collect files from a path, optionally recursively |
| 125 | character(len=*), intent(in) :: start_path |
| 126 | character(len=max_path_len), intent(out) :: file_list(:) |
| 127 | integer, intent(out) :: num_files |
| 128 | logical, intent(in) :: recursive |
| 129 | logical, intent(in) :: follow_links |
| 130 | character(len=max_path_len), intent(in) :: include_globs(:) |
| 131 | integer, intent(in) :: num_include |
| 132 | character(len=max_path_len), intent(in) :: exclude_globs(:) |
| 133 | integer, intent(in) :: num_exclude |
| 134 | character(len=max_path_len), intent(in) :: exclude_dirs(:) |
| 135 | integer, intent(in) :: num_exclude_dirs |
| 136 | |
| 137 | ! SAVE used for large array - safe since file collection runs before parallel section |
| 138 | ! MAX_DEPTH is the max number of directories that can be queued at once (not depth) |
| 139 | integer, parameter :: MAX_DEPTH = 10000 |
| 140 | character(len=max_path_len), save :: dir_stack(MAX_DEPTH) |
| 141 | integer :: stack_top |
| 142 | character(len=max_path_len) :: current_dir, entry_path, entry_name |
| 143 | type(c_ptr) :: dirp, entry_ptr |
| 144 | integer(c_int) :: istat |
| 145 | |
| 146 | num_files = 0 |
| 147 | |
| 148 | ! Check if start_path is a file or directory |
| 149 | if (.not. is_directory(start_path)) then |
| 150 | ! It's a file, just add it if it passes filters |
| 151 | if (should_include_file_multi(start_path, include_globs, num_include, & |
| 152 | exclude_globs, num_exclude)) then |
| 153 | num_files = 1 |
| 154 | file_list(1) = start_path |
| 155 | end if |
| 156 | return |
| 157 | end if |
| 158 | |
| 159 | ! Initialize directory stack |
| 160 | stack_top = 1 |
| 161 | dir_stack(1) = start_path |
| 162 | |
| 163 | do while (stack_top > 0 .and. num_files < size(file_list)) |
| 164 | ! Pop directory from stack |
| 165 | current_dir = dir_stack(stack_top) |
| 166 | stack_top = stack_top - 1 |
| 167 | |
| 168 | ! Open directory |
| 169 | dirp = c_opendir(trim(current_dir) // c_null_char) |
| 170 | if (.not. c_associated(dirp)) cycle |
| 171 | |
| 172 | ! Read directory entries |
| 173 | do |
| 174 | entry_ptr = c_readdir(dirp) |
| 175 | if (.not. c_associated(entry_ptr)) exit |
| 176 | |
| 177 | ! Get entry name from dirent struct |
| 178 | call get_dirent_name(entry_ptr, entry_name) |
| 179 | |
| 180 | ! Skip . and .. |
| 181 | if (trim(entry_name) == '.' .or. trim(entry_name) == '..') cycle |
| 182 | if (len_trim(entry_name) == 0) cycle |
| 183 | |
| 184 | ! Build full path |
| 185 | if (current_dir(len_trim(current_dir):len_trim(current_dir)) == '/') then |
| 186 | entry_path = trim(current_dir) // trim(entry_name) |
| 187 | else |
| 188 | entry_path = trim(current_dir) // '/' // trim(entry_name) |
| 189 | end if |
| 190 | |
| 191 | ! Skip symlinks if not following them (like grep -r vs grep -R) |
| 192 | if (.not. follow_links .and. is_symlink(entry_path)) cycle |
| 193 | |
| 194 | ! Check if it's a directory |
| 195 | if (is_directory(entry_path)) then |
| 196 | if (recursive) then |
| 197 | ! Check exclude-dir patterns |
| 198 | if (num_exclude_dirs > 0) then |
| 199 | if (matches_any_pattern(trim(entry_name), exclude_dirs, num_exclude_dirs)) cycle |
| 200 | end if |
| 201 | |
| 202 | ! Push to stack for later processing |
| 203 | if (stack_top < MAX_DEPTH) then |
| 204 | stack_top = stack_top + 1 |
| 205 | dir_stack(stack_top) = entry_path |
| 206 | end if |
| 207 | end if |
| 208 | else |
| 209 | ! It's a file - check filters and add |
| 210 | if (should_include_file_multi(entry_path, include_globs, num_include, & |
| 211 | exclude_globs, num_exclude)) then |
| 212 | if (num_files < size(file_list)) then |
| 213 | num_files = num_files + 1 |
| 214 | file_list(num_files) = entry_path |
| 215 | end if |
| 216 | end if |
| 217 | end if |
| 218 | end do |
| 219 | |
| 220 | istat = c_closedir(dirp) |
| 221 | end do |
| 222 | |
| 223 | end subroutine collect_files |
| 224 | |
| 225 | subroutine get_dirent_name(entry_ptr, name) |
| 226 | !> Extract filename from dirent struct pointer |
| 227 | type(c_ptr), intent(in) :: entry_ptr |
| 228 | character(len=*), intent(out) :: name |
| 229 | |
| 230 | ! dirent.d_name starts at offset after d_ino and d_off (platform dependent) |
| 231 | ! On most systems, d_name is at offset ~19-21 bytes |
| 232 | ! We'll use a more robust approach: scan for printable chars |
| 233 | character(len=256, kind=c_char), pointer :: raw_data |
| 234 | integer :: i, start_pos, name_len |
| 235 | |
| 236 | name = '' |
| 237 | if (.not. c_associated(entry_ptr)) return |
| 238 | |
| 239 | ! Map memory to character array |
| 240 | call c_f_pointer(entry_ptr, raw_data) |
| 241 | |
| 242 | ! On macOS/Linux, d_name typically starts around byte 19-21 |
| 243 | ! Find start of name by scanning for first printable character after struct header |
| 244 | ! Starting from offset 19 works on both platforms |
| 245 | start_pos = 19 |
| 246 | |
| 247 | ! Find actual start (first printable character after header) |
| 248 | do i = start_pos, min(40, len(raw_data)) |
| 249 | if (ichar(raw_data(i:i)) >= 32 .and. ichar(raw_data(i:i)) < 127) then |
| 250 | start_pos = i |
| 251 | exit |
| 252 | end if |
| 253 | end do |
| 254 | |
| 255 | ! Copy name until null terminator |
| 256 | name_len = 0 |
| 257 | do i = start_pos, min(start_pos + max_path_len - 1, len(raw_data)) |
| 258 | if (raw_data(i:i) == c_null_char) exit |
| 259 | if (ichar(raw_data(i:i)) < 32 .or. ichar(raw_data(i:i)) >= 127) exit |
| 260 | name_len = name_len + 1 |
| 261 | name(name_len:name_len) = raw_data(i:i) |
| 262 | end do |
| 263 | |
| 264 | end subroutine get_dirent_name |
| 265 | |
| 266 | function should_include_file(filepath, include_glob, exclude_glob) result(include) |
| 267 | !> Check if file should be included based on glob patterns |
| 268 | character(len=*), intent(in) :: filepath |
| 269 | character(len=*), intent(in) :: include_glob |
| 270 | character(len=*), intent(in) :: exclude_glob |
| 271 | logical :: include |
| 272 | |
| 273 | character(len=max_path_len) :: basename |
| 274 | integer :: i |
| 275 | |
| 276 | include = .true. |
| 277 | |
| 278 | ! Extract basename |
| 279 | basename = filepath |
| 280 | do i = len_trim(filepath), 1, -1 |
| 281 | if (filepath(i:i) == '/') then |
| 282 | basename = filepath(i+1:) |
| 283 | exit |
| 284 | end if |
| 285 | end do |
| 286 | |
| 287 | ! Check include pattern (if specified, file must match) |
| 288 | if (len_trim(include_glob) > 0) then |
| 289 | include = glob_match(trim(basename), trim(include_glob)) |
| 290 | if (.not. include) return |
| 291 | end if |
| 292 | |
| 293 | ! Check exclude pattern (if specified and matches, exclude) |
| 294 | if (len_trim(exclude_glob) > 0) then |
| 295 | if (glob_match(trim(basename), trim(exclude_glob))) then |
| 296 | include = .false. |
| 297 | end if |
| 298 | end if |
| 299 | |
| 300 | end function should_include_file |
| 301 | |
| 302 | function should_include_file_multi(filepath, include_globs, num_include, & |
| 303 | exclude_globs, num_exclude) result(include) |
| 304 | !> Check if file should be included based on multiple glob patterns |
| 305 | character(len=*), intent(in) :: filepath |
| 306 | character(len=max_path_len), intent(in) :: include_globs(:) |
| 307 | integer, intent(in) :: num_include |
| 308 | character(len=max_path_len), intent(in) :: exclude_globs(:) |
| 309 | integer, intent(in) :: num_exclude |
| 310 | logical :: include |
| 311 | |
| 312 | character(len=max_path_len) :: basename |
| 313 | integer :: i |
| 314 | |
| 315 | include = .true. |
| 316 | |
| 317 | ! Extract basename |
| 318 | basename = filepath |
| 319 | do i = len_trim(filepath), 1, -1 |
| 320 | if (filepath(i:i) == '/') then |
| 321 | basename = filepath(i+1:) |
| 322 | exit |
| 323 | end if |
| 324 | end do |
| 325 | |
| 326 | ! Check include patterns (if any specified, file must match at least one) |
| 327 | if (num_include > 0) then |
| 328 | include = .false. |
| 329 | do i = 1, num_include |
| 330 | if (glob_match(trim(basename), trim(include_globs(i)))) then |
| 331 | include = .true. |
| 332 | exit |
| 333 | end if |
| 334 | end do |
| 335 | if (.not. include) return |
| 336 | end if |
| 337 | |
| 338 | ! Check exclude patterns (if any match, exclude the file) |
| 339 | if (num_exclude > 0) then |
| 340 | do i = 1, num_exclude |
| 341 | if (glob_match(trim(basename), trim(exclude_globs(i)))) then |
| 342 | include = .false. |
| 343 | return |
| 344 | end if |
| 345 | end do |
| 346 | end if |
| 347 | |
| 348 | end function should_include_file_multi |
| 349 | |
| 350 | recursive function glob_match(str, pattern) result(matches) |
| 351 | !> Simple glob pattern matching (* and ? wildcards) |
| 352 | character(len=*), intent(in) :: str |
| 353 | character(len=*), intent(in) :: pattern |
| 354 | logical :: matches |
| 355 | |
| 356 | integer :: s, p, str_len, pat_len |
| 357 | |
| 358 | matches = .false. |
| 359 | str_len = len(str) |
| 360 | pat_len = len(pattern) |
| 361 | |
| 362 | ! Handle empty pattern |
| 363 | if (pat_len == 0) then |
| 364 | matches = (str_len == 0) |
| 365 | return |
| 366 | end if |
| 367 | |
| 368 | s = 1 |
| 369 | p = 1 |
| 370 | |
| 371 | main_loop: do while (s <= str_len .and. p <= pat_len) |
| 372 | if (pattern(p:p) == '*') then |
| 373 | ! * matches zero or more characters |
| 374 | ! Skip consecutive stars |
| 375 | skip_stars: do while (p <= pat_len) |
| 376 | if (pattern(p:p) /= '*') exit skip_stars |
| 377 | p = p + 1 |
| 378 | end do skip_stars |
| 379 | |
| 380 | if (p > pat_len) then |
| 381 | ! Pattern ends with * - matches rest of string |
| 382 | matches = .true. |
| 383 | return |
| 384 | end if |
| 385 | |
| 386 | ! Try matching from each position in str |
| 387 | try_positions: do while (s <= str_len) |
| 388 | if (glob_match(str(s:str_len), pattern(p:pat_len))) then |
| 389 | matches = .true. |
| 390 | return |
| 391 | end if |
| 392 | s = s + 1 |
| 393 | end do try_positions |
| 394 | |
| 395 | ! Also try empty match (s > str_len) |
| 396 | matches = glob_match('', pattern(p:pat_len)) |
| 397 | return |
| 398 | else if (pattern(p:p) == '?') then |
| 399 | ! ? matches exactly one character |
| 400 | s = s + 1 |
| 401 | p = p + 1 |
| 402 | else if (pattern(p:p) == str(s:s)) then |
| 403 | ! Exact character match |
| 404 | s = s + 1 |
| 405 | p = p + 1 |
| 406 | else |
| 407 | ! No match |
| 408 | return |
| 409 | end if |
| 410 | end do main_loop |
| 411 | |
| 412 | ! Skip trailing wildcards in pattern |
| 413 | trailing_stars: do while (p <= pat_len) |
| 414 | if (pattern(p:p) /= '*') exit trailing_stars |
| 415 | p = p + 1 |
| 416 | end do trailing_stars |
| 417 | |
| 418 | ! Match if both consumed |
| 419 | matches = (s > str_len .and. p > pat_len) |
| 420 | |
| 421 | end function glob_match |
| 422 | |
| 423 | subroutine read_patterns_from_file(filename, patterns, num_patterns, ierr) |
| 424 | !> Read glob patterns from a file, one per line |
| 425 | character(len=*), intent(in) :: filename |
| 426 | character(len=max_path_len), intent(out) :: patterns(:) |
| 427 | integer, intent(out) :: num_patterns |
| 428 | integer, intent(out) :: ierr |
| 429 | |
| 430 | integer :: unit_num, ios |
| 431 | character(len=max_path_len) :: line |
| 432 | |
| 433 | num_patterns = 0 |
| 434 | ierr = 0 |
| 435 | |
| 436 | open(newunit=unit_num, file=filename, status='old', action='read', iostat=ios) |
| 437 | if (ios /= 0) then |
| 438 | ierr = 1 |
| 439 | return |
| 440 | end if |
| 441 | |
| 442 | do |
| 443 | read(unit_num, '(A)', iostat=ios) line |
| 444 | if (ios /= 0) exit |
| 445 | |
| 446 | ! Skip empty lines and comments |
| 447 | if (len_trim(line) == 0) cycle |
| 448 | if (line(1:1) == '#') cycle |
| 449 | |
| 450 | if (num_patterns < size(patterns)) then |
| 451 | num_patterns = num_patterns + 1 |
| 452 | patterns(num_patterns) = trim(line) |
| 453 | end if |
| 454 | end do |
| 455 | |
| 456 | close(unit_num) |
| 457 | |
| 458 | end subroutine read_patterns_from_file |
| 459 | |
| 460 | function matches_any_pattern(filename, patterns, num_patterns) result(matches) |
| 461 | !> Check if filename matches any pattern in the list |
| 462 | character(len=*), intent(in) :: filename |
| 463 | character(len=max_path_len), intent(in) :: patterns(:) |
| 464 | integer, intent(in) :: num_patterns |
| 465 | logical :: matches |
| 466 | |
| 467 | character(len=max_path_len) :: basename |
| 468 | integer :: i |
| 469 | |
| 470 | matches = .false. |
| 471 | if (num_patterns == 0) return |
| 472 | |
| 473 | ! Extract basename |
| 474 | basename = filename |
| 475 | do i = len_trim(filename), 1, -1 |
| 476 | if (filename(i:i) == '/') then |
| 477 | basename = filename(i+1:) |
| 478 | exit |
| 479 | end if |
| 480 | end do |
| 481 | |
| 482 | ! Check against each pattern |
| 483 | do i = 1, num_patterns |
| 484 | if (glob_match(trim(basename), trim(patterns(i)))) then |
| 485 | matches = .true. |
| 486 | return |
| 487 | end if |
| 488 | end do |
| 489 | |
| 490 | end function matches_any_pattern |
| 491 | |
| 492 | end module ferp_dir |
| 493 |