| 1 | ! ============================================================================== |
| 2 | ! Module: glob |
| 3 | ! Purpose: Pattern matching and file globbing functionality |
| 4 | ! ============================================================================== |
| 5 | module glob |
| 6 | use shell_types |
| 7 | use system_interface |
| 8 | use performance |
| 9 | use iso_fortran_env, only: output_unit, error_unit |
| 10 | use iso_c_binding |
| 11 | implicit none |
| 12 | |
| 13 | integer, parameter :: MAX_GLOB_MATCHES = 1000 |
| 14 | integer, parameter :: MAX_FILENAME_LEN = 256 |
| 15 | integer, parameter :: MAX_GLOB_RECURSION = 4096 |
| 16 | integer :: glob_recursion_depth = 0 |
| 17 | |
| 18 | contains |
| 19 | |
| 20 | ! Check if string contains unescaped glob characters |
| 21 | function has_unescaped_glob_chars(str) result(has_unescaped) |
| 22 | character(len=*), intent(in) :: str |
| 23 | logical :: has_unescaped |
| 24 | integer :: i, len_str |
| 25 | logical :: escaped |
| 26 | character(len=1) :: backslash |
| 27 | |
| 28 | has_unescaped = .false. |
| 29 | len_str = len_trim(str) |
| 30 | escaped = .false. |
| 31 | backslash = char(92) ! ASCII code for backslash |
| 32 | |
| 33 | do i = 1, len_str |
| 34 | if (escaped) then |
| 35 | ! Previous char was backslash, so this char is escaped |
| 36 | escaped = .false. |
| 37 | else if (str(i:i) == backslash) then |
| 38 | ! This is an escape character |
| 39 | escaped = .true. |
| 40 | else if (str(i:i) == '*' .or. str(i:i) == '?' .or. str(i:i) == '[') then |
| 41 | ! Found unescaped glob character |
| 42 | has_unescaped = .true. |
| 43 | return |
| 44 | end if |
| 45 | end do |
| 46 | end function |
| 47 | |
| 48 | ! Main glob expansion function |
| 49 | subroutine expand_glob_patterns(tokens, num_tokens, expanded_tokens, expanded_count, token_quoted) |
| 50 | character(len=*), intent(in) :: tokens(:) |
| 51 | integer, intent(in) :: num_tokens |
| 52 | character(len=MAX_TOKEN_LEN), allocatable, intent(out) :: expanded_tokens(:) |
| 53 | integer, intent(out) :: expanded_count |
| 54 | logical, intent(in), optional :: token_quoted(:) |
| 55 | |
| 56 | ! Use allocatable arrays to avoid static storage |
| 57 | character(len=MAX_TOKEN_LEN), allocatable :: temp_tokens(:) |
| 58 | character(len=MAX_TOKEN_LEN), allocatable :: matches(:) |
| 59 | integer :: i, j, match_count, total_count, current_size |
| 60 | logical :: has_glob_chars, is_quoted |
| 61 | integer(int64) :: glob_start_time |
| 62 | |
| 63 | ! Start performance timing |
| 64 | call start_timer('glob_expansion', glob_start_time) |
| 65 | |
| 66 | ! Allocate initial arrays |
| 67 | allocate(temp_tokens(100)) ! Start with reasonable size |
| 68 | allocate(matches(MAX_GLOB_MATCHES)) |
| 69 | current_size = 100 |
| 70 | total_count = 0 |
| 71 | |
| 72 | do i = 1, num_tokens |
| 73 | ! Check if this token was quoted (skip glob expansion if so) |
| 74 | is_quoted = .false. |
| 75 | if (present(token_quoted)) then |
| 76 | if (i <= size(token_quoted)) then |
| 77 | is_quoted = token_quoted(i) |
| 78 | end if |
| 79 | end if |
| 80 | |
| 81 | ! Check if token contains unescaped glob characters (skip if quoted) |
| 82 | if (is_quoted) then |
| 83 | has_glob_chars = .false. ! Quoted tokens don't get glob expanded |
| 84 | else |
| 85 | has_glob_chars = has_unescaped_glob_chars(tokens(i)) |
| 86 | end if |
| 87 | |
| 88 | if (has_glob_chars) then |
| 89 | ! Expand the glob pattern |
| 90 | call glob_match(tokens(i), matches, match_count) |
| 91 | |
| 92 | if (match_count > 0) then |
| 93 | ! Add matches to result |
| 94 | do j = 1, match_count |
| 95 | ! Grow array if needed |
| 96 | if (total_count >= current_size) then |
| 97 | call grow_token_array(temp_tokens, current_size) |
| 98 | end if |
| 99 | total_count = total_count + 1 |
| 100 | temp_tokens(total_count) = trim(matches(j)) |
| 101 | end do |
| 102 | else |
| 103 | ! No matches found - keep original token |
| 104 | if (total_count >= current_size) then |
| 105 | call grow_token_array(temp_tokens, current_size) |
| 106 | end if |
| 107 | total_count = total_count + 1 |
| 108 | temp_tokens(total_count) = tokens(i) |
| 109 | end if |
| 110 | else |
| 111 | ! No glob characters - keep original token |
| 112 | if (total_count >= current_size) then |
| 113 | call grow_token_array(temp_tokens, current_size) |
| 114 | end if |
| 115 | total_count = total_count + 1 |
| 116 | temp_tokens(total_count) = tokens(i) |
| 117 | end if |
| 118 | end do |
| 119 | |
| 120 | ! Allocate result array |
| 121 | expanded_count = total_count |
| 122 | if (expanded_count > 0) then |
| 123 | allocate(expanded_tokens(expanded_count)) |
| 124 | do i = 1, expanded_count |
| 125 | expanded_tokens(i) = temp_tokens(i) |
| 126 | end do |
| 127 | else |
| 128 | allocate(expanded_tokens(1)) |
| 129 | expanded_tokens(1) = '' |
| 130 | expanded_count = 0 |
| 131 | end if |
| 132 | |
| 133 | ! Track memory allocation |
| 134 | if (allocated(expanded_tokens)) then |
| 135 | call track_allocation(size(expanded_tokens) * MAX_TOKEN_LEN, 'expanded_tokens') |
| 136 | end if |
| 137 | |
| 138 | ! Clean up allocatable arrays |
| 139 | if (allocated(temp_tokens)) deallocate(temp_tokens) |
| 140 | if (allocated(matches)) deallocate(matches) |
| 141 | |
| 142 | ! End performance timing |
| 143 | call end_timer('glob_expansion', glob_start_time, total_glob_time) |
| 144 | end subroutine |
| 145 | |
| 146 | ! Helper subroutine to grow token array |
| 147 | subroutine grow_token_array(array, current_size) |
| 148 | character(len=MAX_TOKEN_LEN), allocatable, intent(inout) :: array(:) |
| 149 | integer, intent(inout) :: current_size |
| 150 | character(len=MAX_TOKEN_LEN), allocatable :: new_array(:) |
| 151 | integer :: new_size |
| 152 | |
| 153 | new_size = current_size * 2 |
| 154 | allocate(new_array(new_size)) |
| 155 | |
| 156 | ! Copy existing data |
| 157 | new_array(1:current_size) = array(1:current_size) |
| 158 | |
| 159 | ! Swap arrays |
| 160 | call move_alloc(new_array, array) |
| 161 | current_size = new_size |
| 162 | end subroutine |
| 163 | |
| 164 | ! Match a glob pattern against files in current directory |
| 165 | subroutine glob_match(pattern, matches, match_count) |
| 166 | character(len=*), intent(in) :: pattern |
| 167 | character(len=MAX_TOKEN_LEN), intent(out) :: matches(:) |
| 168 | integer, intent(out) :: match_count |
| 169 | |
| 170 | character(len=MAX_FILENAME_LEN) :: directory_path, filename |
| 171 | character(len=MAX_FILENAME_LEN) :: full_pattern |
| 172 | integer :: dir_pos, i |
| 173 | logical :: is_dir_pattern |
| 174 | |
| 175 | match_count = 0 |
| 176 | full_pattern = trim(pattern) |
| 177 | |
| 178 | ! Check if pattern contains directory separator |
| 179 | dir_pos = 0 |
| 180 | do i = len_trim(pattern), 1, -1 |
| 181 | if (pattern(i:i) == '/') then |
| 182 | dir_pos = i |
| 183 | exit |
| 184 | end if |
| 185 | end do |
| 186 | |
| 187 | if (dir_pos > 0) then |
| 188 | ! Pattern has directory component |
| 189 | directory_path = pattern(1:dir_pos-1) |
| 190 | if (len_trim(directory_path) == 0) directory_path = '/' |
| 191 | filename = pattern(dir_pos+1:) |
| 192 | is_dir_pattern = .true. |
| 193 | else |
| 194 | ! Pattern is just a filename in current directory |
| 195 | directory_path = '.' |
| 196 | filename = pattern |
| 197 | is_dir_pattern = .false. |
| 198 | end if |
| 199 | |
| 200 | ! List files in directory and match pattern |
| 201 | call match_files_in_directory(directory_path, filename, is_dir_pattern, matches, match_count) |
| 202 | end subroutine |
| 203 | |
| 204 | ! Match files in a specific directory against a pattern |
| 205 | subroutine match_files_in_directory(dir_path, file_pattern, include_dir_path, matches, match_count) |
| 206 | character(len=*), intent(in) :: dir_path, file_pattern |
| 207 | logical, intent(in) :: include_dir_path |
| 208 | character(len=MAX_TOKEN_LEN), intent(out) :: matches(:) |
| 209 | integer, intent(out) :: match_count |
| 210 | |
| 211 | ! Simple implementation - in a full shell this would use opendir/readdir |
| 212 | ! Use allocatable to avoid stack overflow on macOS |
| 213 | character(len=MAX_FILENAME_LEN), allocatable :: test_files(:) |
| 214 | integer :: num_test_files, i |
| 215 | |
| 216 | match_count = 0 |
| 217 | allocate(test_files(10000)) ! Increased to handle directories with many files |
| 218 | |
| 219 | ! For now, simulate some common files for testing |
| 220 | ! In a real implementation, this would read the actual directory |
| 221 | call get_simulated_directory_contents(dir_path, test_files, num_test_files) |
| 222 | |
| 223 | do i = 1, num_test_files |
| 224 | if (pattern_matches(file_pattern, test_files(i))) then |
| 225 | if (match_count < size(matches)) then |
| 226 | match_count = match_count + 1 |
| 227 | if (include_dir_path) then |
| 228 | if (trim(dir_path) == '.') then |
| 229 | matches(match_count) = trim(test_files(i)) |
| 230 | else |
| 231 | matches(match_count) = trim(dir_path) // '/' // trim(test_files(i)) |
| 232 | end if |
| 233 | else |
| 234 | matches(match_count) = trim(test_files(i)) |
| 235 | end if |
| 236 | end if |
| 237 | end if |
| 238 | end do |
| 239 | |
| 240 | if (allocated(test_files)) deallocate(test_files) |
| 241 | end subroutine |
| 242 | |
| 243 | ! Get actual directory contents (simplified implementation) |
| 244 | subroutine get_simulated_directory_contents(dir_path, files, count) |
| 245 | character(len=*), intent(in) :: dir_path |
| 246 | character(len=MAX_FILENAME_LEN), intent(out) :: files(:) |
| 247 | integer, intent(out) :: count |
| 248 | |
| 249 | ! For now, keep simulation but add note |
| 250 | ! In a production shell, this would use opendir/readdir system calls |
| 251 | ! or execute 'ls' and parse output |
| 252 | |
| 253 | ! Try to read actual directory via ls command |
| 254 | call read_directory_with_ls(dir_path, files, count) |
| 255 | |
| 256 | ! No fallback simulation - if ls returns nothing, return nothing |
| 257 | end subroutine |
| 258 | |
| 259 | ! Read directory contents using ls command |
| 260 | subroutine read_directory_with_ls(dir_path, files, count) |
| 261 | character(len=*), intent(in) :: dir_path |
| 262 | character(len=MAX_FILENAME_LEN), intent(out) :: files(:) |
| 263 | integer, intent(out) :: count |
| 264 | |
| 265 | ! Increased buffer size to handle directories with many files (like /tmp) |
| 266 | integer, parameter :: BUFFER_SIZE = 524288 ! 512KB buffer for large directories |
| 267 | |
| 268 | character(len=512) :: command |
| 269 | integer(c_pid_t) :: pid |
| 270 | integer(c_int), target :: status, pipefd(2) |
| 271 | integer :: ret, i, line_start, total_bytes |
| 272 | character(len=BUFFER_SIZE), allocatable :: buffer |
| 273 | integer(c_size_t) :: bytes_read |
| 274 | character(kind=c_char), target :: c_buffer(8192) ! Read in 8KB chunks |
| 275 | |
| 276 | count = 0 |
| 277 | allocate(character(len=BUFFER_SIZE) :: buffer) |
| 278 | |
| 279 | ! Construct ls command (-A excludes . and .., -1 means one per line) |
| 280 | if (trim(dir_path) == '.' .or. trim(dir_path) == '') then |
| 281 | command = 'ls -A1 2>/dev/null' |
| 282 | else |
| 283 | command = 'ls -A1 ' // trim(dir_path) // ' 2>/dev/null' |
| 284 | end if |
| 285 | |
| 286 | ! Create pipe for reading output |
| 287 | ret = c_pipe(c_loc(pipefd)) |
| 288 | if (ret /= 0) return |
| 289 | |
| 290 | ! Fork to execute ls |
| 291 | pid = c_fork() |
| 292 | |
| 293 | if (pid == 0) then |
| 294 | ! Child: redirect stdout to pipe and execute ls |
| 295 | ret = c_close(pipefd(1)) ! Close read end |
| 296 | ret = c_dup2(pipefd(2), STDOUT_FD) |
| 297 | ret = c_close(pipefd(2)) ! Close original write end |
| 298 | |
| 299 | ! Execute ls via shell |
| 300 | call execute_ls_command(trim(command)) |
| 301 | call c_exit(127) |
| 302 | else if (pid > 0) then |
| 303 | ! Parent: read from pipe |
| 304 | ret = c_close(pipefd(2)) ! Close write end |
| 305 | |
| 306 | ! Read output from pipe in chunks until EOF |
| 307 | buffer = '' |
| 308 | total_bytes = 0 |
| 309 | do |
| 310 | bytes_read = c_read(pipefd(1), c_loc(c_buffer), int(8192, c_size_t)) |
| 311 | if (bytes_read <= 0) exit ! EOF or error |
| 312 | |
| 313 | ! Append to buffer |
| 314 | do i = 1, min(int(bytes_read), 8192) |
| 315 | if (c_buffer(i) == c_null_char) exit |
| 316 | if (total_bytes < BUFFER_SIZE) then |
| 317 | total_bytes = total_bytes + 1 |
| 318 | buffer(total_bytes:total_bytes) = c_buffer(i) |
| 319 | end if |
| 320 | end do |
| 321 | |
| 322 | ! Stop if buffer is full |
| 323 | if (total_bytes >= BUFFER_SIZE) exit |
| 324 | end do |
| 325 | |
| 326 | ! Parse lines from buffer |
| 327 | line_start = 1 |
| 328 | do i = 1, total_bytes |
| 329 | if (buffer(i:i) == char(10)) then ! Newline |
| 330 | if (i > line_start .and. count < size(files)) then |
| 331 | count = count + 1 |
| 332 | files(count) = buffer(line_start:i-1) |
| 333 | end if |
| 334 | line_start = i + 1 |
| 335 | end if |
| 336 | end do |
| 337 | |
| 338 | ! Handle last line if no trailing newline |
| 339 | if (line_start <= total_bytes .and. count < size(files)) then |
| 340 | count = count + 1 |
| 341 | files(count) = buffer(line_start:total_bytes) |
| 342 | end if |
| 343 | |
| 344 | ret = c_close(pipefd(1)) |
| 345 | ret = c_waitpid(pid, c_loc(status), 0) |
| 346 | end if |
| 347 | end subroutine |
| 348 | |
| 349 | ! Execute ls command via shell |
| 350 | subroutine execute_ls_command(command) |
| 351 | character(len=*), intent(in) :: command |
| 352 | character(kind=c_char), target :: shell_path(10) |
| 353 | character(kind=c_char), target :: c_flag(3) |
| 354 | character(kind=c_char), target :: c_command(512) |
| 355 | type(c_ptr), target :: argv(4) |
| 356 | integer :: i, ret |
| 357 | |
| 358 | ! Prepare /bin/sh |
| 359 | shell_path(1) = '/' |
| 360 | shell_path(2) = 'b' |
| 361 | shell_path(3) = 'i' |
| 362 | shell_path(4) = 'n' |
| 363 | shell_path(5) = '/' |
| 364 | shell_path(6) = 's' |
| 365 | shell_path(7) = 'h' |
| 366 | shell_path(8) = c_null_char |
| 367 | |
| 368 | ! Prepare -c flag |
| 369 | c_flag(1) = '-' |
| 370 | c_flag(2) = 'c' |
| 371 | c_flag(3) = c_null_char |
| 372 | |
| 373 | ! Prepare command string |
| 374 | do i = 1, min(len_trim(command), 500) |
| 375 | c_command(i) = command(i:i) |
| 376 | end do |
| 377 | c_command(min(len_trim(command), 500) + 1) = c_null_char |
| 378 | |
| 379 | ! Setup argv: ["/bin/sh", "-c", command, NULL] |
| 380 | argv(1) = c_loc(shell_path) ! /bin/sh |
| 381 | argv(2) = c_loc(c_flag) ! -c |
| 382 | argv(3) = c_loc(c_command) ! command |
| 383 | argv(4) = c_null_ptr |
| 384 | |
| 385 | ! Execute |
| 386 | ret = c_execvp(argv(1), c_loc(argv)) |
| 387 | end subroutine |
| 388 | |
| 389 | ! Check if a filename matches a glob pattern |
| 390 | function pattern_matches(pattern, filename) result(matches) |
| 391 | character(len=*), intent(in) :: pattern, filename |
| 392 | logical :: matches |
| 393 | |
| 394 | ! POSIX: * and ? should not match files starting with . (dotfiles) |
| 395 | ! unless the pattern explicitly starts with . |
| 396 | if (len_trim(filename) > 0 .and. filename(1:1) == '.') then |
| 397 | ! This is a dotfile |
| 398 | if (len_trim(pattern) > 0 .and. pattern(1:1) == '.') then |
| 399 | ! Pattern explicitly starts with ., so allow matching |
| 400 | matches = glob_match_recursive(pattern, filename, 1, 1) |
| 401 | else |
| 402 | ! Pattern doesn't start with ., so don't match dotfiles |
| 403 | matches = .false. |
| 404 | end if |
| 405 | else |
| 406 | ! Not a dotfile, normal matching |
| 407 | matches = glob_match_recursive(pattern, filename, 1, 1) |
| 408 | end if |
| 409 | end function |
| 410 | |
| 411 | ! Pattern matching without dotfile exclusion (for case statements, etc.) |
| 412 | function pattern_matches_no_dotfile_check(pattern, text) result(matches) |
| 413 | character(len=*), intent(in) :: pattern, text |
| 414 | logical :: matches |
| 415 | |
| 416 | ! Direct pattern matching without dotfile exclusion |
| 417 | matches = glob_match_recursive(pattern, text, 1, 1) |
| 418 | end function |
| 419 | |
| 420 | ! Recursive pattern matching function |
| 421 | recursive function glob_match_recursive(pattern, text, p_pos, t_pos) result(matches) |
| 422 | character(len=*), intent(in) :: pattern, text |
| 423 | integer, intent(in) :: p_pos, t_pos |
| 424 | logical :: matches |
| 425 | |
| 426 | integer :: p_len, t_len, i, bracket_end |
| 427 | character(len=1) :: p_char, t_char |
| 428 | logical :: bracket_match |
| 429 | |
| 430 | ! Guard against runaway recursion (e.g., pathological patterns like **...**) |
| 431 | glob_recursion_depth = glob_recursion_depth + 1 |
| 432 | if (glob_recursion_depth > MAX_GLOB_RECURSION) then |
| 433 | glob_recursion_depth = glob_recursion_depth - 1 |
| 434 | matches = .false. |
| 435 | return |
| 436 | end if |
| 437 | |
| 438 | p_len = len_trim(pattern) |
| 439 | t_len = len_trim(text) |
| 440 | |
| 441 | ! Special case: empty pattern should only match empty text |
| 442 | if (p_len == 0) then |
| 443 | matches = (t_len == 0) |
| 444 | glob_recursion_depth = glob_recursion_depth - 1 |
| 445 | return |
| 446 | end if |
| 447 | |
| 448 | ! Handle whitespace-only text (e.g., " " should match [[:space:]]) |
| 449 | ! len_trim returns 0 for whitespace, but we need to match it |
| 450 | ! Only do this if pattern is NOT empty (otherwise we'd match padding) |
| 451 | if (t_len == 0 .and. len(text) > 0) then |
| 452 | ! Check if first char is whitespace - if so, use length 1 |
| 453 | if (text(1:1) == ' ' .or. ichar(text(1:1)) == 9) then |
| 454 | t_len = 1 |
| 455 | end if |
| 456 | end if |
| 457 | |
| 458 | ! End conditions |
| 459 | if (p_pos > p_len) then |
| 460 | matches = (t_pos > t_len) |
| 461 | glob_recursion_depth = glob_recursion_depth - 1 |
| 462 | return |
| 463 | end if |
| 464 | |
| 465 | p_char = pattern(p_pos:p_pos) |
| 466 | |
| 467 | select case(p_char) |
| 468 | case('*') |
| 469 | ! Match zero or more characters |
| 470 | ! Try matching rest of pattern at current position |
| 471 | if (glob_match_recursive(pattern, text, p_pos + 1, t_pos)) then |
| 472 | matches = .true. |
| 473 | glob_recursion_depth = glob_recursion_depth - 1 |
| 474 | return |
| 475 | end if |
| 476 | |
| 477 | ! Try consuming one character from text and continue |
| 478 | do i = t_pos, t_len |
| 479 | if (glob_match_recursive(pattern, text, p_pos + 1, i + 1)) then |
| 480 | matches = .true. |
| 481 | glob_recursion_depth = glob_recursion_depth - 1 |
| 482 | return |
| 483 | end if |
| 484 | end do |
| 485 | |
| 486 | matches = .false. |
| 487 | |
| 488 | case('?') |
| 489 | ! Match exactly one character |
| 490 | if (t_pos <= t_len) then |
| 491 | matches = glob_match_recursive(pattern, text, p_pos + 1, t_pos + 1) |
| 492 | else |
| 493 | matches = .false. |
| 494 | end if |
| 495 | |
| 496 | case('[') |
| 497 | ! Character class matching |
| 498 | if (t_pos > t_len) then |
| 499 | matches = .false. |
| 500 | glob_recursion_depth = glob_recursion_depth - 1 |
| 501 | return |
| 502 | end if |
| 503 | |
| 504 | ! Find end of bracket expression (handling nested [:...:]) |
| 505 | ! POSIX: ] is literal if it's first char after [ or [! or [^ |
| 506 | bracket_end = p_pos + 1 |
| 507 | ! Skip negation marker if present |
| 508 | if (bracket_end <= p_len .and. & |
| 509 | (pattern(bracket_end:bracket_end) == '!' .or. pattern(bracket_end:bracket_end) == '^')) then |
| 510 | bracket_end = bracket_end + 1 |
| 511 | end if |
| 512 | ! Skip ] if it's first (literal ]) |
| 513 | if (bracket_end <= p_len .and. pattern(bracket_end:bracket_end) == ']') then |
| 514 | bracket_end = bracket_end + 1 |
| 515 | end if |
| 516 | do while (bracket_end <= p_len) |
| 517 | ! Check for character class [:...:] and skip over it |
| 518 | if (bracket_end + 1 <= p_len .and. pattern(bracket_end:bracket_end+1) == '[:') then |
| 519 | ! Skip to the end of the character class |
| 520 | bracket_end = bracket_end + 2 |
| 521 | do while (bracket_end + 1 <= p_len) |
| 522 | if (pattern(bracket_end:bracket_end+1) == ':]') then |
| 523 | bracket_end = bracket_end + 2 |
| 524 | exit |
| 525 | end if |
| 526 | bracket_end = bracket_end + 1 |
| 527 | end do |
| 528 | else if (pattern(bracket_end:bracket_end) == ']') then |
| 529 | ! Found the closing bracket |
| 530 | exit |
| 531 | else |
| 532 | bracket_end = bracket_end + 1 |
| 533 | end if |
| 534 | end do |
| 535 | |
| 536 | if (bracket_end > p_len) then |
| 537 | ! Invalid bracket expression - treat as literal |
| 538 | matches = (t_pos <= t_len .and. text(t_pos:t_pos) == '[') .and. & |
| 539 | glob_match_recursive(pattern, text, p_pos + 1, t_pos + 1) |
| 540 | else |
| 541 | t_char = text(t_pos:t_pos) |
| 542 | bracket_match = match_bracket_expression(pattern(p_pos+1:bracket_end-1), t_char) |
| 543 | |
| 544 | if (bracket_match) then |
| 545 | matches = glob_match_recursive(pattern, text, bracket_end + 1, t_pos + 1) |
| 546 | else |
| 547 | matches = .false. |
| 548 | end if |
| 549 | end if |
| 550 | |
| 551 | case default |
| 552 | ! Literal character match |
| 553 | if (t_pos <= t_len .and. text(t_pos:t_pos) == p_char) then |
| 554 | matches = glob_match_recursive(pattern, text, p_pos + 1, t_pos + 1) |
| 555 | else |
| 556 | matches = .false. |
| 557 | end if |
| 558 | end select |
| 559 | glob_recursion_depth = glob_recursion_depth - 1 |
| 560 | end function |
| 561 | |
| 562 | ! Match bracket expression [abc], [a-z], [!abc], [[:class:]] |
| 563 | function match_bracket_expression(bracket_content, test_char) result(matches) |
| 564 | character(len=*), intent(in) :: bracket_content |
| 565 | character(len=1), intent(in) :: test_char |
| 566 | logical :: matches |
| 567 | |
| 568 | logical :: negated, found |
| 569 | integer :: i, content_len, class_end |
| 570 | character(len=1) :: current_char, next_char, range_start |
| 571 | character(len=20) :: char_class |
| 572 | |
| 573 | content_len = len_trim(bracket_content) |
| 574 | if (content_len == 0) then |
| 575 | matches = .false. |
| 576 | return |
| 577 | end if |
| 578 | |
| 579 | ! Check for negation (POSIX uses ! but ^ is also common) |
| 580 | negated = (bracket_content(1:1) == '!' .or. bracket_content(1:1) == '^') |
| 581 | i = 1 |
| 582 | if (negated) i = 2 |
| 583 | |
| 584 | found = .false. |
| 585 | |
| 586 | do while (i <= content_len .and. .not. found) |
| 587 | current_char = bracket_content(i:i) |
| 588 | |
| 589 | ! Check for POSIX character class [:class:] |
| 590 | if (i + 3 <= content_len .and. bracket_content(i:i+1) == '[:') then |
| 591 | ! Find the closing :] |
| 592 | class_end = index(bracket_content(i+2:), ':]') |
| 593 | if (class_end > 0) then |
| 594 | ! Extract the class name |
| 595 | char_class = bracket_content(i+2:i+class_end) |
| 596 | |
| 597 | ! Check if character matches the class |
| 598 | found = match_char_class(trim(char_class), test_char) |
| 599 | |
| 600 | ! Move past the character class |
| 601 | i = i + class_end + 3 ! Move past [:class:] |
| 602 | else |
| 603 | ! Malformed character class, treat as literal characters |
| 604 | if (test_char == current_char) then |
| 605 | found = .true. |
| 606 | end if |
| 607 | i = i + 1 |
| 608 | end if |
| 609 | ! Check for range (a-z) |
| 610 | else if (i + 2 <= content_len .and. bracket_content(i+1:i+1) == '-') then |
| 611 | range_start = current_char |
| 612 | next_char = bracket_content(i+2:i+2) |
| 613 | |
| 614 | ! Check if character is in range |
| 615 | if (ichar(test_char) >= ichar(range_start) .and. ichar(test_char) <= ichar(next_char)) then |
| 616 | found = .true. |
| 617 | end if |
| 618 | i = i + 3 |
| 619 | else |
| 620 | ! Single character match |
| 621 | if (test_char == current_char) then |
| 622 | found = .true. |
| 623 | end if |
| 624 | i = i + 1 |
| 625 | end if |
| 626 | end do |
| 627 | |
| 628 | if (negated) then |
| 629 | matches = .not. found |
| 630 | else |
| 631 | matches = found |
| 632 | end if |
| 633 | end function |
| 634 | |
| 635 | ! Match POSIX character class |
| 636 | function match_char_class(class_name, test_char) result(matches) |
| 637 | character(len=*), intent(in) :: class_name |
| 638 | character(len=1), intent(in) :: test_char |
| 639 | logical :: matches |
| 640 | integer :: char_code |
| 641 | |
| 642 | char_code = ichar(test_char) |
| 643 | matches = .false. |
| 644 | |
| 645 | select case (trim(class_name)) |
| 646 | case ('alnum') |
| 647 | ! Alphanumeric: [A-Za-z0-9] |
| 648 | matches = (char_code >= ichar('A') .and. char_code <= ichar('Z')) .or. & |
| 649 | (char_code >= ichar('a') .and. char_code <= ichar('z')) .or. & |
| 650 | (char_code >= ichar('0') .and. char_code <= ichar('9')) |
| 651 | |
| 652 | case ('alpha') |
| 653 | ! Alphabetic: [A-Za-z] |
| 654 | matches = (char_code >= ichar('A') .and. char_code <= ichar('Z')) .or. & |
| 655 | (char_code >= ichar('a') .and. char_code <= ichar('z')) |
| 656 | |
| 657 | case ('blank') |
| 658 | ! Space and tab |
| 659 | matches = (test_char == ' ' .or. test_char == char(9)) |
| 660 | |
| 661 | case ('cntrl') |
| 662 | ! Control characters (0-31, 127) |
| 663 | matches = (char_code >= 0 .and. char_code <= 31) .or. char_code == 127 |
| 664 | |
| 665 | case ('digit') |
| 666 | ! Digits: [0-9] |
| 667 | matches = (char_code >= ichar('0') .and. char_code <= ichar('9')) |
| 668 | |
| 669 | case ('graph') |
| 670 | ! Visible characters (33-126) |
| 671 | matches = (char_code >= 33 .and. char_code <= 126) |
| 672 | |
| 673 | case ('lower') |
| 674 | ! Lowercase letters: [a-z] |
| 675 | matches = (char_code >= ichar('a') .and. char_code <= ichar('z')) |
| 676 | |
| 677 | case ('print') |
| 678 | ! Printable characters (32-126) |
| 679 | matches = (char_code >= 32 .and. char_code <= 126) |
| 680 | |
| 681 | case ('punct') |
| 682 | ! Punctuation (visible non-alphanumeric) |
| 683 | matches = ((char_code >= 33 .and. char_code <= 47) .or. & |
| 684 | (char_code >= 58 .and. char_code <= 64) .or. & |
| 685 | (char_code >= 91 .and. char_code <= 96) .or. & |
| 686 | (char_code >= 123 .and. char_code <= 126)) |
| 687 | |
| 688 | case ('space') |
| 689 | ! Whitespace: space, tab, newline, etc. |
| 690 | matches = (test_char == ' ' .or. test_char == char(9) .or. test_char == char(10) .or. & |
| 691 | test_char == char(11) .or. test_char == char(12) .or. test_char == char(13)) |
| 692 | |
| 693 | case ('upper') |
| 694 | ! Uppercase letters: [A-Z] |
| 695 | matches = (char_code >= ichar('A') .and. char_code <= ichar('Z')) |
| 696 | |
| 697 | case ('xdigit') |
| 698 | ! Hexadecimal digits: [0-9A-Fa-f] |
| 699 | matches = (char_code >= ichar('0') .and. char_code <= ichar('9')) .or. & |
| 700 | (char_code >= ichar('A') .and. char_code <= ichar('F')) .or. & |
| 701 | (char_code >= ichar('a') .and. char_code <= ichar('f')) |
| 702 | |
| 703 | case default |
| 704 | ! Unknown character class |
| 705 | matches = .false. |
| 706 | end select |
| 707 | end function |
| 708 | |
| 709 | ! Sort matches alphabetically (simple bubble sort) |
| 710 | subroutine sort_matches(matches, count) |
| 711 | character(len=*), intent(inout) :: matches(:) |
| 712 | integer, intent(in) :: count |
| 713 | |
| 714 | integer :: i, j |
| 715 | character(len=len(matches)) :: temp |
| 716 | |
| 717 | do i = 1, count - 1 |
| 718 | do j = i + 1, count |
| 719 | if (matches(i) > matches(j)) then |
| 720 | temp = matches(i) |
| 721 | matches(i) = matches(j) |
| 722 | matches(j) = temp |
| 723 | end if |
| 724 | end do |
| 725 | end do |
| 726 | end subroutine |
| 727 | |
| 728 | end module glob |