@@ -1,7 +1,7 @@ |
| 1 | module ferp_matcher | 1 | module ferp_matcher |
| 2 | !> Pattern matching orchestration for FERP | 2 | !> Pattern matching orchestration for FERP |
| 3 | !> Thread-safe: no SAVE variables, all buffers are dynamically allocated | 3 | !> Thread-safe: no SAVE variables, all buffers are dynamically allocated |
| 4 | - use ferp_kinds | 4 | + use ferp_kinds, only: i64, max_pattern_len, pattern_len |
| 5 | use ferp_options | 5 | use ferp_options |
| 6 | use ferp_io | 6 | use ferp_io |
| 7 | use ferp_output | 7 | use ferp_output |
@@ -43,7 +43,7 @@ contains |
| 43 | type(compiled_patterns_t), intent(out) :: compiled | 43 | type(compiled_patterns_t), intent(out) :: compiled |
| 44 | integer, intent(out) :: ierr | 44 | integer, intent(out) :: ierr |
| 45 | | 45 | |
| 46 | - integer :: i, n | 46 | + integer :: i, n, plen |
| 47 | logical :: is_ere | 47 | logical :: is_ere |
| 48 | character(len=max_pattern_len) :: pattern | 48 | character(len=max_pattern_len) :: pattern |
| 49 | | 49 | |
@@ -58,12 +58,12 @@ contains |
| 58 | allocate(compiled%bm_pats(n)) | 58 | allocate(compiled%bm_pats(n)) |
| 59 | | 59 | |
| 60 | do i = 1, n | 60 | do i = 1, n |
| 61 | - pattern = patterns(i) | 61 | + plen = pattern_len(patterns(i)) |
| 62 | ! For case-insensitive, convert pattern to lowercase | 62 | ! For case-insensitive, convert pattern to lowercase |
| 63 | if (opts%ignore_case) then | 63 | if (opts%ignore_case) then |
| 64 | - call bm_compile(compiled%bm_pats(i), trim(pattern), .true.) | 64 | + call bm_compile(compiled%bm_pats(i), patterns(i)(1:plen), .true.) |
| 65 | else | 65 | else |
| 66 | - call bm_compile(compiled%bm_pats(i), trim(pattern), .false.) | 66 | + call bm_compile(compiled%bm_pats(i), patterns(i)(1:plen), .false.) |
| 67 | end if | 67 | end if |
| 68 | end do | 68 | end do |
| 69 | | 69 | |
@@ -76,19 +76,21 @@ contains |
| 76 | allocate(compiled%pcres(n)) | 76 | allocate(compiled%pcres(n)) |
| 77 | | 77 | |
| 78 | do i = 1, n | 78 | do i = 1, n |
| 79 | - pattern = patterns(i) | 79 | + plen = pattern_len(patterns(i)) |
| 80 | | 80 | |
| 81 | ! Apply -w (word) transformation using PCRE word boundaries | 81 | ! Apply -w (word) transformation using PCRE word boundaries |
| 82 | if (opts%word_regexp) then | 82 | if (opts%word_regexp) then |
| 83 | - pattern = '\b' // trim(pattern) // '\b' | 83 | + pattern = '\b' // patterns(i)(1:plen) // '\b' |
| 84 | - end if | 84 | + plen = plen + 4 ! \b and \b |
| 85 | - | | |
| 86 | ! Apply -x (line) transformation | 85 | ! Apply -x (line) transformation |
| 87 | - if (opts%line_regexp) then | 86 | + else if (opts%line_regexp) then |
| 88 | - pattern = '^' // trim(pattern) // '$' | 87 | + pattern = '^' // patterns(i)(1:plen) // '$' |
| | 88 | + plen = plen + 2 ! ^ and $ |
| | 89 | + else |
| | 90 | + pattern = patterns(i)(1:plen) |
| 89 | end if | 91 | end if |
| 90 | | 92 | |
| 91 | - call pcre_compile(compiled%pcres(i), trim(pattern), opts%ignore_case, ierr) | 93 | + call pcre_compile(compiled%pcres(i), pattern(1:plen), opts%ignore_case, ierr) |
| 92 | if (ierr /= 0) then | 94 | if (ierr /= 0) then |
| 93 | compiled%compiled = .false. | 95 | compiled%compiled = .false. |
| 94 | return | 96 | return |
@@ -105,19 +107,22 @@ contains |
| 105 | is_ere = (opts%pattern_type == PATTERN_ERE) | 107 | is_ere = (opts%pattern_type == PATTERN_ERE) |
| 106 | | 108 | |
| 107 | do i = 1, n | 109 | do i = 1, n |
| 108 | - pattern = patterns(i) | 110 | + plen = pattern_len(patterns(i)) |
| 109 | | 111 | |
| 110 | ! Apply -w (word) transformation | 112 | ! Apply -w (word) transformation |
| 111 | if (opts%word_regexp .and. opts%pattern_type /= PATTERN_FIXED) then | 113 | if (opts%word_regexp .and. opts%pattern_type /= PATTERN_FIXED) then |
| 112 | - pattern = '\<' // trim(pattern) // '\>' | 114 | + pattern = '\<' // patterns(i)(1:plen) // '\>' |
| 113 | - end if | 115 | + plen = plen + 4 ! \< and \> |
| 114 | - | | |
| 115 | ! Apply -x (line) transformation | 116 | ! Apply -x (line) transformation |
| 116 | - if (opts%line_regexp .and. opts%pattern_type /= PATTERN_FIXED) then | 117 | + else if (opts%line_regexp .and. opts%pattern_type /= PATTERN_FIXED) then |
| 117 | - pattern = '^' // trim(pattern) // '$' | 118 | + pattern = '^' // patterns(i)(1:plen) // '$' |
| | 119 | + plen = plen + 2 ! ^ and $ |
| | 120 | + else |
| | 121 | + pattern = patterns(i)(1:plen) |
| 118 | end if | 122 | end if |
| 119 | | 123 | |
| 120 | - call regex_compile(compiled%regexes(i), trim(pattern), is_ere, ierr) | 124 | + ! Compile with exact pattern length |
| | 125 | + call regex_compile(compiled%regexes(i), pattern(1:plen), is_ere, ierr) |
| 121 | if (ierr /= 0) then | 126 | if (ierr /= 0) then |
| 122 | compiled%compiled = .false. | 127 | compiled%compiled = .false. |
| 123 | return | 128 | return |
@@ -276,7 +281,7 @@ contains |
| 276 | | 281 | |
| 277 | matches = .false. | 282 | matches = .false. |
| 278 | line_len = len_trim(line) | 283 | line_len = len_trim(line) |
| 279 | - pat_len = len_trim(pattern) | 284 | + pat_len = pattern_len(pattern) ! Use pattern_len to preserve whitespace patterns |
| 280 | | 285 | |
| 281 | if (pat_len == 0) then | 286 | if (pat_len == 0) then |
| 282 | ! Empty pattern matches everything | 287 | ! Empty pattern matches everything |
@@ -284,8 +289,8 @@ contains |
| 284 | return | 289 | return |
| 285 | end if | 290 | end if |
| 286 | | 291 | |
| 287 | - ! Find pattern in line | 292 | + ! Find pattern in line (use exact length, not trim) |
| 288 | - pos = index(line(1:line_len), trim(pattern)) | 293 | + pos = index(line(1:line_len), pattern(1:pat_len)) |
| 289 | | 294 | |
| 290 | if (pos == 0) return | 295 | if (pos == 0) return |
| 291 | | 296 | |
@@ -552,7 +557,7 @@ contains |
| 552 | integer, intent(out) :: match_starts(:), match_ends(:) | 557 | integer, intent(out) :: match_starts(:), match_ends(:) |
| 553 | integer, intent(out) :: num_matches | 558 | integer, intent(out) :: num_matches |
| 554 | | 559 | |
| 555 | - integer :: i, pos, line_len | 560 | + integer :: i, pos, line_len, pat_len |
| 556 | type(match_result_t) :: res | 561 | type(match_result_t) :: res |
| 557 | type(pcre_match_result_t) :: pcre_res | 562 | type(pcre_match_result_t) :: pcre_res |
| 558 | character(len=:), allocatable :: search_line | 563 | character(len=:), allocatable :: search_line |
@@ -571,15 +576,19 @@ contains |
| 571 | end if | 576 | end if |
| 572 | | 577 | |
| 573 | do i = 1, size(patterns) | 578 | do i = 1, size(patterns) |
| | 579 | + ! Get pattern length (preserving whitespace patterns) |
| | 580 | + pat_len = pattern_len(patterns(i)) |
| | 581 | + if (pat_len == 0) cycle |
| | 582 | + |
| 574 | if (opts%ignore_case) then | 583 | if (opts%ignore_case) then |
| 575 | - search_pattern = to_lower(patterns(i)) | 584 | + search_pattern = to_lower(patterns(i)(1:pat_len)) |
| 576 | else | 585 | else |
| 577 | - search_pattern = patterns(i) | 586 | + search_pattern = patterns(i)(1:pat_len) |
| 578 | end if | 587 | end if |
| 579 | | 588 | |
| 580 | pos = 1 | 589 | pos = 1 |
| 581 | do while (pos <= line_len) | 590 | do while (pos <= line_len) |
| 582 | - pos = index(search_line(pos:line_len), trim(search_pattern)) | 591 | + pos = index(search_line(pos:line_len), search_pattern(1:pat_len)) |
| 583 | if (pos == 0) exit | 592 | if (pos == 0) exit |
| 584 | | 593 | |
| 585 | ! Adjust for substring offset | 594 | ! Adjust for substring offset |
@@ -590,11 +599,11 @@ contains |
| 590 | if (num_matches < size(match_starts)) then | 599 | if (num_matches < size(match_starts)) then |
| 591 | num_matches = num_matches + 1 | 600 | num_matches = num_matches + 1 |
| 592 | match_starts(num_matches) = pos | 601 | match_starts(num_matches) = pos |
| 593 | - match_ends(num_matches) = pos + len_trim(search_pattern) - 1 | 602 | + match_ends(num_matches) = pos + pat_len - 1 |
| 594 | end if | 603 | end if |
| 595 | | 604 | |
| 596 | ! Move past this match | 605 | ! Move past this match |
| 597 | - pos = pos + len_trim(search_pattern) | 606 | + pos = pos + pat_len |
| 598 | end do | 607 | end do |
| 599 | end do | 608 | end do |
| 600 | return | 609 | return |