@@ -1,7 +1,7 @@ |
| 1 | 1 | module ferp_matcher |
| 2 | 2 | !> Pattern matching orchestration for FERP |
| 3 | 3 | !> Thread-safe: no SAVE variables, all buffers are dynamically allocated |
| 4 | | - use ferp_kinds |
| 4 | + use ferp_kinds, only: i64, max_pattern_len, pattern_len |
| 5 | 5 | use ferp_options |
| 6 | 6 | use ferp_io |
| 7 | 7 | use ferp_output |
@@ -43,7 +43,7 @@ contains |
| 43 | 43 | type(compiled_patterns_t), intent(out) :: compiled |
| 44 | 44 | integer, intent(out) :: ierr |
| 45 | 45 | |
| 46 | | - integer :: i, n |
| 46 | + integer :: i, n, plen |
| 47 | 47 | logical :: is_ere |
| 48 | 48 | character(len=max_pattern_len) :: pattern |
| 49 | 49 | |
@@ -58,12 +58,12 @@ contains |
| 58 | 58 | allocate(compiled%bm_pats(n)) |
| 59 | 59 | |
| 60 | 60 | do i = 1, n |
| 61 | | - pattern = patterns(i) |
| 61 | + plen = pattern_len(patterns(i)) |
| 62 | 62 | ! For case-insensitive, convert pattern to lowercase |
| 63 | 63 | if (opts%ignore_case) then |
| 64 | | - call bm_compile(compiled%bm_pats(i), trim(pattern), .true.) |
| 64 | + call bm_compile(compiled%bm_pats(i), patterns(i)(1:plen), .true.) |
| 65 | 65 | else |
| 66 | | - call bm_compile(compiled%bm_pats(i), trim(pattern), .false.) |
| 66 | + call bm_compile(compiled%bm_pats(i), patterns(i)(1:plen), .false.) |
| 67 | 67 | end if |
| 68 | 68 | end do |
| 69 | 69 | |
@@ -76,19 +76,21 @@ contains |
| 76 | 76 | allocate(compiled%pcres(n)) |
| 77 | 77 | |
| 78 | 78 | do i = 1, n |
| 79 | | - pattern = patterns(i) |
| 79 | + plen = pattern_len(patterns(i)) |
| 80 | 80 | |
| 81 | 81 | ! Apply -w (word) transformation using PCRE word boundaries |
| 82 | 82 | if (opts%word_regexp) then |
| 83 | | - pattern = '\b' // trim(pattern) // '\b' |
| 84 | | - end if |
| 85 | | - |
| 83 | + pattern = '\b' // patterns(i)(1:plen) // '\b' |
| 84 | + plen = plen + 4 ! \b and \b |
| 86 | 85 | ! Apply -x (line) transformation |
| 87 | | - if (opts%line_regexp) then |
| 88 | | - pattern = '^' // trim(pattern) // '$' |
| 86 | + else if (opts%line_regexp) then |
| 87 | + pattern = '^' // patterns(i)(1:plen) // '$' |
| 88 | + plen = plen + 2 ! ^ and $ |
| 89 | + else |
| 90 | + pattern = patterns(i)(1:plen) |
| 89 | 91 | end if |
| 90 | 92 | |
| 91 | | - call pcre_compile(compiled%pcres(i), trim(pattern), opts%ignore_case, ierr) |
| 93 | + call pcre_compile(compiled%pcres(i), pattern(1:plen), opts%ignore_case, ierr) |
| 92 | 94 | if (ierr /= 0) then |
| 93 | 95 | compiled%compiled = .false. |
| 94 | 96 | return |
@@ -105,19 +107,22 @@ contains |
| 105 | 107 | is_ere = (opts%pattern_type == PATTERN_ERE) |
| 106 | 108 | |
| 107 | 109 | do i = 1, n |
| 108 | | - pattern = patterns(i) |
| 110 | + plen = pattern_len(patterns(i)) |
| 109 | 111 | |
| 110 | 112 | ! Apply -w (word) transformation |
| 111 | 113 | if (opts%word_regexp .and. opts%pattern_type /= PATTERN_FIXED) then |
| 112 | | - pattern = '\<' // trim(pattern) // '\>' |
| 113 | | - end if |
| 114 | | - |
| 114 | + pattern = '\<' // patterns(i)(1:plen) // '\>' |
| 115 | + plen = plen + 4 ! \< and \> |
| 115 | 116 | ! Apply -x (line) transformation |
| 116 | | - if (opts%line_regexp .and. opts%pattern_type /= PATTERN_FIXED) then |
| 117 | | - pattern = '^' // trim(pattern) // '$' |
| 117 | + else if (opts%line_regexp .and. opts%pattern_type /= PATTERN_FIXED) then |
| 118 | + pattern = '^' // patterns(i)(1:plen) // '$' |
| 119 | + plen = plen + 2 ! ^ and $ |
| 120 | + else |
| 121 | + pattern = patterns(i)(1:plen) |
| 118 | 122 | end if |
| 119 | 123 | |
| 120 | | - call regex_compile(compiled%regexes(i), trim(pattern), is_ere, ierr) |
| 124 | + ! Compile with exact pattern length |
| 125 | + call regex_compile(compiled%regexes(i), pattern(1:plen), is_ere, ierr) |
| 121 | 126 | if (ierr /= 0) then |
| 122 | 127 | compiled%compiled = .false. |
| 123 | 128 | return |
@@ -276,7 +281,7 @@ contains |
| 276 | 281 | |
| 277 | 282 | matches = .false. |
| 278 | 283 | line_len = len_trim(line) |
| 279 | | - pat_len = len_trim(pattern) |
| 284 | + pat_len = pattern_len(pattern) ! Use pattern_len to preserve whitespace patterns |
| 280 | 285 | |
| 281 | 286 | if (pat_len == 0) then |
| 282 | 287 | ! Empty pattern matches everything |
@@ -284,8 +289,8 @@ contains |
| 284 | 289 | return |
| 285 | 290 | end if |
| 286 | 291 | |
| 287 | | - ! Find pattern in line |
| 288 | | - pos = index(line(1:line_len), trim(pattern)) |
| 292 | + ! Find pattern in line (use exact length, not trim) |
| 293 | + pos = index(line(1:line_len), pattern(1:pat_len)) |
| 289 | 294 | |
| 290 | 295 | if (pos == 0) return |
| 291 | 296 | |
@@ -552,7 +557,7 @@ contains |
| 552 | 557 | integer, intent(out) :: match_starts(:), match_ends(:) |
| 553 | 558 | integer, intent(out) :: num_matches |
| 554 | 559 | |
| 555 | | - integer :: i, pos, line_len |
| 560 | + integer :: i, pos, line_len, pat_len |
| 556 | 561 | type(match_result_t) :: res |
| 557 | 562 | type(pcre_match_result_t) :: pcre_res |
| 558 | 563 | character(len=:), allocatable :: search_line |
@@ -571,15 +576,19 @@ contains |
| 571 | 576 | end if |
| 572 | 577 | |
| 573 | 578 | do i = 1, size(patterns) |
| 579 | + ! Get pattern length (preserving whitespace patterns) |
| 580 | + pat_len = pattern_len(patterns(i)) |
| 581 | + if (pat_len == 0) cycle |
| 582 | + |
| 574 | 583 | if (opts%ignore_case) then |
| 575 | | - search_pattern = to_lower(patterns(i)) |
| 584 | + search_pattern = to_lower(patterns(i)(1:pat_len)) |
| 576 | 585 | else |
| 577 | | - search_pattern = patterns(i) |
| 586 | + search_pattern = patterns(i)(1:pat_len) |
| 578 | 587 | end if |
| 579 | 588 | |
| 580 | 589 | pos = 1 |
| 581 | 590 | do while (pos <= line_len) |
| 582 | | - pos = index(search_line(pos:line_len), trim(search_pattern)) |
| 591 | + pos = index(search_line(pos:line_len), search_pattern(1:pat_len)) |
| 583 | 592 | if (pos == 0) exit |
| 584 | 593 | |
| 585 | 594 | ! Adjust for substring offset |
@@ -590,11 +599,11 @@ contains |
| 590 | 599 | if (num_matches < size(match_starts)) then |
| 591 | 600 | num_matches = num_matches + 1 |
| 592 | 601 | match_starts(num_matches) = pos |
| 593 | | - match_ends(num_matches) = pos + len_trim(search_pattern) - 1 |
| 602 | + match_ends(num_matches) = pos + pat_len - 1 |
| 594 | 603 | end if |
| 595 | 604 | |
| 596 | 605 | ! Move past this match |
| 597 | | - pos = pos + len_trim(search_pattern) |
| 606 | + pos = pos + pat_len |
| 598 | 607 | end do |
| 599 | 608 | end do |
| 600 | 609 | return |