fortrangoingonforty/ferp / 6a04d1a

Browse files

Update matcher to use pattern_len for -w/-x options

Use explicit pattern lengths when applying word boundary (-w) and
line regexp (-x) transformations. This prevents null terminators
from being included in the transformed patterns.
Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
6a04d1af1422fb10330a8f4598185ffa84af34cb
Parents
5263050
Tree
d4260e0

1 changed file

Status File + -
M src/ferp_matcher.f90 37 28
src/ferp_matcher.f90 modified
@@ -1,7 +1,7 @@
11
 module ferp_matcher
22
   !> Pattern matching orchestration for FERP
33
   !> Thread-safe: no SAVE variables, all buffers are dynamically allocated
4
-  use ferp_kinds
4
+  use ferp_kinds, only: i64, max_pattern_len, pattern_len
55
   use ferp_options
66
   use ferp_io
77
   use ferp_output
@@ -43,7 +43,7 @@ contains
4343
     type(compiled_patterns_t), intent(out) :: compiled
4444
     integer, intent(out) :: ierr
4545
 
46
-    integer :: i, n
46
+    integer :: i, n, plen
4747
     logical :: is_ere
4848
     character(len=max_pattern_len) :: pattern
4949
 
@@ -58,12 +58,12 @@ contains
5858
       allocate(compiled%bm_pats(n))
5959
 
6060
       do i = 1, n
61
-        pattern = patterns(i)
61
+        plen = pattern_len(patterns(i))
6262
         ! For case-insensitive, convert pattern to lowercase
6363
         if (opts%ignore_case) then
64
-          call bm_compile(compiled%bm_pats(i), trim(pattern), .true.)
64
+          call bm_compile(compiled%bm_pats(i), patterns(i)(1:plen), .true.)
6565
         else
66
-          call bm_compile(compiled%bm_pats(i), trim(pattern), .false.)
66
+          call bm_compile(compiled%bm_pats(i), patterns(i)(1:plen), .false.)
6767
         end if
6868
       end do
6969
 
@@ -76,19 +76,21 @@ contains
7676
       allocate(compiled%pcres(n))
7777
 
7878
       do i = 1, n
79
-        pattern = patterns(i)
79
+        plen = pattern_len(patterns(i))
8080
 
8181
         ! Apply -w (word) transformation using PCRE word boundaries
8282
         if (opts%word_regexp) then
83
-          pattern = '\b' // trim(pattern) // '\b'
84
-        end if
85
-
83
+          pattern = '\b' // patterns(i)(1:plen) // '\b'
84
+          plen = plen + 4  ! \b and \b
8685
         ! Apply -x (line) transformation
87
-        if (opts%line_regexp) then
88
-          pattern = '^' // trim(pattern) // '$'
86
+        else if (opts%line_regexp) then
87
+          pattern = '^' // patterns(i)(1:plen) // '$'
88
+          plen = plen + 2  ! ^ and $
89
+        else
90
+          pattern = patterns(i)(1:plen)
8991
         end if
9092
 
91
-        call pcre_compile(compiled%pcres(i), trim(pattern), opts%ignore_case, ierr)
93
+        call pcre_compile(compiled%pcres(i), pattern(1:plen), opts%ignore_case, ierr)
9294
         if (ierr /= 0) then
9395
           compiled%compiled = .false.
9496
           return
@@ -105,19 +107,22 @@ contains
105107
     is_ere = (opts%pattern_type == PATTERN_ERE)
106108
 
107109
     do i = 1, n
108
-      pattern = patterns(i)
110
+      plen = pattern_len(patterns(i))
109111
 
110112
       ! Apply -w (word) transformation
111113
       if (opts%word_regexp .and. opts%pattern_type /= PATTERN_FIXED) then
112
-        pattern = '\<' // trim(pattern) // '\>'
113
-      end if
114
-
114
+        pattern = '\<' // patterns(i)(1:plen) // '\>'
115
+        plen = plen + 4  ! \< and \>
115116
       ! Apply -x (line) transformation
116
-      if (opts%line_regexp .and. opts%pattern_type /= PATTERN_FIXED) then
117
-        pattern = '^' // trim(pattern) // '$'
117
+      else if (opts%line_regexp .and. opts%pattern_type /= PATTERN_FIXED) then
118
+        pattern = '^' // patterns(i)(1:plen) // '$'
119
+        plen = plen + 2  ! ^ and $
120
+      else
121
+        pattern = patterns(i)(1:plen)
118122
       end if
119123
 
120
-      call regex_compile(compiled%regexes(i), trim(pattern), is_ere, ierr)
124
+      ! Compile with exact pattern length
125
+      call regex_compile(compiled%regexes(i), pattern(1:plen), is_ere, ierr)
121126
       if (ierr /= 0) then
122127
         compiled%compiled = .false.
123128
         return
@@ -276,7 +281,7 @@ contains
276281
 
277282
     matches = .false.
278283
     line_len = len_trim(line)
279
-    pat_len = len_trim(pattern)
284
+    pat_len = pattern_len(pattern)  ! Use pattern_len to preserve whitespace patterns
280285
 
281286
     if (pat_len == 0) then
282287
       ! Empty pattern matches everything
@@ -284,8 +289,8 @@ contains
284289
       return
285290
     end if
286291
 
287
-    ! Find pattern in line
288
-    pos = index(line(1:line_len), trim(pattern))
292
+    ! Find pattern in line (use exact length, not trim)
293
+    pos = index(line(1:line_len), pattern(1:pat_len))
289294
 
290295
     if (pos == 0) return
291296
 
@@ -552,7 +557,7 @@ contains
552557
     integer, intent(out) :: match_starts(:), match_ends(:)
553558
     integer, intent(out) :: num_matches
554559
 
555
-    integer :: i, pos, line_len
560
+    integer :: i, pos, line_len, pat_len
556561
     type(match_result_t) :: res
557562
     type(pcre_match_result_t) :: pcre_res
558563
     character(len=:), allocatable :: search_line
@@ -571,15 +576,19 @@ contains
571576
       end if
572577
 
573578
       do i = 1, size(patterns)
579
+        ! Get pattern length (preserving whitespace patterns)
580
+        pat_len = pattern_len(patterns(i))
581
+        if (pat_len == 0) cycle
582
+
574583
         if (opts%ignore_case) then
575
-          search_pattern = to_lower(patterns(i))
584
+          search_pattern = to_lower(patterns(i)(1:pat_len))
576585
         else
577
-          search_pattern = patterns(i)
586
+          search_pattern = patterns(i)(1:pat_len)
578587
         end if
579588
 
580589
         pos = 1
581590
         do while (pos <= line_len)
582
-          pos = index(search_line(pos:line_len), trim(search_pattern))
591
+          pos = index(search_line(pos:line_len), search_pattern(1:pat_len))
583592
           if (pos == 0) exit
584593
 
585594
           ! Adjust for substring offset
@@ -590,11 +599,11 @@ contains
590599
           if (num_matches < size(match_starts)) then
591600
             num_matches = num_matches + 1
592601
             match_starts(num_matches) = pos
593
-            match_ends(num_matches) = pos + len_trim(search_pattern) - 1
602
+            match_ends(num_matches) = pos + pat_len - 1
594603
           end if
595604
 
596605
           ! Move past this match
597
-          pos = pos + len_trim(search_pattern)
606
+          pos = pos + pat_len
598607
         end do
599608
       end do
600609
       return