fortrangoingonforty/ferp / 6a04d1a

Browse files

Update matcher to use pattern_len for -w/-x options

Use explicit pattern lengths when applying word boundary (-w) and
line regexp (-x) transformations. This prevents null terminators
from being included in the transformed patterns.
Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
6a04d1af1422fb10330a8f4598185ffa84af34cb
Parents
5263050
Tree
d4260e0

1 changed file

Status File + -
M src/ferp_matcher.f90 37 28
src/ferp_matcher.f90 modified
@@ -1,7 +1,7 @@
1
 module ferp_matcher
1
 module ferp_matcher
2
   !> Pattern matching orchestration for FERP
2
   !> Pattern matching orchestration for FERP
3
   !> Thread-safe: no SAVE variables, all buffers are dynamically allocated
3
   !> Thread-safe: no SAVE variables, all buffers are dynamically allocated
4
-  use ferp_kinds
4
+  use ferp_kinds, only: i64, max_pattern_len, pattern_len
5
   use ferp_options
5
   use ferp_options
6
   use ferp_io
6
   use ferp_io
7
   use ferp_output
7
   use ferp_output
@@ -43,7 +43,7 @@ contains
43
     type(compiled_patterns_t), intent(out) :: compiled
43
     type(compiled_patterns_t), intent(out) :: compiled
44
     integer, intent(out) :: ierr
44
     integer, intent(out) :: ierr
45
 
45
 
46
-    integer :: i, n
46
+    integer :: i, n, plen
47
     logical :: is_ere
47
     logical :: is_ere
48
     character(len=max_pattern_len) :: pattern
48
     character(len=max_pattern_len) :: pattern
49
 
49
 
@@ -58,12 +58,12 @@ contains
58
       allocate(compiled%bm_pats(n))
58
       allocate(compiled%bm_pats(n))
59
 
59
 
60
       do i = 1, n
60
       do i = 1, n
61
-        pattern = patterns(i)
61
+        plen = pattern_len(patterns(i))
62
         ! For case-insensitive, convert pattern to lowercase
62
         ! For case-insensitive, convert pattern to lowercase
63
         if (opts%ignore_case) then
63
         if (opts%ignore_case) then
64
-          call bm_compile(compiled%bm_pats(i), trim(pattern), .true.)
64
+          call bm_compile(compiled%bm_pats(i), patterns(i)(1:plen), .true.)
65
         else
65
         else
66
-          call bm_compile(compiled%bm_pats(i), trim(pattern), .false.)
66
+          call bm_compile(compiled%bm_pats(i), patterns(i)(1:plen), .false.)
67
         end if
67
         end if
68
       end do
68
       end do
69
 
69
 
@@ -76,19 +76,21 @@ contains
76
       allocate(compiled%pcres(n))
76
       allocate(compiled%pcres(n))
77
 
77
 
78
       do i = 1, n
78
       do i = 1, n
79
-        pattern = patterns(i)
79
+        plen = pattern_len(patterns(i))
80
 
80
 
81
         ! Apply -w (word) transformation using PCRE word boundaries
81
         ! Apply -w (word) transformation using PCRE word boundaries
82
         if (opts%word_regexp) then
82
         if (opts%word_regexp) then
83
-          pattern = '\b' // trim(pattern) // '\b'
83
+          pattern = '\b' // patterns(i)(1:plen) // '\b'
84
-        end if
84
+          plen = plen + 4  ! \b and \b
85
-
86
         ! Apply -x (line) transformation
85
         ! Apply -x (line) transformation
87
-        if (opts%line_regexp) then
86
+        else if (opts%line_regexp) then
88
-          pattern = '^' // trim(pattern) // '$'
87
+          pattern = '^' // patterns(i)(1:plen) // '$'
88
+          plen = plen + 2  ! ^ and $
89
+        else
90
+          pattern = patterns(i)(1:plen)
89
         end if
91
         end if
90
 
92
 
91
-        call pcre_compile(compiled%pcres(i), trim(pattern), opts%ignore_case, ierr)
93
+        call pcre_compile(compiled%pcres(i), pattern(1:plen), opts%ignore_case, ierr)
92
         if (ierr /= 0) then
94
         if (ierr /= 0) then
93
           compiled%compiled = .false.
95
           compiled%compiled = .false.
94
           return
96
           return
@@ -105,19 +107,22 @@ contains
105
     is_ere = (opts%pattern_type == PATTERN_ERE)
107
     is_ere = (opts%pattern_type == PATTERN_ERE)
106
 
108
 
107
     do i = 1, n
109
     do i = 1, n
108
-      pattern = patterns(i)
110
+      plen = pattern_len(patterns(i))
109
 
111
 
110
       ! Apply -w (word) transformation
112
       ! Apply -w (word) transformation
111
       if (opts%word_regexp .and. opts%pattern_type /= PATTERN_FIXED) then
113
       if (opts%word_regexp .and. opts%pattern_type /= PATTERN_FIXED) then
112
-        pattern = '\<' // trim(pattern) // '\>'
114
+        pattern = '\<' // patterns(i)(1:plen) // '\>'
113
-      end if
115
+        plen = plen + 4  ! \< and \>
114
-
115
       ! Apply -x (line) transformation
116
       ! Apply -x (line) transformation
116
-      if (opts%line_regexp .and. opts%pattern_type /= PATTERN_FIXED) then
117
+      else if (opts%line_regexp .and. opts%pattern_type /= PATTERN_FIXED) then
117
-        pattern = '^' // trim(pattern) // '$'
118
+        pattern = '^' // patterns(i)(1:plen) // '$'
119
+        plen = plen + 2  ! ^ and $
120
+      else
121
+        pattern = patterns(i)(1:plen)
118
       end if
122
       end if
119
 
123
 
120
-      call regex_compile(compiled%regexes(i), trim(pattern), is_ere, ierr)
124
+      ! Compile with exact pattern length
125
+      call regex_compile(compiled%regexes(i), pattern(1:plen), is_ere, ierr)
121
       if (ierr /= 0) then
126
       if (ierr /= 0) then
122
         compiled%compiled = .false.
127
         compiled%compiled = .false.
123
         return
128
         return
@@ -276,7 +281,7 @@ contains
276
 
281
 
277
     matches = .false.
282
     matches = .false.
278
     line_len = len_trim(line)
283
     line_len = len_trim(line)
279
-    pat_len = len_trim(pattern)
284
+    pat_len = pattern_len(pattern)  ! Use pattern_len to preserve whitespace patterns
280
 
285
 
281
     if (pat_len == 0) then
286
     if (pat_len == 0) then
282
       ! Empty pattern matches everything
287
       ! Empty pattern matches everything
@@ -284,8 +289,8 @@ contains
284
       return
289
       return
285
     end if
290
     end if
286
 
291
 
287
-    ! Find pattern in line
292
+    ! Find pattern in line (use exact length, not trim)
288
-    pos = index(line(1:line_len), trim(pattern))
293
+    pos = index(line(1:line_len), pattern(1:pat_len))
289
 
294
 
290
     if (pos == 0) return
295
     if (pos == 0) return
291
 
296
 
@@ -552,7 +557,7 @@ contains
552
     integer, intent(out) :: match_starts(:), match_ends(:)
557
     integer, intent(out) :: match_starts(:), match_ends(:)
553
     integer, intent(out) :: num_matches
558
     integer, intent(out) :: num_matches
554
 
559
 
555
-    integer :: i, pos, line_len
560
+    integer :: i, pos, line_len, pat_len
556
     type(match_result_t) :: res
561
     type(match_result_t) :: res
557
     type(pcre_match_result_t) :: pcre_res
562
     type(pcre_match_result_t) :: pcre_res
558
     character(len=:), allocatable :: search_line
563
     character(len=:), allocatable :: search_line
@@ -571,15 +576,19 @@ contains
571
       end if
576
       end if
572
 
577
 
573
       do i = 1, size(patterns)
578
       do i = 1, size(patterns)
579
+        ! Get pattern length (preserving whitespace patterns)
580
+        pat_len = pattern_len(patterns(i))
581
+        if (pat_len == 0) cycle
582
+
574
         if (opts%ignore_case) then
583
         if (opts%ignore_case) then
575
-          search_pattern = to_lower(patterns(i))
584
+          search_pattern = to_lower(patterns(i)(1:pat_len))
576
         else
585
         else
577
-          search_pattern = patterns(i)
586
+          search_pattern = patterns(i)(1:pat_len)
578
         end if
587
         end if
579
 
588
 
580
         pos = 1
589
         pos = 1
581
         do while (pos <= line_len)
590
         do while (pos <= line_len)
582
-          pos = index(search_line(pos:line_len), trim(search_pattern))
591
+          pos = index(search_line(pos:line_len), search_pattern(1:pat_len))
583
           if (pos == 0) exit
592
           if (pos == 0) exit
584
 
593
 
585
           ! Adjust for substring offset
594
           ! Adjust for substring offset
@@ -590,11 +599,11 @@ contains
590
           if (num_matches < size(match_starts)) then
599
           if (num_matches < size(match_starts)) then
591
             num_matches = num_matches + 1
600
             num_matches = num_matches + 1
592
             match_starts(num_matches) = pos
601
             match_starts(num_matches) = pos
593
-            match_ends(num_matches) = pos + len_trim(search_pattern) - 1
602
+            match_ends(num_matches) = pos + pat_len - 1
594
           end if
603
           end if
595
 
604
 
596
           ! Move past this match
605
           ! Move past this match
597
-          pos = pos + len_trim(search_pattern)
606
+          pos = pos + pat_len
598
         end do
607
         end do
599
       end do
608
       end do
600
       return
609
       return