fortrangoingonforty/ferp / 3259a58

Browse files

Fix alternation pattern parsing with null terminators

Add null terminators when storing parsed alternatives in the
Aho-Corasick optimization path. Without this, alternatives like
"hello" stored in fixed-length arrays would have incorrect lengths.
Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
3259a5837fd0dcec5afffe34e3d7ff9281e927ea
Parents
6a04d1a
Tree
8785b2c

1 changed file

Status | File | + | -
M src/regex/regex_optimizer.f90 17 4
src/regex/regex_optimizer.f90 (modified)
@@ -10,6 +10,7 @@ module regex_optimizer
1010
   use regex_types
1111
   use regex_charclass
1212
   use aho_corasick
13
+  use ferp_kinds, only: pattern_len
1314
   implicit none
1415
   private
1516
 
@@ -1595,7 +1596,7 @@ contains
15951596
 
15961597
     is_simple = .true.
15971598
     num_alt = 0
1598
-    pat_len = len_trim(pattern)
1599
+    pat_len = pattern_len(pattern)  ! Use pattern_len to preserve whitespace patterns
15991600
 
16001601
     if (pat_len == 0) then
16011602
       is_simple = .false.
@@ -1622,10 +1623,14 @@ contains
16221623
               return
16231624
             end if
16241625
             alternatives(num_alt) = pattern(alt_start:alt_start+alt_len-1)
1626
+            ! Add null terminator to preserve exact length
1627
+            if (alt_len < len(alternatives(num_alt))) then
1628
+              alternatives(num_alt)(alt_len+1:alt_len+1) = char(0)
1629
+            end if
16251630
           else
16261631
             ! Empty alternative - still valid
16271632
             num_alt = num_alt + 1
1628
-            alternatives(num_alt) = ''
1633
+            alternatives(num_alt) = char(0)
16291634
           end if
16301635
           alt_start = i + 1
16311636
           alt_len = 0
@@ -1665,9 +1670,13 @@ contains
16651670
               return
16661671
             end if
16671672
             alternatives(num_alt) = pattern(alt_start:alt_start+alt_len-1)
1673
+            ! Add null terminator to preserve exact length
1674
+            if (alt_len < len(alternatives(num_alt))) then
1675
+              alternatives(num_alt)(alt_len+1:alt_len+1) = char(0)
1676
+            end if
16681677
           else
16691678
             num_alt = num_alt + 1
1670
-            alternatives(num_alt) = ''
1679
+            alternatives(num_alt) = char(0)
16711680
           end if
16721681
           alt_start = i + 1
16731682
           alt_len = 0
@@ -1704,8 +1713,12 @@ contains
17041713
       end if
17051714
       if (alt_len > 0) then
17061715
         alternatives(num_alt) = pattern(alt_start:alt_start+alt_len-1)
1716
+        ! Add null terminator to preserve exact length
1717
+        if (alt_len < len(alternatives(num_alt))) then
1718
+          alternatives(num_alt)(alt_len+1:alt_len+1) = char(0)
1719
+        end if
17071720
       else
1708
-        alternatives(num_alt) = ''
1721
+        alternatives(num_alt) = char(0)
17091722
       end if
17101723
     end if
17111724