@@ -10,6 +10,7 @@ module regex_optimizer |
| 10 | 10 | use regex_types |
| 11 | 11 | use regex_charclass |
| 12 | 12 | use aho_corasick |
| 13 | + use ferp_kinds, only: pattern_len |
| 13 | 14 | implicit none |
| 14 | 15 | private |
| 15 | 16 | |
@@ -1595,7 +1596,7 @@ contains |
| 1595 | 1596 | |
| 1596 | 1597 | is_simple = .true. |
| 1597 | 1598 | num_alt = 0 |
| 1598 | | - pat_len = len_trim(pattern) |
| 1599 | + pat_len = pattern_len(pattern) ! Use pattern_len to preserve whitespace patterns |
| 1599 | 1600 | |
| 1600 | 1601 | if (pat_len == 0) then |
| 1601 | 1602 | is_simple = .false. |
@@ -1622,10 +1623,14 @@ contains |
| 1622 | 1623 | return |
| 1623 | 1624 | end if |
| 1624 | 1625 | alternatives(num_alt) = pattern(alt_start:alt_start+alt_len-1) |
| 1626 | + ! Add null terminator to preserve exact length |
| 1627 | + if (alt_len < len(alternatives(num_alt))) then |
| 1628 | + alternatives(num_alt)(alt_len+1:alt_len+1) = char(0) |
| 1629 | + end if |
| 1625 | 1630 | else |
| 1626 | 1631 | ! Empty alternative - still valid |
| 1627 | 1632 | num_alt = num_alt + 1 |
| 1628 | | - alternatives(num_alt) = '' |
| 1633 | + alternatives(num_alt) = char(0) |
| 1629 | 1634 | end if |
| 1630 | 1635 | alt_start = i + 1 |
| 1631 | 1636 | alt_len = 0 |
@@ -1665,9 +1670,13 @@ contains |
| 1665 | 1670 | return |
| 1666 | 1671 | end if |
| 1667 | 1672 | alternatives(num_alt) = pattern(alt_start:alt_start+alt_len-1) |
| 1673 | + ! Add null terminator to preserve exact length |
| 1674 | + if (alt_len < len(alternatives(num_alt))) then |
| 1675 | + alternatives(num_alt)(alt_len+1:alt_len+1) = char(0) |
| 1676 | + end if |
| 1668 | 1677 | else |
| 1669 | 1678 | num_alt = num_alt + 1 |
| 1670 | | - alternatives(num_alt) = '' |
| 1679 | + alternatives(num_alt) = char(0) |
| 1671 | 1680 | end if |
| 1672 | 1681 | alt_start = i + 1 |
| 1673 | 1682 | alt_len = 0 |
@@ -1704,8 +1713,12 @@ contains |
| 1704 | 1713 | end if |
| 1705 | 1714 | if (alt_len > 0) then |
| 1706 | 1715 | alternatives(num_alt) = pattern(alt_start:alt_start+alt_len-1) |
| 1716 | + ! Add null terminator to preserve exact length |
| 1717 | + if (alt_len < len(alternatives(num_alt))) then |
| 1718 | + alternatives(num_alt)(alt_len+1:alt_len+1) = char(0) |
| 1719 | + end if |
| 1707 | 1720 | else |
| 1708 | | - alternatives(num_alt) = '' |
| 1721 | + alternatives(num_alt) = char(0) |
| 1709 | 1722 | end if |
| 1710 | 1723 | end if |
| 1711 | 1724 | |