Fortran · 5138 bytes Raw Blame History
1 module regex_api
2 !> Public API for the FERP regex engine
3 !> Provides high-level interface for pattern compilation and matching
4 use regex_types
5 use regex_lexer
6 use regex_parser
7 use regex_nfa
8 use regex_engine
9 use regex_optimizer
10 use ferp_kinds, only: pattern_len
11 implicit none
12 private
13
14 public :: regex_t
15 public :: regex_compile, regex_match, regex_search
16 public :: regex_free, regex_error_message
17 public :: match_result_t
18
!> Compiled regex type
!>
!> Opaque handle filled in by regex_compile and consumed by regex_match /
!> regex_search. All components are private; use is_compiled() and
!> regex_error_message() to inspect the state from outside the module.
type :: regex_t
  private
  type(nfa_t) :: nfa                    ! NFA built from the pattern
  type(optimized_nfa_t) :: opt_nfa      ! Optimized NFA for faster matching
  type(ast_pool_t) :: ast_pool          ! AST storage filled by the parser
  logical :: compiled = .false.         ! .true. only after a successful regex_compile
  logical :: is_ere = .false.           ! Value of the is_ere flag used at compile time
  integer :: error_code = 0             ! 0 = none, 1 = tokenize, 2 = parse, 3 = NFA build
  character(len=256) :: error_msg = ''  ! Text matching error_code
  integer :: num_groups = 0             ! Group count reported by the parser
  ! Original pattern text (kept for Aho-Corasick detection). Deferred length
  ! so that patterns of any size are stored without truncation; the component
  ! stays unallocated until regex_compile assigns a non-empty pattern.
  character(len=:), allocatable :: pattern
contains
  procedure :: is_compiled => regex_is_compiled
end type regex_t
34
35 contains
36
subroutine regex_compile(re, pattern, is_ere, ierr)
  !> Compile a regex pattern.
  !>
  !> Stages: tokenize -> parse -> build NFA -> optimize -> Aho-Corasick attempt.
  !> An empty pattern (as judged by pattern_len) skips the pipeline and gets a
  !> two-state NFA joined by an epsilon transition.
  !>
  !> re      : receives the compiled regex; re%compiled is .true. only on success
  !> pattern : pattern text
  !> is_ere  : optional flag forwarded to the tokenizer (defaults to .false.)
  !> ierr    : 0 on success, otherwise the status returned by the failing stage
  type(regex_t), intent(out) :: re
  character(len=*), intent(in) :: pattern
  logical, intent(in), optional :: is_ere
  integer, intent(out) :: ierr

  type(token_list_t) :: tokens
  integer :: root_idx
  logical :: extended

  ! Start from a clean "not compiled, no error" state.
  ierr = 0
  re%compiled = .false.
  re%error_code = 0
  re%error_msg = ''

  if (present(is_ere)) then
    extended = is_ere
  else
    extended = .false.
  end if
  re%is_ere = extended

  ! pattern_len (not len_trim) so that whitespace-only patterns are preserved.
  if (pattern_len(pattern) == 0) then
    call build_empty_matcher()
    return
  end if

  ! Stage 1: tokenize
  call tokenize(pattern, tokens, extended, ierr)
  if (ierr /= 0) then
    call record_failure(1, 'Invalid pattern: tokenization failed')
    return
  end if

  ! Stage 2: parse
  call parse(tokens, re%ast_pool, root_idx, re%num_groups, ierr)
  if (ierr /= 0) then
    call record_failure(2, 'Invalid pattern: parse failed')
    return
  end if

  ! Stage 3: build NFA
  call build_nfa(re%ast_pool, root_idx, re%nfa, ierr)
  if (ierr /= 0) then
    call record_failure(3, 'Invalid pattern: NFA construction failed')
    return
  end if

  ! Stage 4: optimize NFA for faster matching
  call optimize_nfa(re%opt_nfa, re%nfa)

  ! Keep the pattern text and try Aho-Corasick for alternation patterns
  re%pattern = pattern
  call try_build_aho_corasick(re%opt_nfa, pattern, extended, .false.)

  re%compiled = .true.

contains

  !> Build the matcher for the empty pattern: start --epsilon--> accept.
  subroutine build_empty_matcher()
    type(nfa_transition_t) :: eps
    integer :: first_state, last_state

    call re%nfa%init()

    first_state = re%nfa%add_state()
    re%nfa%start_state = first_state
    last_state = re%nfa%add_state()
    re%nfa%accept_state = last_state
    re%nfa%states(last_state)%is_accept = .true.

    ! Epsilon transition so the empty match is accepted
    eps%trans_type = TRANS_EPSILON
    eps%target = last_state
    call re%nfa%states(first_state)%add_trans(eps)

    call optimize_nfa(re%opt_nfa, re%nfa)
    re%num_groups = 0
    re%compiled = .true.
  end subroutine build_empty_matcher

  !> Store the stage code and message of a failed compilation in re.
  subroutine record_failure(stage_code, stage_msg)
    integer, intent(in) :: stage_code
    character(len=*), intent(in) :: stage_msg

    re%error_code = stage_code
    re%error_msg = stage_msg
  end subroutine record_failure

end subroutine regex_compile
115
function regex_match(re, text, ignore_case) result(matched)
  !> Check if pattern matches anywhere in text.
  !>
  !> re          : compiled regex (inout; original note: "for DFA cache")
  !> text        : text to scan
  !> ignore_case : optional, defaults to .false.
  !> matched     : .false. when re is not compiled, otherwise the matched
  !>               flag of the optimized_search result
  type(regex_t), intent(inout) :: re
  character(len=*), intent(in) :: text
  logical, intent(in), optional :: ignore_case
  logical :: matched

  type(match_result_t) :: outcome
  logical :: fold_case

  if (re%compiled) then
    if (present(ignore_case)) then
      fold_case = ignore_case
    else
      fold_case = .false.
    end if

    ! Optimized search (bit vectors and prefix skip)
    outcome = optimized_search(re%opt_nfa, text, fold_case)
    matched = outcome%matched
  else
    matched = .false.
  end if

end function regex_match
137
function regex_search(re, text, ignore_case) result(res)
  !> Search for pattern in text and return the full match result.
  !>
  !> re          : compiled regex (inout; original note: "for DFA cache")
  !> text        : text to scan
  !> ignore_case : optional, defaults to .false.
  !> res         : result of optimized_search; when re is not compiled only
  !>               res%matched is set (to .false.)
  type(regex_t), intent(inout) :: re
  character(len=*), intent(in) :: text
  logical, intent(in), optional :: ignore_case
  type(match_result_t) :: res

  logical :: fold_case

  res%matched = .false.

  if (re%compiled) then
    if (present(ignore_case)) then
      fold_case = ignore_case
    else
      fold_case = .false.
    end if

    ! Optimized search (bit vectors and prefix skip)
    res = optimized_search(re%opt_nfa, text, fold_case)
  end if

end function regex_search
157
subroutine regex_free(re)
  !> Free resources associated with compiled regex.
  !>
  !> Marks re as not compiled and runs the cleanup procedures of its NFA and
  !> AST pool. Error code/message and the other scalar fields are left as-is.
  type(regex_t), intent(inout) :: re

  ! Flag first: from here on regex_match / regex_search return "no match".
  re%compiled = .false.

  call re%nfa%cleanup()
  call re%ast_pool%cleanup()
  ! NOTE(review): re%opt_nfa is not released here - confirm whether
  ! optimized_nfa_t needs an explicit cleanup call.

end subroutine regex_free
167
function regex_error_message(re) result(msg)
  !> Return the message recorded by the most recent regex_compile on re.
  !>
  !> re  : regex handle to query
  !> msg : blank-padded to 256 characters; all blanks when no error is stored
  type(regex_t), intent(in) :: re
  character(len=256) :: msg

  msg = re%error_msg
end function regex_error_message
174
function regex_is_compiled(this) result(res)
  !> Type-bound accessor (bound as is_compiled): report the compiled flag.
  !>
  !> this : regex handle to query
  !> res  : current value of this%compiled
  class(regex_t), intent(in) :: this
  logical :: res

  res = this%compiled
end function regex_is_compiled
180
181 end module regex_api
182