Fortran · 14095 bytes Raw Blame History
1 module pcre_api
2 !> PCRE2 library bindings for Perl-compatible regular expressions
3 !> Uses iso_c_binding for C interoperability with libpcre2-8
4 use, intrinsic :: iso_c_binding
5 use ferp_kinds, only: pattern_len
6 implicit none
7 private
8
9 public :: pcre_t, pcre_match_result_t
10 public :: pcre_compile, pcre_match, pcre_search
11 public :: pcre_free, pcre_error_message
12 public :: pcre_available
13
14 !---------------------------------------------------------------------------
15 ! PCRE2 Option Constants
16 !---------------------------------------------------------------------------
17 integer(c_int), parameter, public :: PCRE2_CASELESS = int(z'00000008', c_int)
18 integer(c_int), parameter, public :: PCRE2_MULTILINE = int(z'00000400', c_int)
19 integer(c_int), parameter, public :: PCRE2_DOTALL = int(z'00000020', c_int)
20 integer(c_int), parameter, public :: PCRE2_EXTENDED = int(z'00000080', c_int)
21 integer(c_int), parameter, public :: PCRE2_UTF = int(z'00080000', c_int)
22 integer(c_int), parameter, public :: PCRE2_UCP = int(z'00020000', c_int) ! Unicode properties
23 integer(c_int), parameter, public :: PCRE2_NO_UTF_CHECK = int(z'40000000', c_int)
24
25 !---------------------------------------------------------------------------
26 ! C Interface to PCRE2 Library (8-bit)
27 !---------------------------------------------------------------------------
28 interface
29 ! pcre2_compile_8 - Compile a regular expression pattern
30 function pcre2_compile_8(pattern, length, options, errorcode, erroroffset, ccontext) &
31 bind(C, name="pcre2_compile_8")
32 import :: c_ptr, c_char, c_size_t, c_int
33 character(kind=c_char), intent(in) :: pattern(*)
34 integer(c_size_t), value :: length
35 integer(c_int), value :: options
36 integer(c_int), intent(out) :: errorcode
37 integer(c_size_t), intent(out) :: erroroffset
38 type(c_ptr), value :: ccontext
39 type(c_ptr) :: pcre2_compile_8
40 end function pcre2_compile_8
41
42 ! pcre2_match_data_create_from_pattern_8 - Create match data block
43 function pcre2_match_data_create_from_pattern_8(code, gcontext) &
44 bind(C, name="pcre2_match_data_create_from_pattern_8")
45 import :: c_ptr
46 type(c_ptr), value :: code
47 type(c_ptr), value :: gcontext
48 type(c_ptr) :: pcre2_match_data_create_from_pattern_8
49 end function pcre2_match_data_create_from_pattern_8
50
51 ! pcre2_match_8 - Match a compiled pattern against a subject string
52 function pcre2_match_8(code, subject, length, startoffset, options, &
53 match_data, mcontext) bind(C, name="pcre2_match_8")
54 import :: c_ptr, c_char, c_size_t, c_int
55 type(c_ptr), value :: code
56 character(kind=c_char), intent(in) :: subject(*)
57 integer(c_size_t), value :: length
58 integer(c_size_t), value :: startoffset
59 integer(c_int), value :: options
60 type(c_ptr), value :: match_data
61 type(c_ptr), value :: mcontext
62 integer(c_int) :: pcre2_match_8
63 end function pcre2_match_8
64
65 ! pcre2_get_ovector_pointer_8 - Get pointer to output vector
66 function pcre2_get_ovector_pointer_8(match_data) &
67 bind(C, name="pcre2_get_ovector_pointer_8")
68 import :: c_ptr
69 type(c_ptr), value :: match_data
70 type(c_ptr) :: pcre2_get_ovector_pointer_8
71 end function pcre2_get_ovector_pointer_8
72
73 ! pcre2_get_ovector_count_8 - Get number of pairs in output vector
74 function pcre2_get_ovector_count_8(match_data) &
75 bind(C, name="pcre2_get_ovector_count_8")
76 import :: c_ptr, c_int
77 type(c_ptr), value :: match_data
78 integer(c_int) :: pcre2_get_ovector_count_8
79 end function pcre2_get_ovector_count_8
80
81 ! pcre2_code_free_8 - Free a compiled pattern
82 subroutine pcre2_code_free_8(code) bind(C, name="pcre2_code_free_8")
83 import :: c_ptr
84 type(c_ptr), value :: code
85 end subroutine pcre2_code_free_8
86
87 ! pcre2_match_data_free_8 - Free match data block
88 subroutine pcre2_match_data_free_8(match_data) &
89 bind(C, name="pcre2_match_data_free_8")
90 import :: c_ptr
91 type(c_ptr), value :: match_data
92 end subroutine pcre2_match_data_free_8
93
94 ! pcre2_get_error_message_8 - Get error message for error code
95 function pcre2_get_error_message_8(errorcode, buffer, bufflen) &
96 bind(C, name="pcre2_get_error_message_8")
97 import :: c_ptr, c_char, c_size_t, c_int
98 integer(c_int), value :: errorcode
99 character(kind=c_char), intent(out) :: buffer(*)
100 integer(c_size_t), value :: bufflen
101 integer(c_int) :: pcre2_get_error_message_8
102 end function pcre2_get_error_message_8
103 end interface
104
105 !---------------------------------------------------------------------------
106 ! Compiled PCRE Pattern Type
107 !---------------------------------------------------------------------------
108 type :: pcre_t
109 private
110 type(c_ptr) :: code = c_null_ptr ! Compiled pattern
111 type(c_ptr) :: match_data = c_null_ptr ! Match data block
112 logical :: compiled = .false.
113 integer :: error_code = 0
114 character(len=256) :: error_msg = ''
115 contains
116 procedure :: is_compiled => pcre_is_compiled
117 end type pcre_t
118
119 !---------------------------------------------------------------------------
120 ! Match Result Type
121 !---------------------------------------------------------------------------
122 type :: pcre_match_result_t
123 logical :: matched = .false.
124 integer :: match_start = 0 ! 1-based start position
125 integer :: match_end = 0 ! 1-based end position
126 integer :: group_starts(20) = 0
127 integer :: group_ends(20) = 0
128 integer :: num_groups = 0
129 end type pcre_match_result_t
130
131 ! Module state
132 logical, save :: pcre_checked = .false.
133 logical, save :: pcre_is_available = .false.
134
135 contains
136
137 !---------------------------------------------------------------------------
138 ! Check if PCRE2 library is available
139 !---------------------------------------------------------------------------
140 function pcre_available() result(available)
141 logical :: available
142
143 if (.not. pcre_checked) then
144 ! Try to compile a simple pattern to check availability
145 ! If the library isn't linked, this will cause a runtime error
146 ! For now, assume available if we got this far (library linked)
147 pcre_is_available = .true.
148 pcre_checked = .true.
149 end if
150
151 available = pcre_is_available
152 end function pcre_available
153
154 !---------------------------------------------------------------------------
155 ! Compile a PCRE pattern
156 !---------------------------------------------------------------------------
157 subroutine pcre_compile(re, pattern, ignore_case, ierr)
158 type(pcre_t), intent(out) :: re
159 character(len=*), intent(in) :: pattern
160 logical, intent(in), optional :: ignore_case
161 integer, intent(out) :: ierr
162
163 integer :: plen
164 character(len=:), allocatable :: c_pattern
165 integer(c_int) :: options, errorcode
166 integer(c_size_t) :: erroroffset, pcre_pattern_len
167
168 ierr = 0
169 re%compiled = .false.
170 re%error_code = 0
171 re%error_msg = ''
172
173 ! Get actual pattern length (preserving whitespace patterns)
174 plen = pattern_len(pattern)
175
176 ! Set options - enable UTF-8 and Unicode properties by default
177 options = ior(PCRE2_UTF, PCRE2_UCP)
178 if (present(ignore_case)) then
179 if (ignore_case) options = ior(options, PCRE2_CASELESS)
180 end if
181
182 ! Prepare pattern as C string (use exact length, not trim)
183 allocate(character(len=plen+1) :: c_pattern)
184 c_pattern = pattern(1:plen) // c_null_char
185 pcre_pattern_len = int(plen, c_size_t)
186
187 ! Compile pattern
188 re%code = pcre2_compile_8(c_pattern, pcre_pattern_len, options, &
189 errorcode, erroroffset, c_null_ptr)
190
191 if (.not. c_associated(re%code)) then
192 re%error_code = int(errorcode)
193 call get_pcre_error(errorcode, re%error_msg)
194 ierr = 1
195 return
196 end if
197
198 ! Create match data block
199 re%match_data = pcre2_match_data_create_from_pattern_8(re%code, c_null_ptr)
200 if (.not. c_associated(re%match_data)) then
201 call pcre2_code_free_8(re%code)
202 re%code = c_null_ptr
203 re%error_msg = 'Failed to create match data'
204 ierr = 2
205 return
206 end if
207
208 re%compiled = .true.
209
210 end subroutine pcre_compile
211
212 !---------------------------------------------------------------------------
213 ! Match pattern against text (returns true if matches anywhere)
214 !---------------------------------------------------------------------------
215 function pcre_match(re, text, ignore_case) result(matched)
216 type(pcre_t), intent(in) :: re
217 character(len=*), intent(in) :: text
218 logical, intent(in), optional :: ignore_case
219 logical :: matched
220
221 type(pcre_match_result_t) :: res
222 logical :: icase
223
224 matched = .false.
225 if (.not. re%compiled) return
226
227 icase = .false.
228 if (present(ignore_case)) icase = ignore_case
229
230 res = pcre_search(re, text, icase)
231 matched = res%matched
232
233 end function pcre_match
234
235 !---------------------------------------------------------------------------
236 ! Search for pattern in text, return match result with positions
237 !---------------------------------------------------------------------------
238 function pcre_search(re, text, ignore_case, start_offset) result(res)
239 type(pcre_t), intent(in) :: re
240 character(len=*), intent(in) :: text
241 logical, intent(in), optional :: ignore_case
242 integer, intent(in), optional :: start_offset
243 type(pcre_match_result_t) :: res
244
245 character(len=len(text)+1, kind=c_char) :: c_text
246 integer(c_int) :: rc, options
247 integer(c_size_t) :: text_len, startoffset
248 integer(c_size_t) :: ovector_count
249 type(c_ptr) :: ovector_ptr
250 integer(c_size_t), pointer :: ovector(:)
251 integer :: i
252
253 res%matched = .false.
254 res%match_start = 0
255 res%match_end = 0
256 res%num_groups = 0
257
258 if (.not. re%compiled) return
259
260 ! Prepare text as C string (without null terminator for length)
261 c_text = text // c_null_char
262 text_len = int(len(text), c_size_t)
263
264 ! Set start offset
265 startoffset = 0_c_size_t
266 if (present(start_offset)) then
267 if (start_offset > 0) startoffset = int(start_offset - 1, c_size_t)
268 end if
269
270 ! Match options (ignore_case was set at compile time)
271 options = 0_c_int
272
273 ! Execute match
274 rc = pcre2_match_8(re%code, c_text, text_len, startoffset, options, &
275 re%match_data, c_null_ptr)
276
277 if (rc < 0) then
278 ! No match or error
279 return
280 end if
281
282 res%matched = .true.
283
284 ! Get output vector with match positions
285 ovector_ptr = pcre2_get_ovector_pointer_8(re%match_data)
286 if (.not. c_associated(ovector_ptr)) return
287
288 ovector_count = int(pcre2_get_ovector_count_8(re%match_data), c_size_t)
289
290 ! Map to Fortran array - ovector has pairs of (start, end) positions
291 ! PCRE2 uses byte offsets (0-based), we need 1-based character positions
292 call c_f_pointer(ovector_ptr, ovector, [ovector_count * 2])
293
294 ! Overall match is in ovector(1) and ovector(2)
295 res%match_start = int(ovector(1)) + 1 ! Convert 0-based to 1-based
296 res%match_end = int(ovector(2)) ! End is exclusive in PCRE2, so this is correct
297
298 ! Capture groups start at index 3 (pairs 2+)
299 res%num_groups = min(int(rc) - 1, 20)
300 do i = 1, res%num_groups
301 if ((i * 2 + 1) <= int(ovector_count * 2)) then
302 res%group_starts(i) = int(ovector(i * 2 + 1)) + 1
303 res%group_ends(i) = int(ovector(i * 2 + 2))
304 end if
305 end do
306
307 end function pcre_search
308
309 !---------------------------------------------------------------------------
310 ! Free PCRE compiled pattern resources
311 !---------------------------------------------------------------------------
312 subroutine pcre_free(re)
313 type(pcre_t), intent(inout) :: re
314
315 if (c_associated(re%match_data)) then
316 call pcre2_match_data_free_8(re%match_data)
317 re%match_data = c_null_ptr
318 end if
319
320 if (c_associated(re%code)) then
321 call pcre2_code_free_8(re%code)
322 re%code = c_null_ptr
323 end if
324
325 re%compiled = .false.
326
327 end subroutine pcre_free
328
329 !---------------------------------------------------------------------------
330 ! Get error message for failed compilation
331 !---------------------------------------------------------------------------
332 function pcre_error_message(re) result(msg)
333 type(pcre_t), intent(in) :: re
334 character(len=256) :: msg
335 msg = re%error_msg
336 end function pcre_error_message
337
338 !---------------------------------------------------------------------------
339 ! Check if pattern is compiled
340 !---------------------------------------------------------------------------
341 function pcre_is_compiled(this) result(res)
342 class(pcre_t), intent(in) :: this
343 logical :: res
344 res = this%compiled
345 end function pcre_is_compiled
346
347 !---------------------------------------------------------------------------
348 ! Get PCRE error message from error code
349 !---------------------------------------------------------------------------
350 subroutine get_pcre_error(errorcode, msg)
351 integer(c_int), intent(in) :: errorcode
352 character(len=*), intent(out) :: msg
353
354 character(len=256, kind=c_char) :: c_buffer
355 integer(c_int) :: ret
356
357 msg = ''
358 ret = pcre2_get_error_message_8(errorcode, c_buffer, 256_c_size_t)
359
360 if (ret > 0) then
361 msg = c_to_fortran_string(c_buffer)
362 else
363 write(msg, '(A,I0)') 'PCRE error code: ', errorcode
364 end if
365
366 end subroutine get_pcre_error
367
368 !---------------------------------------------------------------------------
369 ! Convert C string to Fortran string
370 !---------------------------------------------------------------------------
371 function c_to_fortran_string(c_str) result(f_str)
372 character(len=*, kind=c_char), intent(in) :: c_str
373 character(len=len(c_str)) :: f_str
374 integer :: i
375
376 f_str = ''
377 do i = 1, len(c_str)
378 if (c_str(i:i) == c_null_char) exit
379 f_str(i:i) = c_str(i:i)
380 end do
381 end function c_to_fortran_string
382
383 end module pcre_api
384