ferp Public

Watch 0 Fork 1 Star 0

Fortran · 12043 bytes Raw Blame History

  
        1
        module regex_engine
      
        2
          !> NFA simulation engine using Thompson's algorithm
      
        3
          !> Tracks set of current states, processes input character by character
      
        4
          use regex_types
      
        5
          implicit none
      
        6
          private
      
        7
        
        8
          public :: nfa_match, nfa_search
      
        9
        
        10
          integer, parameter :: MAX_STATES = 1024
      
        11
        
        12
        contains
      
        13
        
        14
          function nfa_match(nfa, text, start_pos, ignore_case) result(res)
      
        15
            !> Try to match NFA starting at start_pos in text
      
        16
            !> Returns match result with positions if successful
      
        17
            type(nfa_t), intent(in) :: nfa
      
        18
            character(len=*), intent(in) :: text
      
        19
            integer, intent(in) :: start_pos
      
        20
            logical, intent(in) :: ignore_case
      
        21
            type(match_result_t) :: res
      
        22
        
        23
            integer :: current(MAX_STATES), next_states(MAX_STATES)
      
        24
            integer :: num_current, num_next
      
        25
            integer :: i, pos, text_len
      
        26
            character(len=1) :: c
      
        27
        
        28
            res%matched = .false.
      
        29
            text_len = len_trim(text)
      
        30
        
        31
            if (nfa%num_states == 0) return
      
        32
        
        33
            ! Initialize with epsilon closure of start state
      
        34
            num_current = 0
      
        35
            call epsilon_closure(nfa, nfa%start_state, current, num_current, &
      
        36
                                 text, start_pos, text_len)
      
        37
        
        38
            ! Check if already accepting (empty pattern case)
      
        39
            if (is_accepting(nfa, current, num_current)) then
      
        40
              res%matched = .true.
      
        41
              res%match_start = start_pos
      
        42
              res%match_end = start_pos - 1  ! Empty match
      
        43
              return
      
        44
            end if
      
        45
        
        46
            ! Process each character
      
        47
            pos = start_pos
      
        48
            do while (pos <= text_len .and. num_current > 0)
      
        49
              c = text(pos:pos)
      
        50
        
        51
              ! Compute next states
      
        52
              num_next = 0
      
        53
              do i = 1, num_current
      
        54
                call step(nfa, current(i), c, pos, text, text_len, ignore_case, &
      
        55
                          next_states, num_next)
      
        56
              end do
      
        57
        
        58
              ! Compute epsilon closure of next states
      
        59
              num_current = 0
      
        60
              do i = 1, num_next
      
        61
                call epsilon_closure(nfa, next_states(i), current, num_current, &
      
        62
                                     text, pos + 1, text_len)
      
        63
              end do
      
        64
        
        65
              pos = pos + 1
      
        66
        
        67
              ! Check for acceptance
      
        68
              if (is_accepting(nfa, current, num_current)) then
      
        69
                res%matched = .true.
      
        70
                res%match_start = start_pos
      
        71
                res%match_end = pos - 1
      
        72
                ! Don't return yet - try to find longest match (greedy)
      
        73
              end if
      
        74
            end do
      
        75
        
        76
          end function nfa_match
      
        77
        
        78
          function nfa_search(nfa, text, ignore_case) result(res)
      
        79
            !> Search for pattern anywhere in text
      
        80
            type(nfa_t), intent(in) :: nfa
      
        81
            character(len=*), intent(in) :: text
      
        82
            logical, intent(in) :: ignore_case
      
        83
            type(match_result_t) :: res
      
        84
        
        85
            integer :: i, text_len
      
        86
            type(match_result_t) :: try_res
      
        87
        
        88
            res%matched = .false.
      
        89
            text_len = len_trim(text)
      
        90
        
        91
            ! Try matching at each position
      
        92
            do i = 1, text_len + 1  ! +1 to handle empty string match at end
      
        93
              try_res = nfa_match(nfa, text, i, ignore_case)
      
        94
              if (try_res%matched) then
      
        95
                res = try_res
      
        96
                return
      
        97
              end if
      
        98
            end do
      
        99
        
        100
          end function nfa_search
      
        101
        
        102
          subroutine epsilon_closure(nfa, state, states, num_states, text, pos, text_len)
      
        103
            !> Compute epsilon closure of a state, adding to states array
      
        104
            type(nfa_t), intent(in) :: nfa
      
        105
            integer, intent(in) :: state
      
        106
            integer, intent(inout) :: states(MAX_STATES)
      
        107
            integer, intent(inout) :: num_states
      
        108
            character(len=*), intent(in) :: text
      
        109
            integer, intent(in) :: pos, text_len
      
        110
        
        111
            integer :: stack(MAX_STATES), stack_top
      
        112
            integer :: current, i, target
      
        113
            type(nfa_transition_t) :: trans
      
        114
            logical :: visited(MAX_STATES)
      
        115
        
        116
            visited = .false.
      
        117
            stack_top = 1
      
        118
            stack(1) = state
      
        119
        
        120
            do while (stack_top > 0)
      
        121
              current = stack(stack_top)
      
        122
              stack_top = stack_top - 1
      
        123
        
        124
              if (current < 1 .or. current > nfa%num_states) cycle
      
        125
              if (visited(current)) cycle
      
        126
              visited(current) = .true.
      
        127
        
        128
              ! Add to result
      
        129
              if (num_states < MAX_STATES) then
      
        130
                num_states = num_states + 1
      
        131
                states(num_states) = current
      
        132
              end if
      
        133
        
        134
              ! Follow epsilon transitions and anchors
      
        135
              do i = 1, nfa%states(current)%num_trans
      
        136
                trans = nfa%states(current)%trans(i)
      
        137
        
        138
                if (trans%trans_type == TRANS_EPSILON) then
      
        139
                  ! Check for backref marker (negative anchor_type)
      
        140
                  if (trans%anchor_type < 0) then
      
        141
                    ! Backref - skip for now (would need captured text)
      
        142
                    cycle
      
        143
                  end if
      
        144
        
        145
                  target = trans%target
      
        146
                  if (target >= 1 .and. target <= nfa%num_states .and. .not. visited(target)) then
      
        147
                    stack_top = stack_top + 1
      
        148
                    stack(stack_top) = target
      
        149
                  end if
      
        150
        
        151
                else if (trans%trans_type == TRANS_ANCHOR) then
      
        152
                  ! Check if anchor matches
      
        153
                  if (anchor_matches(trans%anchor_type, text, pos, text_len)) then
      
        154
                    target = trans%target
      
        155
                    if (target >= 1 .and. target <= nfa%num_states .and. .not. visited(target)) then
      
        156
                      stack_top = stack_top + 1
      
        157
                      stack(stack_top) = target
      
        158
                    end if
      
        159
                  end if
      
        160
                end if
      
        161
              end do
      
        162
            end do
      
        163
        
        164
          end subroutine epsilon_closure
      
        165
        
        166
          subroutine step(nfa, state, c, pos, text, text_len, ignore_case, next_states, num_next)
      
        167
            !> Take one step from state on character c
      
        168
            type(nfa_t), intent(in) :: nfa
      
        169
            integer, intent(in) :: state
      
        170
            character(len=1), intent(in) :: c
      
        171
            integer, intent(in) :: pos, text_len
      
        172
            character(len=*), intent(in) :: text
      
        173
            logical, intent(in) :: ignore_case
      
        174
            integer, intent(inout) :: next_states(MAX_STATES)
      
        175
            integer, intent(inout) :: num_next
      
        176
        
        177
            integer :: i
      
        178
            type(nfa_transition_t) :: trans
      
        179
            character(len=1) :: c_lower, match_lower
      
        180
        
        181
            do i = 1, nfa%states(state)%num_trans
      
        182
              trans = nfa%states(state)%trans(i)
      
        183
        
        184
              select case (trans%trans_type)
      
        185
                case (TRANS_CHAR)
      
        186
                  if (ignore_case) then
      
        187
                    c_lower = to_lower_char(c)
      
        188
                    match_lower = to_lower_char(trans%match_char)
      
        189
                    if (c_lower == match_lower) then
      
        190
                      call add_if_unique(next_states, num_next, trans%target)
      
        191
                    end if
      
        192
                  else
      
        193
                    if (c == trans%match_char) then
      
        194
                      call add_if_unique(next_states, num_next, trans%target)
      
        195
                    end if
      
        196
                  end if
      
        197
        
        198
                case (TRANS_CLASS)
      
        199
                  if (char_in_class(c, trans%char_class, trans%negated, ignore_case)) then
      
        200
                    call add_if_unique(next_states, num_next, trans%target)
      
        201
                  end if
      
        202
        
        203
                case (TRANS_ANY)
      
        204
                  ! Match any character except newline
      
        205
                  if (c /= char(10)) then
      
        206
                    call add_if_unique(next_states, num_next, trans%target)
      
        207
                  end if
      
        208
        
        209
              end select
      
        210
            end do
      
        211
        
        212
          end subroutine step
      
        213
        
        214
          function is_accepting(nfa, states, num_states) result(res)
      
        215
            !> Check if any state in the set is an accepting state
      
        216
            type(nfa_t), intent(in) :: nfa
      
        217
            integer, intent(in) :: states(MAX_STATES)
      
        218
            integer, intent(in) :: num_states
      
        219
            logical :: res
      
        220
        
        221
            integer :: i
      
        222
        
        223
            res = .false.
      
        224
            do i = 1, num_states
      
        225
              if (states(i) >= 1 .and. states(i) <= nfa%num_states) then
      
        226
                if (nfa%states(states(i))%is_accept) then
      
        227
                  res = .true.
      
        228
                  return
      
        229
                end if
      
        230
              end if
      
        231
            end do
      
        232
        
        233
          end function is_accepting
      
        234
        
        235
          function anchor_matches(anchor_type, text, pos, text_len) result(matches)
      
        236
            !> Check if anchor matches at position
      
        237
            integer, intent(in) :: anchor_type
      
        238
            character(len=*), intent(in) :: text
      
        239
            integer, intent(in) :: pos, text_len
      
        240
            logical :: matches
      
        241
        
        242
            logical :: at_start, at_end, prev_word, curr_word
      
        243
        
        244
            matches = .false.
      
        245
        
        246
            at_start = (pos == 1) .or. (pos < 1)
      
        247
            at_end = (pos > text_len)
      
        248
        
        249
            select case (anchor_type)
      
        250
              case (1)  ! ^ start anchor
      
        251
                if (at_start) then
      
        252
                  matches = .true.
      
        253
                else if (pos > 1 .and. pos <= text_len + 1) then
      
        254
                  matches = (text(pos-1:pos-1) == char(10))
      
        255
                end if
      
        256
        
        257
              case (2)  ! $ end anchor
      
        258
                if (at_end) then
      
        259
                  matches = .true.
      
        260
                else if (pos >= 1 .and. pos <= text_len) then
      
        261
                  matches = (text(pos:pos) == char(10))
      
        262
                end if
      
        263
        
        264
              case (3)  ! \< word start
      
        265
                prev_word = .false.
      
        266
                curr_word = .false.
      
        267
                if (pos > 1 .and. pos <= text_len + 1) prev_word = is_word_char(text(pos-1:pos-1))
      
        268
                if (pos >= 1 .and. pos <= text_len) curr_word = is_word_char(text(pos:pos))
      
        269
                matches = (.not. prev_word) .and. curr_word
      
        270
        
        271
              case (4)  ! \> word end
      
        272
                prev_word = .false.
      
        273
                curr_word = .false.
      
        274
                if (pos > 1 .and. pos <= text_len + 1) prev_word = is_word_char(text(pos-1:pos-1))
      
        275
                if (pos >= 1 .and. pos <= text_len) curr_word = is_word_char(text(pos:pos))
      
        276
                matches = prev_word .and. (.not. curr_word)
      
        277
        
        278
              case (5)  ! \b word boundary
      
        279
                prev_word = .false.
      
        280
                curr_word = .false.
      
        281
                if (pos > 1 .and. pos <= text_len + 1) prev_word = is_word_char(text(pos-1:pos-1))
      
        282
                if (pos >= 1 .and. pos <= text_len) curr_word = is_word_char(text(pos:pos))
      
        283
                matches = prev_word .neqv. curr_word
      
        284
        
        285
              case (6)  ! \B not word boundary
      
        286
                prev_word = .false.
      
        287
                curr_word = .false.
      
        288
                if (pos > 1 .and. pos <= text_len + 1) prev_word = is_word_char(text(pos-1:pos-1))
      
        289
                if (pos >= 1 .and. pos <= text_len) curr_word = is_word_char(text(pos:pos))
      
        290
                matches = prev_word .eqv. curr_word
      
        291
            end select
      
        292
        
        293
          end function anchor_matches
      
        294
        
        295
          pure function is_word_char(c) result(res)
      
        296
            character(len=1), intent(in) :: c
      
        297
            logical :: res
      
        298
            integer :: ic
      
        299
        
        300
            ic = ichar(c)
      
        301
            res = (ic >= ichar('a') .and. ic <= ichar('z')) .or. &
      
        302
                  (ic >= ichar('A') .and. ic <= ichar('Z')) .or. &
      
        303
                  (ic >= ichar('0') .and. ic <= ichar('9')) .or. &
      
        304
                  (c == '_')
      
        305
          end function is_word_char
      
        306
        
        307
          pure function to_lower_char(c) result(lower)
      
        308
            character(len=1), intent(in) :: c
      
        309
            character(len=1) :: lower
      
        310
            integer :: ic
      
        311
        
        312
            ic = ichar(c)
      
        313
            if (ic >= ichar('A') .and. ic <= ichar('Z')) then
      
        314
              ! ASCII uppercase A-Z -> a-z
      
        315
              lower = char(ic + 32)
      
        316
            else if (ic >= 192 .and. ic <= 214) then
      
        317
              ! Latin-1 uppercase À-Ö (192-214) -> à-ö (224-246)
      
        318
              lower = char(ic + 32)
      
        319
            else if (ic >= 216 .and. ic <= 222) then
      
        320
              ! Latin-1 uppercase Ø-Þ (216-222) -> ø-þ (248-254)
      
        321
              lower = char(ic + 32)
      
        322
            else if (ic >= 128 .and. ic <= 150) then
      
        323
              ! UTF-8 continuation byte for uppercase Latin Extended-A (U+00C0-U+00D6)
      
        324
              ! When preceded by 0xC3, these represent À-Ö, fold to à-ö
      
        325
              lower = char(ic + 32)
      
        326
            else if (ic >= 152 .and. ic <= 158) then
      
        327
              ! UTF-8 continuation byte for uppercase Latin Extended-A (U+00D8-U+00DE)
      
        328
              ! When preceded by 0xC3, these represent Ø-Þ, fold to ø-þ
      
        329
              lower = char(ic + 32)
      
        330
            else
      
        331
              lower = c
      
        332
            end if
      
        333
          end function to_lower_char
      
        334
        
        335
          function char_in_class(c, char_class, negated, ignore_case) result(res)
      
        336
            character(len=1), intent(in) :: c
      
        337
            logical, intent(in) :: char_class(0:255)
      
        338
            logical, intent(in) :: negated, ignore_case
      
        339
            logical :: res
      
        340
        
        341
            integer :: ic
      
        342
            character(len=1) :: c_lower, c_upper
      
        343
        
        344
            ic = ichar(c)
      
        345
            res = char_class(ic)
      
        346
        
        347
            ! For case-insensitive, check both cases
      
        348
            if (ignore_case .and. .not. res) then
      
        349
              c_lower = to_lower_char(c)
      
        350
              c_upper = to_upper_char(c)
      
        351
              if (c_lower /= c) res = char_class(ichar(c_lower))
      
        352
              if (.not. res .and. c_upper /= c) res = char_class(ichar(c_upper))
      
        353
            end if
      
        354
        
        355
            if (negated) res = .not. res
      
        356
        
        357
          end function char_in_class
      
        358
        
        359
          pure function to_upper_char(c) result(upper)
      
        360
            character(len=1), intent(in) :: c
      
        361
            character(len=1) :: upper
      
        362
            integer :: ic
      
        363
        
        364
            ic = ichar(c)
      
        365
            if (ic >= ichar('a') .and. ic <= ichar('z')) then
      
        366
              upper = char(ic - 32)
      
        367
            else
      
        368
              upper = c
      
        369
            end if
      
        370
          end function to_upper_char
      
        371
        
        372
          subroutine add_if_unique(arr, num, val)
      
        373
            integer, intent(inout) :: arr(MAX_STATES)
      
        374
            integer, intent(inout) :: num
      
        375
            integer, intent(in) :: val
      
        376
        
        377
            integer :: i
      
        378
        
        379
            ! Check if already present
      
        380
            do i = 1, num
      
        381
              if (arr(i) == val) return
      
        382
            end do
      
        383
        
        384
            ! Add if space available
      
        385
            if (num < MAX_STATES) then
      
        386
              num = num + 1
      
        387
              arr(num) = val
      
        388
            end if
      
        389
        
        390
          end subroutine add_if_unique
      
        391
        
        392
        end module regex_engine
      
        393

1	module regex_engine
2	!> NFA simulation engine using Thompson's algorithm
3	!> Tracks set of current states, processes input character by character
4	use regex_types
5	implicit none
6	private
7
8	public :: nfa_match, nfa_search
9
10	integer, parameter :: MAX_STATES = 1024
11
12	contains
13
14	function nfa_match(nfa, text, start_pos, ignore_case) result(res)
15	!> Try to match NFA starting at start_pos in text
16	!> Returns match result with positions if successful
17	type(nfa_t), intent(in) :: nfa
18	character(len=*), intent(in) :: text
19	integer, intent(in) :: start_pos
20	logical, intent(in) :: ignore_case
21	type(match_result_t) :: res
22
23	integer :: current(MAX_STATES), next_states(MAX_STATES)
24	integer :: num_current, num_next
25	integer :: i, pos, text_len
26	character(len=1) :: c
27
28	res%matched = .false.
29	text_len = len_trim(text)
30
31	if (nfa%num_states == 0) return
32
33	! Initialize with epsilon closure of start state
34	num_current = 0
35	call epsilon_closure(nfa, nfa%start_state, current, num_current, &
36	text, start_pos, text_len)
37
38	! Check if already accepting (empty pattern case)
39	if (is_accepting(nfa, current, num_current)) then
40	res%matched = .true.
41	res%match_start = start_pos
42	res%match_end = start_pos - 1 ! Empty match
43	return
44	end if
45
46	! Process each character
47	pos = start_pos
48	do while (pos <= text_len .and. num_current > 0)
49	c = text(pos:pos)
50
51	! Compute next states
52	num_next = 0
53	do i = 1, num_current
54	call step(nfa, current(i), c, pos, text, text_len, ignore_case, &
55	next_states, num_next)
56	end do
57
58	! Compute epsilon closure of next states
59	num_current = 0
60	do i = 1, num_next
61	call epsilon_closure(nfa, next_states(i), current, num_current, &
62	text, pos + 1, text_len)
63	end do
64
65	pos = pos + 1
66
67	! Check for acceptance
68	if (is_accepting(nfa, current, num_current)) then
69	res%matched = .true.
70	res%match_start = start_pos
71	res%match_end = pos - 1
72	! Don't return yet - try to find longest match (greedy)
73	end if
74	end do
75
76	end function nfa_match
77
78	function nfa_search(nfa, text, ignore_case) result(res)
79	!> Search for pattern anywhere in text
80	type(nfa_t), intent(in) :: nfa
81	character(len=*), intent(in) :: text
82	logical, intent(in) :: ignore_case
83	type(match_result_t) :: res
84
85	integer :: i, text_len
86	type(match_result_t) :: try_res
87
88	res%matched = .false.
89	text_len = len_trim(text)
90
91	! Try matching at each position
92	do i = 1, text_len + 1 ! +1 to handle empty string match at end
93	try_res = nfa_match(nfa, text, i, ignore_case)
94	if (try_res%matched) then
95	res = try_res
96	return
97	end if
98	end do
99
100	end function nfa_search
101
102	subroutine epsilon_closure(nfa, state, states, num_states, text, pos, text_len)
103	!> Compute epsilon closure of a state, adding to states array
104	type(nfa_t), intent(in) :: nfa
105	integer, intent(in) :: state
106	integer, intent(inout) :: states(MAX_STATES)
107	integer, intent(inout) :: num_states
108	character(len=*), intent(in) :: text
109	integer, intent(in) :: pos, text_len
110
111	integer :: stack(MAX_STATES), stack_top
112	integer :: current, i, target
113	type(nfa_transition_t) :: trans
114	logical :: visited(MAX_STATES)
115
116	visited = .false.
117	stack_top = 1
118	stack(1) = state
119
120	do while (stack_top > 0)
121	current = stack(stack_top)
122	stack_top = stack_top - 1
123
124	if (current < 1 .or. current > nfa%num_states) cycle
125	if (visited(current)) cycle
126	visited(current) = .true.
127
128	! Add to result
129	if (num_states < MAX_STATES) then
130	num_states = num_states + 1
131	states(num_states) = current
132	end if
133
134	! Follow epsilon transitions and anchors
135	do i = 1, nfa%states(current)%num_trans
136	trans = nfa%states(current)%trans(i)
137
138	if (trans%trans_type == TRANS_EPSILON) then
139	! Check for backref marker (negative anchor_type)
140	if (trans%anchor_type < 0) then
141	! Backref - skip for now (would need captured text)
142	cycle
143	end if
144
145	target = trans%target
146	if (target >= 1 .and. target <= nfa%num_states .and. .not. visited(target)) then
147	stack_top = stack_top + 1
148	stack(stack_top) = target
149	end if
150
151	else if (trans%trans_type == TRANS_ANCHOR) then
152	! Check if anchor matches
153	if (anchor_matches(trans%anchor_type, text, pos, text_len)) then
154	target = trans%target
155	if (target >= 1 .and. target <= nfa%num_states .and. .not. visited(target)) then
156	stack_top = stack_top + 1
157	stack(stack_top) = target
158	end if
159	end if
160	end if
161	end do
162	end do
163
164	end subroutine epsilon_closure
165
166	subroutine step(nfa, state, c, pos, text, text_len, ignore_case, next_states, num_next)
167	!> Take one step from state on character c
168	type(nfa_t), intent(in) :: nfa
169	integer, intent(in) :: state
170	character(len=1), intent(in) :: c
171	integer, intent(in) :: pos, text_len
172	character(len=*), intent(in) :: text
173	logical, intent(in) :: ignore_case
174	integer, intent(inout) :: next_states(MAX_STATES)
175	integer, intent(inout) :: num_next
176
177	integer :: i
178	type(nfa_transition_t) :: trans
179	character(len=1) :: c_lower, match_lower
180
181	do i = 1, nfa%states(state)%num_trans
182	trans = nfa%states(state)%trans(i)
183
184	select case (trans%trans_type)
185	case (TRANS_CHAR)
186	if (ignore_case) then
187	c_lower = to_lower_char(c)
188	match_lower = to_lower_char(trans%match_char)
189	if (c_lower == match_lower) then
190	call add_if_unique(next_states, num_next, trans%target)
191	end if
192	else
193	if (c == trans%match_char) then
194	call add_if_unique(next_states, num_next, trans%target)
195	end if
196	end if
197
198	case (TRANS_CLASS)
199	if (char_in_class(c, trans%char_class, trans%negated, ignore_case)) then
200	call add_if_unique(next_states, num_next, trans%target)
201	end if
202
203	case (TRANS_ANY)
204	! Match any character except newline
205	if (c /= char(10)) then
206	call add_if_unique(next_states, num_next, trans%target)
207	end if
208
209	end select
210	end do
211
212	end subroutine step
213
214	function is_accepting(nfa, states, num_states) result(res)
215	!> Check if any state in the set is an accepting state
216	type(nfa_t), intent(in) :: nfa
217	integer, intent(in) :: states(MAX_STATES)
218	integer, intent(in) :: num_states
219	logical :: res
220
221	integer :: i
222
223	res = .false.
224	do i = 1, num_states
225	if (states(i) >= 1 .and. states(i) <= nfa%num_states) then
226	if (nfa%states(states(i))%is_accept) then
227	res = .true.
228	return
229	end if
230	end if
231	end do
232
233	end function is_accepting
234
235	function anchor_matches(anchor_type, text, pos, text_len) result(matches)
236	!> Check if anchor matches at position
237	integer, intent(in) :: anchor_type
238	character(len=*), intent(in) :: text
239	integer, intent(in) :: pos, text_len
240	logical :: matches
241
242	logical :: at_start, at_end, prev_word, curr_word
243
244	matches = .false.
245
246	at_start = (pos == 1) .or. (pos < 1)
247	at_end = (pos > text_len)
248
249	select case (anchor_type)
250	case (1) ! ^ start anchor
251	if (at_start) then
252	matches = .true.
253	else if (pos > 1 .and. pos <= text_len + 1) then
254	matches = (text(pos-1:pos-1) == char(10))
255	end if
256
257	case (2) ! $ end anchor
258	if (at_end) then
259	matches = .true.
260	else if (pos >= 1 .and. pos <= text_len) then
261	matches = (text(pos:pos) == char(10))
262	end if
263
264	case (3) ! \< word start
265	prev_word = .false.
266	curr_word = .false.
267	if (pos > 1 .and. pos <= text_len + 1) prev_word = is_word_char(text(pos-1:pos-1))
268	if (pos >= 1 .and. pos <= text_len) curr_word = is_word_char(text(pos:pos))
269	matches = (.not. prev_word) .and. curr_word
270
271	case (4) ! \> word end
272	prev_word = .false.
273	curr_word = .false.
274	if (pos > 1 .and. pos <= text_len + 1) prev_word = is_word_char(text(pos-1:pos-1))
275	if (pos >= 1 .and. pos <= text_len) curr_word = is_word_char(text(pos:pos))
276	matches = prev_word .and. (.not. curr_word)
277
278	case (5) ! \b word boundary
279	prev_word = .false.
280	curr_word = .false.
281	if (pos > 1 .and. pos <= text_len + 1) prev_word = is_word_char(text(pos-1:pos-1))
282	if (pos >= 1 .and. pos <= text_len) curr_word = is_word_char(text(pos:pos))
283	matches = prev_word .neqv. curr_word
284
285	case (6) ! \B not word boundary
286	prev_word = .false.
287	curr_word = .false.
288	if (pos > 1 .and. pos <= text_len + 1) prev_word = is_word_char(text(pos-1:pos-1))
289	if (pos >= 1 .and. pos <= text_len) curr_word = is_word_char(text(pos:pos))
290	matches = prev_word .eqv. curr_word
291	end select
292
293	end function anchor_matches
294
295	pure function is_word_char(c) result(res)
296	character(len=1), intent(in) :: c
297	logical :: res
298	integer :: ic
299
300	ic = ichar(c)
301	res = (ic >= ichar('a') .and. ic <= ichar('z')) .or. &
302	(ic >= ichar('A') .and. ic <= ichar('Z')) .or. &
303	(ic >= ichar('0') .and. ic <= ichar('9')) .or. &
304	(c == '_')
305	end function is_word_char
306
307	pure function to_lower_char(c) result(lower)
308	character(len=1), intent(in) :: c
309	character(len=1) :: lower
310	integer :: ic
311
312	ic = ichar(c)
313	if (ic >= ichar('A') .and. ic <= ichar('Z')) then
314	! ASCII uppercase A-Z -> a-z
315	lower = char(ic + 32)
316	else if (ic >= 192 .and. ic <= 214) then
317	! Latin-1 uppercase À-Ö (192-214) -> à-ö (224-246)
318	lower = char(ic + 32)
319	else if (ic >= 216 .and. ic <= 222) then
320	! Latin-1 uppercase Ø-Þ (216-222) -> ø-þ (248-254)
321	lower = char(ic + 32)
322	else if (ic >= 128 .and. ic <= 150) then
323	! UTF-8 continuation byte for uppercase Latin Extended-A (U+00C0-U+00D6)
324	! When preceded by 0xC3, these represent À-Ö, fold to à-ö
325	lower = char(ic + 32)
326	else if (ic >= 152 .and. ic <= 158) then
327	! UTF-8 continuation byte for uppercase Latin Extended-A (U+00D8-U+00DE)
328	! When preceded by 0xC3, these represent Ø-Þ, fold to ø-þ
329	lower = char(ic + 32)
330	else
331	lower = c
332	end if
333	end function to_lower_char
334
335	function char_in_class(c, char_class, negated, ignore_case) result(res)
336	character(len=1), intent(in) :: c
337	logical, intent(in) :: char_class(0:255)
338	logical, intent(in) :: negated, ignore_case
339	logical :: res
340
341	integer :: ic
342	character(len=1) :: c_lower, c_upper
343
344	ic = ichar(c)
345	res = char_class(ic)
346
347	! For case-insensitive, check both cases
348	if (ignore_case .and. .not. res) then
349	c_lower = to_lower_char(c)
350	c_upper = to_upper_char(c)
351	if (c_lower /= c) res = char_class(ichar(c_lower))
352	if (.not. res .and. c_upper /= c) res = char_class(ichar(c_upper))
353	end if
354
355	if (negated) res = .not. res
356
357	end function char_in_class
358
359	pure function to_upper_char(c) result(upper)
360	character(len=1), intent(in) :: c
361	character(len=1) :: upper
362	integer :: ic
363
364	ic = ichar(c)
365	if (ic >= ichar('a') .and. ic <= ichar('z')) then
366	upper = char(ic - 32)
367	else
368	upper = c
369	end if
370	end function to_upper_char
371
372	subroutine add_if_unique(arr, num, val)
373	integer, intent(inout) :: arr(MAX_STATES)
374	integer, intent(inout) :: num
375	integer, intent(in) :: val
376
377	integer :: i
378
379	! Check if already present
380	do i = 1, num
381	if (arr(i) == val) return
382	end do
383
384	! Add if space available
385	if (num < MAX_STATES) then
386	num = num + 1
387	arr(num) = val
388	end if
389
390	end subroutine add_if_unique
391
392	end module regex_engine
393