Fortran · 4959 bytes Raw Blame History
1 module regex_charclass
2 !> High-performance bitwise character class operations for FERP
3 !> Uses 256-bit representation (4 x 64-bit integers) instead of boolean array
4 !> Provides O(1) membership testing with minimal memory footprint
5 implicit none
6 private
7
8 public :: char_class_bits_t
9 public :: charclass_from_array, charclass_test, charclass_test_case_insensitive
10 public :: charclass_set, charclass_clear, charclass_set_range
11 public :: charclass_add_case_variants
12
13 !> Bitwise character class - 256 bits in 4 words
14 type :: char_class_bits_t
15 integer(8) :: words(4) = 0_8 ! bits 0-63, 64-127, 128-191, 192-255
16 logical :: negated = .false.
17 end type char_class_bits_t
18
19 contains
20
21 pure subroutine charclass_clear(cc)
22 !> Clear all bits
23 type(char_class_bits_t), intent(inout) :: cc
24 cc%words = 0_8
25 cc%negated = .false.
26 end subroutine charclass_clear
27
28 pure subroutine charclass_set(cc, char_code)
29 !> Set a single character bit
30 type(char_class_bits_t), intent(inout) :: cc
31 integer, intent(in) :: char_code
32 integer :: word_idx, bit_idx
33
34 if (char_code < 0 .or. char_code > 255) return
35 word_idx = char_code / 64 + 1 ! 1-based index
36 bit_idx = mod(char_code, 64)
37 cc%words(word_idx) = ior(cc%words(word_idx), ishft(1_8, bit_idx))
38 end subroutine charclass_set
39
40 pure subroutine charclass_set_range(cc, start_char, end_char)
41 !> Set a range of character bits efficiently
42 type(char_class_bits_t), intent(inout) :: cc
43 integer, intent(in) :: start_char, end_char
44 integer :: i
45
46 do i = start_char, end_char
47 if (i >= 0 .and. i <= 255) then
48 call charclass_set(cc, i)
49 end if
50 end do
51 end subroutine charclass_set_range
52
53 pure function charclass_test(cc, c) result(res)
54 !> Test if character is in class - O(1) bit test
55 type(char_class_bits_t), intent(in) :: cc
56 character(len=1), intent(in) :: c
57 logical :: res
58
59 integer :: char_code, word_idx, bit_idx
60
61 char_code = ichar(c)
62 word_idx = char_code / 64 + 1
63 bit_idx = mod(char_code, 64)
64 res = btest(cc%words(word_idx), bit_idx)
65
66 if (cc%negated) res = .not. res
67 end function charclass_test
68
69 pure function charclass_test_case_insensitive(cc, c) result(res)
70 !> Test character with case insensitivity - checks both cases in one call
71 type(char_class_bits_t), intent(in) :: cc
72 character(len=1), intent(in) :: c
73 logical :: res
74
75 integer :: char_code, word_idx, bit_idx, other_case
76
77 char_code = ichar(c)
78 word_idx = char_code / 64 + 1
79 bit_idx = mod(char_code, 64)
80 res = btest(cc%words(word_idx), bit_idx)
81
82 ! Quick check for other case (only for a-z and A-Z)
83 if (.not. res) then
84 if (char_code >= 65 .and. char_code <= 90) then
85 ! Uppercase A-Z -> check lowercase a-z
86 other_case = char_code + 32
87 word_idx = other_case / 64 + 1
88 bit_idx = mod(other_case, 64)
89 res = btest(cc%words(word_idx), bit_idx)
90 else if (char_code >= 97 .and. char_code <= 122) then
91 ! Lowercase a-z -> check uppercase A-Z
92 other_case = char_code - 32
93 word_idx = other_case / 64 + 1
94 bit_idx = mod(other_case, 64)
95 res = btest(cc%words(word_idx), bit_idx)
96 end if
97 end if
98
99 if (cc%negated) res = .not. res
100 end function charclass_test_case_insensitive
101
102 pure subroutine charclass_from_array(cc, char_class_array, negated)
103 !> Convert 256-element boolean array to bitwise format
104 type(char_class_bits_t), intent(out) :: cc
105 logical, intent(in) :: char_class_array(0:255)
106 logical, intent(in) :: negated
107
108 integer :: i, word_idx, bit_idx
109
110 cc%words = 0_8
111 cc%negated = negated
112
113 do i = 0, 255
114 if (char_class_array(i)) then
115 word_idx = i / 64 + 1
116 bit_idx = mod(i, 64)
117 cc%words(word_idx) = ior(cc%words(word_idx), ishft(1_8, bit_idx))
118 end if
119 end do
120 end subroutine charclass_from_array
121
122 pure subroutine charclass_add_case_variants(cc)
123 !> Pre-compute case variants into the character class
124 !> After calling this, case-insensitive matching becomes a single test
125 type(char_class_bits_t), intent(inout) :: cc
126
127 integer :: i, word_idx, bit_idx, other_case
128 integer(8) :: saved_words(4)
129
130 saved_words = cc%words
131
132 ! For each set bit, also set its case variant
133 do i = 0, 255
134 word_idx = i / 64 + 1
135 bit_idx = mod(i, 64)
136
137 if (btest(saved_words(word_idx), bit_idx)) then
138 ! Character is in class - add its case variant
139 if (i >= 65 .and. i <= 90) then
140 ! Uppercase A-Z -> add lowercase a-z
141 other_case = i + 32
142 call charclass_set(cc, other_case)
143 else if (i >= 97 .and. i <= 122) then
144 ! Lowercase a-z -> add uppercase A-Z
145 other_case = i - 32
146 call charclass_set(cc, other_case)
147 end if
148 end if
149 end do
150 end subroutine charclass_add_case_variants
151
152 end module regex_charclass
153