Fortran · 5750 bytes Raw Blame History
1 module ferp_search
2 !> Fast string search algorithms for FERP
3 !> Implements Boyer-Moore-Horspool for fixed string matching
4 use ferp_kinds
5 implicit none
6 private
7
8 public :: bm_search, bm_search_all
9 public :: bm_pattern_t, bm_compile, bm_free
10
11 !> Compiled Boyer-Moore pattern
12 type :: bm_pattern_t
13 character(len=:), allocatable :: pattern
14 integer :: pattern_len = 0
15 integer :: skip_table(0:255) ! Bad character skip table
16 logical :: case_insensitive = .false.
17 end type bm_pattern_t
18
19 contains
20
21 subroutine bm_compile(pat, pattern, case_insensitive)
22 !> Compile a pattern for Boyer-Moore search
23 type(bm_pattern_t), intent(out) :: pat
24 character(len=*), intent(in) :: pattern
25 logical, intent(in), optional :: case_insensitive
26
27 integer :: i, c, pat_len
28 character(len=:), allocatable :: work_pattern
29
30 pat%case_insensitive = .false.
31 if (present(case_insensitive)) pat%case_insensitive = case_insensitive
32
33 pat_len = len(pattern)
34 pat%pattern_len = pat_len
35
36 ! Store pattern (lowercase if case-insensitive)
37 if (pat%case_insensitive) then
38 allocate(character(len=pat_len) :: work_pattern)
39 do i = 1, pat_len
40 work_pattern(i:i) = to_lower(pattern(i:i))
41 end do
42 pat%pattern = work_pattern
43 else
44 pat%pattern = pattern
45 end if
46
47 ! Initialize skip table - default skip is pattern length
48 pat%skip_table = pat_len
49
50 ! Build bad character table
51 ! For each character in pattern (except last), set skip distance
52 do i = 1, pat_len - 1
53 if (pat%case_insensitive) then
54 c = ichar(to_lower(pattern(i:i)))
55 else
56 c = ichar(pattern(i:i))
57 end if
58 pat%skip_table(c) = pat_len - i
59
60 ! For case-insensitive, also set the other case
61 if (pat%case_insensitive) then
62 if (c >= ichar('a') .and. c <= ichar('z')) then
63 pat%skip_table(c - 32) = pat_len - i ! uppercase
64 else if (c >= ichar('A') .and. c <= ichar('Z')) then
65 pat%skip_table(c + 32) = pat_len - i ! lowercase
66 end if
67 end if
68 end do
69
70 end subroutine bm_compile
71
72 subroutine bm_free(pat)
73 !> Free compiled pattern
74 type(bm_pattern_t), intent(inout) :: pat
75 if (allocated(pat%pattern)) deallocate(pat%pattern)
76 pat%pattern_len = 0
77 end subroutine bm_free
78
79 function bm_search(text, pat) result(pos)
80 !> Search for pattern in text using Boyer-Moore-Horspool
81 !> Returns position of first match (1-based), or 0 if not found
82 character(len=*), intent(in) :: text
83 type(bm_pattern_t), intent(in) :: pat
84 integer :: pos
85
86 integer :: text_len, pat_len, i, j, skip
87 character :: tc, pc
88
89 pos = 0
90 text_len = len(text)
91 pat_len = pat%pattern_len
92
93 if (pat_len == 0) then
94 pos = 1 ! Empty pattern matches at start
95 return
96 end if
97
98 if (text_len < pat_len) return
99
100 i = pat_len ! Start at position where pattern could first match
101
102 do while (i <= text_len)
103 ! Compare pattern right-to-left
104 j = pat_len
105 do while (j >= 1)
106 if (pat%case_insensitive) then
107 tc = to_lower(text(i - pat_len + j:i - pat_len + j))
108 else
109 tc = text(i - pat_len + j:i - pat_len + j)
110 end if
111 pc = pat%pattern(j:j)
112
113 if (tc /= pc) exit
114 j = j - 1
115 end do
116
117 if (j == 0) then
118 ! Full match found
119 pos = i - pat_len + 1
120 return
121 end if
122
123 ! Skip based on bad character at current position
124 if (pat%case_insensitive) then
125 skip = pat%skip_table(ichar(to_lower(text(i:i))))
126 else
127 skip = pat%skip_table(ichar(text(i:i)))
128 end if
129 i = i + skip
130 end do
131
132 end function bm_search
133
134 subroutine bm_search_all(text, pat, positions, count)
135 !> Find all occurrences of pattern in text
136 character(len=*), intent(in) :: text
137 type(bm_pattern_t), intent(in) :: pat
138 integer, intent(out) :: positions(:) ! Array to store positions
139 integer, intent(out) :: count ! Number of matches found
140
141 integer :: text_len, pat_len, i, j, skip, max_matches
142 character :: tc, pc
143
144 count = 0
145 max_matches = size(positions)
146 text_len = len(text)
147 pat_len = pat%pattern_len
148
149 if (pat_len == 0) then
150 ! Empty pattern matches at every position
151 do i = 1, min(text_len + 1, max_matches)
152 count = count + 1
153 positions(count) = i
154 end do
155 return
156 end if
157
158 if (text_len < pat_len) return
159
160 i = pat_len
161
162 do while (i <= text_len .and. count < max_matches)
163 ! Compare pattern right-to-left
164 j = pat_len
165 do while (j >= 1)
166 if (pat%case_insensitive) then
167 tc = to_lower(text(i - pat_len + j:i - pat_len + j))
168 else
169 tc = text(i - pat_len + j:i - pat_len + j)
170 end if
171 pc = pat%pattern(j:j)
172
173 if (tc /= pc) exit
174 j = j - 1
175 end do
176
177 if (j == 0) then
178 ! Full match found
179 count = count + 1
180 positions(count) = i - pat_len + 1
181 ! Move past this match (non-overlapping)
182 i = i + pat_len
183 else
184 ! Skip based on bad character
185 if (pat%case_insensitive) then
186 skip = pat%skip_table(ichar(to_lower(text(i:i))))
187 else
188 skip = pat%skip_table(ichar(text(i:i)))
189 end if
190 i = i + max(skip, 1)
191 end if
192 end do
193
194 end subroutine bm_search_all
195
196 pure function to_lower(ch) result(lower)
197 !> Convert character to lowercase
198 character, intent(in) :: ch
199 character :: lower
200 integer :: ic
201
202 ic = ichar(ch)
203 if (ic >= ichar('A') .and. ic <= ichar('Z')) then
204 lower = char(ic + 32)
205 else
206 lower = ch
207 end if
208 end function to_lower
209
210 end module ferp_search
211