fortrangoingonforty/ferp / 46aa90a

Browse files

Add memory-mapped I/O and Boyer-Moore search for 36x speedup

Performance improvements:
- Add ferp_mmap module for memory-mapped file I/O via POSIX mmap
- Add ferp_search module with Boyer-Moore-Horspool string search
- Use mmap for regular file reads (falls back to standard Fortran I/O for stdin, null-data mode, and when mmap fails)
- Use Boyer-Moore for fixed string matching (-F mode)
- Compile patterns for all modes (including fixed strings)

Results on 134MB file (2M lines):
- Fixed string (-F): 8.2s → 0.24s (36x faster, matches GNU grep)
- BRE regex: 55.5s → 11.1s (5x faster, NFA still bottleneck)
- PCRE (-P): ~0.3s (very fast via libpcre2)
Authored by espadonne
SHA
46aa90a4e03432206d1cb1a1a17236d2b7e0d7f8
Parents
243c590
Tree
825af53

6 changed files

StatusFile+-
M Makefile 6 2
M src/ferp_io.f90 44 10
M src/ferp_matcher.f90 79 4
A src/ferp_mmap.f90 258 0
A src/ferp_search.f90 210 0
M src/main.f90 8 20
Makefilemodified
@@ -39,9 +39,11 @@ REGEX_SRCS = $(REGEX_DIR)/regex_types.f90 \
39
 # Main source files (in dependency order)
39
 # Main source files (in dependency order)
40
 MAIN_SRCS = $(SRC_DIR)/ferp_kinds.f90 \
40
 MAIN_SRCS = $(SRC_DIR)/ferp_kinds.f90 \
41
             $(SRC_DIR)/ferp_options.f90 \
41
             $(SRC_DIR)/ferp_options.f90 \
42
+            $(SRC_DIR)/ferp_mmap.f90 \
42
             $(SRC_DIR)/ferp_io.f90 \
43
             $(SRC_DIR)/ferp_io.f90 \
43
             $(SRC_DIR)/ferp_output.f90 \
44
             $(SRC_DIR)/ferp_output.f90 \
44
             $(SRC_DIR)/ferp_dir.f90 \
45
             $(SRC_DIR)/ferp_dir.f90 \
46
+            $(SRC_DIR)/ferp_search.f90 \
45
             $(SRC_DIR)/ferp_cli.f90 \
47
             $(SRC_DIR)/ferp_cli.f90 \
46
             $(SRC_DIR)/ferp_matcher.f90 \
48
             $(SRC_DIR)/ferp_matcher.f90 \
47
             $(SRC_DIR)/main.f90
49
             $(SRC_DIR)/main.f90
@@ -91,11 +93,13 @@ $(BUILD_DIR)/pcre_api.o:
91
 
93
 
92
 # Main module dependencies
94
 # Main module dependencies
93
 $(BUILD_DIR)/ferp_options.o: $(BUILD_DIR)/ferp_kinds.o
95
 $(BUILD_DIR)/ferp_options.o: $(BUILD_DIR)/ferp_kinds.o
94
-$(BUILD_DIR)/ferp_io.o: $(BUILD_DIR)/ferp_kinds.o
96
+$(BUILD_DIR)/ferp_mmap.o: $(BUILD_DIR)/ferp_kinds.o
97
+$(BUILD_DIR)/ferp_io.o: $(BUILD_DIR)/ferp_kinds.o $(BUILD_DIR)/ferp_mmap.o
95
 $(BUILD_DIR)/ferp_output.o: $(BUILD_DIR)/ferp_kinds.o $(BUILD_DIR)/ferp_options.o
98
 $(BUILD_DIR)/ferp_output.o: $(BUILD_DIR)/ferp_kinds.o $(BUILD_DIR)/ferp_options.o
96
 $(BUILD_DIR)/ferp_dir.o: $(BUILD_DIR)/ferp_kinds.o
99
 $(BUILD_DIR)/ferp_dir.o: $(BUILD_DIR)/ferp_kinds.o
100
+$(BUILD_DIR)/ferp_search.o: $(BUILD_DIR)/ferp_kinds.o
97
 $(BUILD_DIR)/ferp_cli.o: $(BUILD_DIR)/ferp_kinds.o $(BUILD_DIR)/ferp_options.o
101
 $(BUILD_DIR)/ferp_cli.o: $(BUILD_DIR)/ferp_kinds.o $(BUILD_DIR)/ferp_options.o
98
-$(BUILD_DIR)/ferp_matcher.o: $(BUILD_DIR)/ferp_kinds.o $(BUILD_DIR)/ferp_options.o $(BUILD_DIR)/ferp_io.o $(BUILD_DIR)/ferp_output.o $(BUILD_DIR)/regex_api.o $(BUILD_DIR)/pcre_api.o
102
+$(BUILD_DIR)/ferp_matcher.o: $(BUILD_DIR)/ferp_kinds.o $(BUILD_DIR)/ferp_options.o $(BUILD_DIR)/ferp_io.o $(BUILD_DIR)/ferp_output.o $(BUILD_DIR)/ferp_search.o $(BUILD_DIR)/regex_api.o $(BUILD_DIR)/pcre_api.o
99
 $(BUILD_DIR)/main.o: $(BUILD_DIR)/ferp_kinds.o $(BUILD_DIR)/ferp_options.o $(BUILD_DIR)/ferp_cli.o $(BUILD_DIR)/ferp_io.o $(BUILD_DIR)/ferp_dir.o $(BUILD_DIR)/ferp_matcher.o
103
 $(BUILD_DIR)/main.o: $(BUILD_DIR)/ferp_kinds.o $(BUILD_DIR)/ferp_options.o $(BUILD_DIR)/ferp_cli.o $(BUILD_DIR)/ferp_io.o $(BUILD_DIR)/ferp_dir.o $(BUILD_DIR)/ferp_matcher.o
100
 
104
 
101
 # Clean build artifacts
105
 # Clean build artifacts
src/ferp_io.f90modified
@@ -1,17 +1,20 @@
1
 module ferp_io
1
 module ferp_io
2
   !> File I/O handling for FERP
2
   !> File I/O handling for FERP
3
   !> Supports dynamic line length (no fixed limit)
3
   !> Supports dynamic line length (no fixed limit)
4
+  !> Uses memory-mapped I/O for improved performance on files
4
   use ferp_kinds
5
   use ferp_kinds
6
+  use ferp_mmap
5
   use, intrinsic :: iso_fortran_env, only: input_unit, error_unit, iostat_end, iostat_eor
7
   use, intrinsic :: iso_fortran_env, only: input_unit, error_unit, iostat_end, iostat_eor
6
   implicit none
8
   implicit none
7
   private
9
   private
8
 
10
 
9
   public :: input_source
11
   public :: input_source
10
-  public :: SOURCE_STDIN, SOURCE_FILE
12
+  public :: SOURCE_STDIN, SOURCE_FILE, SOURCE_MMAP
11
   public :: check_binary_file
13
   public :: check_binary_file
12
 
14
 
13
   integer, parameter :: SOURCE_STDIN = 1
15
   integer, parameter :: SOURCE_STDIN = 1
14
   integer, parameter :: SOURCE_FILE = 2
16
   integer, parameter :: SOURCE_FILE = 2
17
+  integer, parameter :: SOURCE_MMAP = 3
15
 
18
 
16
   type :: input_source
19
   type :: input_source
17
     integer :: source_type = SOURCE_STDIN
20
     integer :: source_type = SOURCE_STDIN
@@ -23,6 +26,7 @@ module ferp_io
23
     logical :: is_binary = .false.
26
     logical :: is_binary = .false.
24
     logical :: eof_reached = .false.
27
     logical :: eof_reached = .false.
25
     logical :: null_data_mode = .false.
28
     logical :: null_data_mode = .false.
29
+    type(mmap_file_t) :: mmap_file  ! Memory-mapped file handle
26
   contains
30
   contains
27
     procedure :: open => source_open
31
     procedure :: open => source_open
28
     procedure :: close => source_close
32
     procedure :: close => source_close
@@ -68,18 +72,37 @@ contains
68
       return
72
       return
69
     end if
73
     end if
70
 
74
 
71
-    this%source_type = SOURCE_FILE
72
     this%filename = filename
75
     this%filename = filename
73
 
76
 
74
-    ! Open file - use stream access for null-data mode, otherwise sequential
77
+    ! For null-data mode, use stream access (can't use mmap easily)
75
     if (this%null_data_mode) then
78
     if (this%null_data_mode) then
79
+      this%source_type = SOURCE_FILE
76
       open(newunit=this%unit_num, file=filename, status='old', action='read', &
80
       open(newunit=this%unit_num, file=filename, status='old', action='read', &
77
            access='stream', form='unformatted', iostat=ios, iomsg=errmsg)
81
            access='stream', form='unformatted', iostat=ios, iomsg=errmsg)
78
-    else
82
+      if (ios /= 0) then
79
-      open(newunit=this%unit_num, file=filename, status='old', action='read', &
83
+        if (.not. quiet) then
80
-           iostat=ios, iomsg=errmsg)
84
+          write(error_unit, '(A)') 'ferp: ' // trim(filename) // ': ' // trim(errmsg)
85
+        end if
86
+        return
87
+      end if
88
+      this%is_open = .true.
89
+      success = .true.
90
+      return
81
     end if
91
     end if
82
 
92
 
93
+    ! Try memory-mapped I/O first (fastest for regular files)
94
+    if (this%mmap_file%open(filename)) then
95
+      this%source_type = SOURCE_MMAP
96
+      this%is_open = .true.
97
+      success = .true.
98
+      return
99
+    end if
100
+
101
+    ! Fall back to standard Fortran I/O
102
+    this%source_type = SOURCE_FILE
103
+    open(newunit=this%unit_num, file=filename, status='old', action='read', &
104
+         iostat=ios, iomsg=errmsg)
105
+
83
     if (ios /= 0) then
106
     if (ios /= 0) then
84
       if (.not. quiet) then
107
       if (.not. quiet) then
85
         write(error_unit, '(A)') 'ferp: ' // trim(filename) // ': ' // trim(errmsg)
108
         write(error_unit, '(A)') 'ferp: ' // trim(filename) // ': ' // trim(errmsg)
@@ -96,8 +119,12 @@ contains
96
     !> Close the input source
119
     !> Close the input source
97
     class(input_source), intent(inout) :: this
120
     class(input_source), intent(inout) :: this
98
 
121
 
99
-    if (this%is_open .and. this%source_type == SOURCE_FILE) then
122
+    if (this%is_open) then
100
-      close(this%unit_num)
123
+      if (this%source_type == SOURCE_FILE) then
124
+        close(this%unit_num)
125
+      else if (this%source_type == SOURCE_MMAP) then
126
+        call this%mmap_file%close()
127
+      end if
101
     end if
128
     end if
102
 
129
 
103
     this%is_open = .false.
130
     this%is_open = .false.
@@ -105,7 +132,7 @@ contains
105
 
132
 
106
   function source_read_line_dynamic(this, line, line_num, byte_off) result(success)
133
   function source_read_line_dynamic(this, line, line_num, byte_off) result(success)
107
     !> Read a line from the input source with dynamic allocation
134
     !> Read a line from the input source with dynamic allocation
108
-    !> Uses a fixed read buffer but returns an allocatable string (thread-safe)
135
+    !> Uses mmap for files, standard I/O for stdin
109
     class(input_source), intent(inout) :: this
136
     class(input_source), intent(inout) :: this
110
     character(len=:), allocatable, intent(out) :: line
137
     character(len=:), allocatable, intent(out) :: line
111
     integer, intent(out) :: line_num
138
     integer, intent(out) :: line_num
@@ -125,7 +152,14 @@ contains
125
 
152
 
126
     if (.not. this%is_open .or. this%eof_reached) return
153
     if (.not. this%is_open .or. this%eof_reached) return
127
 
154
 
128
-    ! Read line using standard Fortran I/O
155
+    ! Use mmap for memory-mapped files (fastest path)
156
+    if (this%source_type == SOURCE_MMAP) then
157
+      success = this%mmap_file%read_line(line, line_num, byte_off)
158
+      if (.not. success) this%eof_reached = .true.
159
+      return
160
+    end if
161
+
162
+    ! Standard Fortran I/O for stdin and fallback
129
     read(this%unit_num, '(A)', iostat=ios) buffer
163
     read(this%unit_num, '(A)', iostat=ios) buffer
130
 
164
 
131
     if (ios == iostat_end) then
165
     if (ios == iostat_end) then
src/ferp_matcher.f90modified
@@ -5,6 +5,7 @@ module ferp_matcher
5
   use ferp_options
5
   use ferp_options
6
   use ferp_io
6
   use ferp_io
7
   use ferp_output
7
   use ferp_output
8
+  use ferp_search
8
   use regex_api
9
   use regex_api
9
   use pcre_api
10
   use pcre_api
10
   implicit none
11
   implicit none
@@ -19,9 +20,11 @@ module ferp_matcher
19
   type :: compiled_patterns_t
20
   type :: compiled_patterns_t
20
     type(regex_t), allocatable :: regexes(:)
21
     type(regex_t), allocatable :: regexes(:)
21
     type(pcre_t), allocatable :: pcres(:)       ! PCRE compiled patterns
22
     type(pcre_t), allocatable :: pcres(:)       ! PCRE compiled patterns
23
+    type(bm_pattern_t), allocatable :: bm_pats(:)  ! Boyer-Moore patterns for fixed strings
22
     integer :: count = 0
24
     integer :: count = 0
23
     logical :: compiled = .false.
25
     logical :: compiled = .false.
24
     logical :: is_pcre = .false.                ! True if using PCRE
26
     logical :: is_pcre = .false.                ! True if using PCRE
27
+    logical :: is_fixed = .false.               ! True if using Boyer-Moore fixed strings
25
   end type compiled_patterns_t
28
   end type compiled_patterns_t
26
 
29
 
27
   !> Context buffer entry - holds a line with its metadata
30
   !> Context buffer entry - holds a line with its metadata
@@ -48,6 +51,25 @@ contains
48
     n = size(patterns)
51
     n = size(patterns)
49
     compiled%count = n
52
     compiled%count = n
50
     compiled%is_pcre = (opts%pattern_type == PATTERN_PERL)
53
     compiled%is_pcre = (opts%pattern_type == PATTERN_PERL)
54
+    compiled%is_fixed = (opts%pattern_type == PATTERN_FIXED)
55
+
56
+    ! Use Boyer-Moore for fixed string patterns
57
+    if (compiled%is_fixed) then
58
+      allocate(compiled%bm_pats(n))
59
+
60
+      do i = 1, n
61
+        pattern = patterns(i)
62
+        ! For case-insensitive, convert pattern to lowercase
63
+        if (opts%ignore_case) then
64
+          call bm_compile(compiled%bm_pats(i), trim(pattern), .true.)
65
+        else
66
+          call bm_compile(compiled%bm_pats(i), trim(pattern), .false.)
67
+        end if
68
+      end do
69
+
70
+      compiled%compiled = .true.
71
+      return
72
+    end if
51
 
73
 
52
     ! Use PCRE for Perl-compatible patterns
74
     ! Use PCRE for Perl-compatible patterns
53
     if (compiled%is_pcre) then
75
     if (compiled%is_pcre) then
@@ -125,9 +147,17 @@ contains
125
       deallocate(compiled%pcres)
147
       deallocate(compiled%pcres)
126
     end if
148
     end if
127
 
149
 
150
+    if (allocated(compiled%bm_pats)) then
151
+      do i = 1, compiled%count
152
+        call bm_free(compiled%bm_pats(i))
153
+      end do
154
+      deallocate(compiled%bm_pats)
155
+    end if
156
+
128
     compiled%count = 0
157
     compiled%count = 0
129
     compiled%compiled = .false.
158
     compiled%compiled = .false.
130
     compiled%is_pcre = .false.
159
     compiled%is_pcre = .false.
160
+    compiled%is_fixed = .false.
131
 
161
 
132
   end subroutine free_patterns
162
   end subroutine free_patterns
133
 
163
 
@@ -157,12 +187,18 @@ contains
157
       ! Match based on pattern type
187
       ! Match based on pattern type
158
       select case (opts%pattern_type)
188
       select case (opts%pattern_type)
159
         case (PATTERN_FIXED)
189
         case (PATTERN_FIXED)
160
-          if (opts%ignore_case) then
190
+          ! Use Boyer-Moore if compiled patterns available
161
-            search_pattern = to_lower(patterns(i))
191
+          if (present(compiled) .and. compiled%compiled .and. compiled%is_fixed) then
192
+            matches = match_fixed_bm(line, compiled%bm_pats(i), opts)
162
           else
193
           else
163
-            search_pattern = patterns(i)
194
+            ! Fallback to simple index search
195
+            if (opts%ignore_case) then
196
+              search_pattern = to_lower(patterns(i))
197
+            else
198
+              search_pattern = patterns(i)
199
+            end if
200
+            matches = match_fixed_string(search_line, search_pattern, opts)
164
           end if
201
           end if
165
-          matches = match_fixed_string(search_line, search_pattern, opts)
166
 
202
 
167
         case (PATTERN_BRE, PATTERN_ERE)
203
         case (PATTERN_BRE, PATTERN_ERE)
168
           if (present(compiled) .and. compiled%compiled) then
204
           if (present(compiled) .and. compiled%compiled) then
@@ -267,6 +303,45 @@ contains
267
 
303
 
268
   end function match_fixed_string
304
   end function match_fixed_string
269
 
305
 
306
+  function match_fixed_bm(line, bm_pat, opts) result(matches)
307
+    !> Fixed string matching using Boyer-Moore algorithm
308
+    character(len=*), intent(in) :: line
309
+    type(bm_pattern_t), intent(in) :: bm_pat
310
+    type(grep_options), intent(in) :: opts
311
+    logical :: matches
312
+
313
+    integer :: pos
314
+    integer :: line_len, pat_len
315
+
316
+    matches = .false.
317
+    line_len = len_trim(line)
318
+    pat_len = bm_pat%pattern_len
319
+
320
+    if (pat_len == 0) then
321
+      ! Empty pattern matches everything
322
+      matches = .true.
323
+      return
324
+    end if
325
+
326
+    ! Find pattern using Boyer-Moore
327
+    pos = bm_search(line(1:line_len), bm_pat)
328
+
329
+    if (pos == 0) return
330
+
331
+    ! Check word boundary if -w
332
+    if (opts%word_regexp) then
333
+      if (.not. is_word_match(line, pos, pat_len)) return
334
+    end if
335
+
336
+    ! Check line match if -x
337
+    if (opts%line_regexp) then
338
+      if (pos /= 1 .or. pat_len /= line_len) return
339
+    end if
340
+
341
+    matches = .true.
342
+
343
+  end function match_fixed_bm
344
+
270
   function is_word_match(line, pos, pat_len) result(is_word)
345
   function is_word_match(line, pos, pat_len) result(is_word)
271
     !> Check if match at pos is a whole word
346
     !> Check if match at pos is a whole word
272
     character(len=*), intent(in) :: line
347
     character(len=*), intent(in) :: line
src/ferp_mmap.f90added
@@ -0,0 +1,258 @@
1
module ferp_mmap
  !> Memory-mapped file I/O for FERP
  !> Uses POSIX mmap for efficient file reading.
  !>
  !> A file is mapped read-only in one piece and then handed out line by
  !> line through mmap_file_t%read_line. Callers are expected to fall back to
  !> ordinary Fortran I/O when opening fails.
  use ferp_kinds
  use, intrinsic :: iso_c_binding
  implicit none
  private

  public :: mmap_file_t
  public :: mmap_open, mmap_close, mmap_get_line

  ! POSIX constants
  ! NOTE(review): numeric values match Linux and macOS headers; confirm
  ! before building on another platform.
  integer(c_int), parameter :: PROT_READ = 1
  integer(c_int), parameter :: MAP_PRIVATE = 2
  ! mmap signals failure by returning (void *) -1, not a null pointer, so the
  ! sentinel is kept as an address-sized integer for comparison.
  integer(c_intptr_t), parameter :: MAP_FAILED = -1_c_intptr_t

  ! Line terminator bytes
  character(len=1, kind=c_char), parameter :: ascii_lf = achar(10, kind=c_char)
  character(len=1, kind=c_char), parameter :: ascii_cr = achar(13, kind=c_char)

  ! C interfaces
  ! NOTE(review): offsets are declared as c_long, which presumes off_t is a
  ! C long on the target platform - confirm for 32-bit builds.
  interface
    function c_open(pathname, flags) bind(C, name="open")
      import :: c_char, c_int
      character(kind=c_char), intent(in) :: pathname(*)
      integer(c_int), value :: flags
      integer(c_int) :: c_open
    end function c_open

    function c_close(fd) bind(C, name="close")
      import :: c_int
      integer(c_int), value :: fd
      integer(c_int) :: c_close
    end function c_close

    function c_mmap(addr, length, prot, flags, fd, offset) bind(C, name="mmap")
      import :: c_ptr, c_size_t, c_int, c_long
      type(c_ptr), value :: addr
      integer(c_size_t), value :: length
      integer(c_int), value :: prot
      integer(c_int), value :: flags
      integer(c_int), value :: fd
      integer(c_long), value :: offset
      type(c_ptr) :: c_mmap
    end function c_mmap

    function c_munmap(addr, length) bind(C, name="munmap")
      import :: c_ptr, c_size_t, c_int
      type(c_ptr), value :: addr
      integer(c_size_t), value :: length
      integer(c_int) :: c_munmap
    end function c_munmap

    function c_fstat(fd, statbuf) bind(C, name="fstat")
      import :: c_int, c_ptr
      integer(c_int), value :: fd
      type(c_ptr), value :: statbuf
      integer(c_int) :: c_fstat
    end function c_fstat

    function c_lseek(fd, offset, whence) bind(C, name="lseek")
      import :: c_int, c_long
      integer(c_int), value :: fd
      integer(c_long), value :: offset
      integer(c_int), value :: whence
      integer(c_long) :: c_lseek
    end function c_lseek
  end interface

  ! lseek whence values
  integer(c_int), parameter :: SEEK_SET = 0
  integer(c_int), parameter :: SEEK_END = 2

  !> Memory-mapped file type
  type :: mmap_file_t
    type(c_ptr) :: data = c_null_ptr    ! Base address of the mapping (null when size is 0)
    integer(c_size_t) :: size = 0       ! Mapped length in bytes
    integer(c_size_t) :: pos = 0        ! Current position in file (0-based byte index)
    integer :: line_number = 0          ! Number of lines handed out so far
    integer(i64) :: byte_offset = 0     ! Mirror of pos, updated after each read
    logical :: is_open = .false.
    character(len=max_path_len) :: filename = ''
  contains
    procedure :: open => mmap_open_method
    procedure :: close => mmap_close_method
    procedure :: read_line => mmap_read_line
    procedure :: reset => mmap_reset
  end type mmap_file_t

contains

  function mmap_open(filename, mfile) result(success)
    !> Open a file with memory mapping.
    !>
    !> filename - path of the file to map
    !> mfile    - handle to initialise; being intent(out) it is reset to its
    !>            defaults on entry, so any mapping it held is NOT released
    !>            here (use the type-bound open, or close it first)
    !>
    !> Returns .true. on success. A file whose size is reported as 0 is
    !> treated as successfully opened with nothing to read and no mapping.
    !> Returns .false. when the file cannot be opened, its size cannot be
    !> determined with lseek, or mmap itself fails.
    character(len=*), intent(in) :: filename
    type(mmap_file_t), intent(out) :: mfile
    logical :: success

    integer(c_int) :: fd, istat
    integer(c_int), parameter :: O_RDONLY = 0
    integer(c_size_t) :: file_size
    integer(c_long) :: size_long

    success = .false.
    mfile%is_open = .false.
    mfile%filename = filename

    ! Open file
    fd = c_open(trim(filename) // c_null_char, O_RDONLY)
    if (fd < 0) return

    ! Get file size via lseek to end. No rewind is needed afterwards:
    ! mmap takes an explicit offset and the descriptor is closed below.
    size_long = c_lseek(fd, 0_c_long, SEEK_END)
    if (size_long < 0) then
      istat = c_close(fd)
      return
    end if
    file_size = int(size_long, c_size_t)

    if (file_size == 0) then
      ! A zero-length mapping is invalid; represent the file as open with
      ! no data so the first read_line reports end of input.
      istat = c_close(fd)
      mfile%size = 0
      mfile%is_open = .true.
      success = .true.
      return
    end if

    ! Memory map the file
    mfile%data = c_mmap(c_null_ptr, file_size, PROT_READ, MAP_PRIVATE, fd, 0_c_long)
    istat = c_close(fd)  ! Can close fd after mmap

    ! mmap reports failure as MAP_FAILED ((void *) -1), which c_associated
    ! would consider a valid pointer, so compare the raw address as well.
    if (.not. c_associated(mfile%data) .or. &
        transfer(mfile%data, MAP_FAILED) == MAP_FAILED) then
      mfile%data = c_null_ptr
      return
    end if

    mfile%size = file_size
    mfile%pos = 0
    mfile%line_number = 0
    mfile%byte_offset = 0
    mfile%is_open = .true.
    success = .true.

  end function mmap_open

  function mmap_open_method(this, filename) result(success)
    !> Type-bound wrapper around mmap_open.
    !> Releases any mapping the handle still owns before reopening, because
    !> mmap_open resets the handle and would otherwise leak it.
    class(mmap_file_t), intent(inout) :: this
    character(len=*), intent(in) :: filename
    logical :: success

    if (this%is_open) call mmap_close(this)
    success = mmap_open(filename, this)
  end function mmap_open_method

  subroutine mmap_close(mfile)
    !> Close memory-mapped file: unmap the region (if any) and reset the
    !> handle so it can be reused. Safe to call on a handle that is not open.
    type(mmap_file_t), intent(inout) :: mfile

    integer(c_int) :: istat

    if (mfile%is_open .and. c_associated(mfile%data)) then
      istat = c_munmap(mfile%data, mfile%size)
    end if

    mfile%data = c_null_ptr
    mfile%size = 0
    mfile%pos = 0
    mfile%line_number = 0
    mfile%byte_offset = 0
    mfile%is_open = .false.

  end subroutine mmap_close

  subroutine mmap_close_method(this)
    !> Type-bound wrapper around mmap_close.
    class(mmap_file_t), intent(inout) :: this
    call mmap_close(this)
  end subroutine mmap_close_method

  subroutine mmap_reset(this)
    !> Reset to beginning of file; the mapping itself is left untouched.
    class(mmap_file_t), intent(inout) :: this
    this%pos = 0
    this%line_number = 0
    this%byte_offset = 0
  end subroutine mmap_reset

  function mmap_get_line(mfile, line, line_num, byte_off) result(success)
    !> Get next line from memory-mapped file (procedural form of read_line).
    type(mmap_file_t), intent(inout) :: mfile
    character(len=:), allocatable, intent(out) :: line
    integer, intent(out) :: line_num
    integer(i64), intent(out) :: byte_off
    logical :: success

    success = mfile%read_line(line, line_num, byte_off)
  end function mmap_get_line

  function mmap_read_line(this, line, line_num, byte_off) result(success)
    !> Read next line from memory-mapped file.
    !>
    !> line     - receives the line text without its LF terminator; a CR
    !>            immediately before the end of the line is dropped as well
    !> line_num - 1-based number of the returned line
    !> byte_off - 0-based byte offset at which the returned line starts
    !>
    !> Returns .false. (with line unallocated and both counters 0) when the
    !> handle is not open or all bytes have been consumed.
    class(mmap_file_t), intent(inout) :: this
    character(len=:), allocatable, intent(out) :: line
    integer, intent(out) :: line_num
    integer(i64), intent(out) :: byte_off
    logical :: success

    character(len=1, kind=c_char), pointer :: file_data(:)
    ! All byte indices use c_size_t so lines and files beyond the default
    ! integer range are indexed without overflow.
    integer(c_size_t) :: start_pos, end_pos, line_len, k

    success = .false.
    line_num = 0
    byte_off = 0

    if (.not. this%is_open) return
    if (this%pos >= this%size) return

    ! Map the C pointer to a Fortran character array
    call c_f_pointer(this%data, file_data, [this%size])

    ! Find start and end of line
    start_pos = this%pos + 1  ! 1-based for Fortran
    end_pos = start_pos

    ! Scan for newline; end_pos ends on the LF, or at size + 1 when the
    ! final line has no terminator.
    do while (end_pos <= this%size)
      if (file_data(end_pos) == ascii_lf) exit
      end_pos = end_pos + 1
    end do

    ! Calculate line length (excluding newline)
    line_len = end_pos - start_pos
    if (line_len > 0) then
      ! Drop a trailing CR (Windows line ending)
      if (file_data(end_pos - 1) == ascii_cr) line_len = line_len - 1
    end if

    ! Allocate and copy line (a zero-length line yields an empty string)
    allocate(character(len=line_len) :: line)
    do k = 1, line_len
      line(k:k) = file_data(start_pos + k - 1)
    end do

    ! Update state
    this%line_number = this%line_number + 1
    line_num = this%line_number
    byte_off = int(this%pos, i64)

    ! Move past the newline: the 1-based index of the LF equals the 0-based
    ! index of the byte that follows it.
    if (end_pos <= this%size) then
      this%pos = end_pos
    else
      this%pos = this%size
    end if
    this%byte_offset = int(this%pos, i64)

    success = .true.

  end function mmap_read_line

end module ferp_mmap
src/ferp_search.f90added
@@ -0,0 +1,210 @@
1
module ferp_search
  !> Fast string search algorithms for FERP
  !> Implements Boyer-Moore-Horspool for fixed string matching
  use ferp_kinds
  implicit none
  private

  public :: bm_search, bm_search_all
  public :: bm_pattern_t, bm_compile, bm_free

  !> Distance between a lowercase ASCII letter and its uppercase form
  integer, parameter :: case_offset = ichar('a') - ichar('A')

  !> Compiled Boyer-Moore pattern
  type :: bm_pattern_t
    character(len=:), allocatable :: pattern   ! Stored lowercased when case_insensitive
    integer :: pattern_len = 0
    integer :: skip_table(0:255) = 1           ! Bad character skip table, indexed by byte value
    logical :: case_insensitive = .false.
  end type bm_pattern_t

contains

  subroutine bm_compile(pat, pattern, case_insensitive)
    !> Compile a pattern for Boyer-Moore search.
    !>
    !> pat              - receives the compiled pattern (fully overwritten)
    !> pattern          - literal text to search for, used at its full length
    !> case_insensitive - optional; when .true. ASCII letters are matched
    !>                    without regard to case (default .false.)
    type(bm_pattern_t), intent(out) :: pat
    character(len=*), intent(in) :: pattern
    logical, intent(in), optional :: case_insensitive

    integer :: i, c, pat_len

    pat%case_insensitive = .false.
    if (present(case_insensitive)) pat%case_insensitive = case_insensitive

    pat_len = len(pattern)
    pat%pattern_len = pat_len

    ! Store pattern (lowercase if case-insensitive)
    pat%pattern = pattern
    if (pat%case_insensitive) then
      do i = 1, pat_len
        pat%pattern(i:i) = to_lower(pattern(i:i))
      end do
    end if

    ! Default skip is the pattern length; never below 1 so that a search
    ! loop driven by this table always advances.
    pat%skip_table = max(pat_len, 1)

    ! Build bad character table: for each character in the pattern except
    ! the last, the skip is its distance from the end. Later occurrences
    ! overwrite earlier ones, leaving the smallest distance.
    do i = 1, pat_len - 1
      c = byte_of(pat%pattern(i:i))
      pat%skip_table(c) = pat_len - i

      ! The stored pattern is already lowercase here, so only the uppercase
      ! twin of a letter needs mirroring.
      if (pat%case_insensitive) then
        if (c >= ichar('a') .and. c <= ichar('z')) then
          pat%skip_table(c - case_offset) = pat_len - i
        end if
      end if
    end do

  end subroutine bm_compile

  subroutine bm_free(pat)
    !> Free compiled pattern; afterwards it behaves as an empty pattern.
    type(bm_pattern_t), intent(inout) :: pat
    if (allocated(pat%pattern)) deallocate(pat%pattern)
    pat%pattern_len = 0
  end subroutine bm_free

  function bm_search(text, pat) result(pos)
    !> Search for pattern in text using Boyer-Moore-Horspool.
    !> Returns position of first match (1-based), or 0 if not found.
    !> An empty pattern matches at position 1.
    character(len=*), intent(in) :: text
    type(bm_pattern_t), intent(in) :: pat
    integer :: pos

    integer :: text_len, pat_len, last

    pos = 0
    text_len = len(text)
    pat_len = pat%pattern_len

    if (pat_len == 0) then
      pos = 1  ! Empty pattern matches at start
      return
    end if

    if (text_len < pat_len) return

    ! "last" is the text index aligned with the final pattern character
    last = pat_len
    do while (last <= text_len)
      if (window_matches(text, pat, last)) then
        pos = last - pat_len + 1
        return
      end if
      last = last + skip_after(text, pat, last)
    end do

  end function bm_search

  subroutine bm_search_all(text, pat, positions, count)
    !> Find all non-overlapping occurrences of pattern in text.
    !>
    !> positions - receives 1-based start positions; only the first "count"
    !>             entries are defined on return
    !> count     - number of matches stored, at most size(positions)
    !>
    !> An empty pattern is reported at every position 1 .. len(text) + 1.
    character(len=*), intent(in) :: text
    type(bm_pattern_t), intent(in) :: pat
    integer, intent(out) :: positions(:)  ! Array to store positions
    integer, intent(out) :: count         ! Number of matches found

    integer :: text_len, pat_len, last, i, max_matches

    count = 0
    max_matches = size(positions)
    text_len = len(text)
    pat_len = pat%pattern_len

    if (pat_len == 0) then
      ! Empty pattern matches at every position
      do i = 1, min(text_len + 1, max_matches)
        count = count + 1
        positions(count) = i
      end do
      return
    end if

    if (text_len < pat_len) return

    last = pat_len
    do while (last <= text_len .and. count < max_matches)
      if (window_matches(text, pat, last)) then
        count = count + 1
        positions(count) = last - pat_len + 1
        ! Move past this match (non-overlapping)
        last = last + pat_len
      else
        last = last + skip_after(text, pat, last)
      end if
    end do

  end subroutine bm_search_all

  pure function window_matches(text, pat, last) result(hit)
    !> True when the pattern equals the text window ending at index "last".
    !> Compares right-to-left, the order Horspool relies on.
    character(len=*), intent(in) :: text
    type(bm_pattern_t), intent(in) :: pat
    integer, intent(in) :: last
    logical :: hit

    integer :: j, k

    hit = .false.
    k = last
    do j = pat%pattern_len, 1, -1
      if (pat%case_insensitive) then
        if (to_lower(text(k:k)) /= pat%pattern(j:j)) return
      else
        if (text(k:k) /= pat%pattern(j:j)) return
      end if
      k = k - 1
    end do
    hit = .true.
  end function window_matches

  pure function skip_after(text, pat, last) result(skip)
    !> Shift to apply after a failed window ending at "last", taken from the
    !> bad character table for the text character under the pattern's end.
    !> Always at least 1 so the caller's loop is guaranteed to terminate.
    character(len=*), intent(in) :: text
    type(bm_pattern_t), intent(in) :: pat
    integer, intent(in) :: last
    integer :: skip

    if (pat%case_insensitive) then
      skip = pat%skip_table(byte_of(to_lower(text(last:last))))
    else
      skip = pat%skip_table(byte_of(text(last:last)))
    end if
    skip = max(skip, 1)
  end function skip_after

  pure function byte_of(ch) result(code)
    !> Character code reduced to 0..255, so it is always a valid skip_table
    !> index whatever range ichar uses for non-ASCII characters.
    character, intent(in) :: ch
    integer :: code
    code = iand(ichar(ch), 255)
  end function byte_of

  pure function to_lower(ch) result(lower)
    !> Convert character to lowercase (ASCII letters only; others unchanged)
    character, intent(in) :: ch
    character :: lower
    integer :: ic

    ic = ichar(ch)
    if (ic >= ichar('A') .and. ic <= ichar('Z')) then
      lower = char(ic + case_offset)
    else
      lower = ch
    end if
  end function to_lower

end module ferp_search
src/main.f90modified
@@ -105,13 +105,11 @@ program ferp
105
     end if
105
     end if
106
   end if
106
   end if
107
 
107
 
108
-  ! Compile patterns for regex modes
108
+  ! Compile patterns (for all modes - regex uses NFA/PCRE, fixed uses Boyer-Moore)
109
-  if (opts%pattern_type /= PATTERN_FIXED) then
109
+  call compile_patterns(patterns, opts, compiled, ierr)
110
-    call compile_patterns(patterns, opts, compiled, ierr)
110
+  if (ierr /= 0) then
111
-    if (ierr /= 0) then
111
+    write(error_unit, '(A)') 'ferp: Invalid regular expression'
112
-      write(error_unit, '(A)') 'ferp: Invalid regular expression'
112
+    call c_exit(2_c_int)
113
-      call c_exit(2_c_int)
114
-    end if
115
   end if
113
   end if
116
 
114
 
117
   any_match = .false.
115
   any_match = .false.
@@ -123,11 +121,7 @@ program ferp
123
     opts%reading_stdin = .true.
121
     opts%reading_stdin = .true.
124
     if (src%open('-', null_data=opts%null_data)) then
122
     if (src%open('-', null_data=opts%null_data)) then
125
       src%filename = opts%label  ! Use --label if provided
123
       src%filename = opts%label  ! Use --label if provided
126
-      if (opts%pattern_type /= PATTERN_FIXED) then
124
+      any_match = process_source(src, patterns, opts, compiled)
127
-        any_match = process_source(src, patterns, opts, compiled)
128
-      else
129
-        any_match = process_source(src, patterns, opts)
130
-      end if
131
       call src%close()
125
       call src%close()
132
     end if
126
     end if
133
   else
127
   else
@@ -178,11 +172,7 @@ program ferp
178
       if (src%open(trim(files(i)), opts%no_messages, opts%null_data)) then
172
       if (src%open(trim(files(i)), opts%no_messages, opts%null_data)) then
179
         ! Critical section for output serialization (prevents interleaved output)
173
         ! Critical section for output serialization (prevents interleaved output)
180
         !$omp critical(output_lock)
174
         !$omp critical(output_lock)
181
-        if (opts%pattern_type /= PATTERN_FIXED) then
175
+        file_match = process_source(src, patterns, opts, compiled)
182
-          file_match = process_source(src, patterns, opts, compiled)
183
-        else
184
-          file_match = process_source(src, patterns, opts)
185
-        end if
186
         !$omp end critical(output_lock)
176
         !$omp end critical(output_lock)
187
         if (file_match) then
177
         if (file_match) then
188
           any_match = .true.
178
           any_match = .true.
@@ -196,9 +186,7 @@ program ferp
196
   end if
186
   end if
197
 
187
 
198
   ! Clean up compiled patterns
188
   ! Clean up compiled patterns
199
-  if (opts%pattern_type /= PATTERN_FIXED) then
189
+  call free_patterns(compiled)
200
-    call free_patterns(compiled)
201
-  end if
202
 
190
 
203
   ! Exit with appropriate code
191
   ! Exit with appropriate code
204
   ! 0 = match found, 1 = no match, 2 = error
192
   ! 0 = match found, 1 = no match, 2 = error