Fortran · 11315 bytes Raw Blame History
1 module ferp_mmap
2 !> Memory-mapped file I/O for FERP
3 !> Uses POSIX mmap for efficient file reading
4 !> Uses SIMD for fast newline scanning on ARM64
5 use ferp_kinds
6 use ferp_simd
7 use, intrinsic :: iso_c_binding
8 implicit none
9 private
10
11 public :: mmap_file_t
12 public :: mmap_open, mmap_close, mmap_get_line
13 public :: line_info_t, line_batch_t
14 public :: BATCH_SIZE
15
16 ! POSIX constants
17 integer(c_int), parameter :: PROT_READ = 1
18 integer(c_int), parameter :: MAP_PRIVATE = 2
19 integer(c_int), parameter :: MAP_FAILED = -1
20
21 ! Batch processing constants
22 integer, parameter :: BATCH_SIZE = 256 ! Number of lines per batch
23
24 !> Line info for batch processing (pointers into mmap'd memory)
25 type :: line_info_t
26 integer(c_size_t) :: start_pos = 0 ! Start position in mmap (0-based)
27 integer(c_size_t) :: length = 0 ! Length of line (excluding newline)
28 integer :: line_num = 0 ! Line number (1-based)
29 integer(i64) :: byte_off = 0 ! Byte offset in file
30 end type line_info_t
31
32 !> Batch of line info for bulk processing
33 type :: line_batch_t
34 type(line_info_t) :: lines(BATCH_SIZE)
35 integer :: count = 0 ! Number of valid lines in batch
36 end type line_batch_t
37
38 ! C interfaces
39 interface
40 function c_open(pathname, flags) bind(C, name="open")
41 import :: c_char, c_int
42 character(kind=c_char), intent(in) :: pathname(*)
43 integer(c_int), value :: flags
44 integer(c_int) :: c_open
45 end function c_open
46
47 function c_close(fd) bind(C, name="close")
48 import :: c_int
49 integer(c_int), value :: fd
50 integer(c_int) :: c_close
51 end function c_close
52
53 function c_mmap(addr, length, prot, flags, fd, offset) bind(C, name="mmap")
54 import :: c_ptr, c_size_t, c_int, c_long
55 type(c_ptr), value :: addr
56 integer(c_size_t), value :: length
57 integer(c_int), value :: prot
58 integer(c_int), value :: flags
59 integer(c_int), value :: fd
60 integer(c_long), value :: offset
61 type(c_ptr) :: c_mmap
62 end function c_mmap
63
64 function c_munmap(addr, length) bind(C, name="munmap")
65 import :: c_ptr, c_size_t, c_int
66 type(c_ptr), value :: addr
67 integer(c_size_t), value :: length
68 integer(c_int) :: c_munmap
69 end function c_munmap
70
71 function c_fstat(fd, statbuf) bind(C, name="fstat")
72 import :: c_int, c_ptr
73 integer(c_int), value :: fd
74 type(c_ptr), value :: statbuf
75 integer(c_int) :: c_fstat
76 end function c_fstat
77
78 function c_lseek(fd, offset, whence) bind(C, name="lseek")
79 import :: c_int, c_long
80 integer(c_int), value :: fd
81 integer(c_long), value :: offset
82 integer(c_int), value :: whence
83 integer(c_long) :: c_lseek
84 end function c_lseek
85 end interface
86
87 ! lseek whence values
88 integer(c_int), parameter :: SEEK_SET = 0
89 integer(c_int), parameter :: SEEK_END = 2
90
91 !> Memory-mapped file type
92 type :: mmap_file_t
93 type(c_ptr) :: data = c_null_ptr
94 integer(c_size_t) :: size = 0
95 integer(c_size_t) :: pos = 0 ! Current position in file
96 integer :: line_number = 0
97 integer(i64) :: byte_offset = 0
98 logical :: is_open = .false.
99 character(len=max_path_len) :: filename = ''
100 contains
101 procedure :: open => mmap_open_method
102 procedure :: close => mmap_close_method
103 procedure :: read_line => mmap_read_line
104 procedure :: read_lines_batch => mmap_read_lines_batch
105 procedure :: get_line_text => mmap_get_line_text
106 procedure :: reset => mmap_reset
107 end type mmap_file_t
108
109 contains
110
111 function mmap_open(filename, mfile) result(success)
112 !> Open a file with memory mapping
113 character(len=*), intent(in) :: filename
114 type(mmap_file_t), intent(out) :: mfile
115 logical :: success
116
117 integer(c_int) :: fd, istat
118 integer(c_int), parameter :: O_RDONLY = 0
119 integer(c_size_t) :: file_size
120 integer(c_long) :: size_long
121
122 success = .false.
123 mfile%is_open = .false.
124 mfile%filename = filename
125
126 ! Open file
127 fd = c_open(trim(filename) // c_null_char, O_RDONLY)
128 if (fd < 0) return
129
130 ! Get file size via lseek to end
131 size_long = c_lseek(fd, 0_c_long, SEEK_END)
132 if (size_long < 0) then
133 istat = c_close(fd)
134 return
135 end if
136 file_size = int(size_long, c_size_t)
137
138 ! Seek back to beginning
139 size_long = c_lseek(fd, 0_c_long, SEEK_SET)
140
141 if (file_size == 0) then
142 istat = c_close(fd)
143 mfile%size = 0
144 mfile%is_open = .true.
145 success = .true.
146 return
147 end if
148
149 ! Memory map the file
150 mfile%data = c_mmap(c_null_ptr, file_size, PROT_READ, MAP_PRIVATE, fd, 0_c_long)
151 istat = c_close(fd) ! Can close fd after mmap
152
153 if (.not. c_associated(mfile%data)) return
154
155 mfile%size = file_size
156 mfile%pos = 0
157 mfile%line_number = 0
158 mfile%byte_offset = 0
159 mfile%is_open = .true.
160 success = .true.
161
162 end function mmap_open
163
164 function mmap_open_method(this, filename) result(success)
165 class(mmap_file_t), intent(inout) :: this
166 character(len=*), intent(in) :: filename
167 logical :: success
168 success = mmap_open(filename, this)
169 end function mmap_open_method
170
171 subroutine mmap_close(mfile)
172 !> Close memory-mapped file
173 type(mmap_file_t), intent(inout) :: mfile
174
175 integer(c_int) :: istat
176
177 if (mfile%is_open .and. c_associated(mfile%data)) then
178 istat = c_munmap(mfile%data, mfile%size)
179 end if
180
181 mfile%data = c_null_ptr
182 mfile%size = 0
183 mfile%pos = 0
184 mfile%is_open = .false.
185
186 end subroutine mmap_close
187
188 subroutine mmap_close_method(this)
189 class(mmap_file_t), intent(inout) :: this
190 call mmap_close(this)
191 end subroutine mmap_close_method
192
193 subroutine mmap_reset(this)
194 !> Reset to beginning of file
195 class(mmap_file_t), intent(inout) :: this
196 this%pos = 0
197 this%line_number = 0
198 this%byte_offset = 0
199 end subroutine mmap_reset
200
201 function mmap_get_line(mfile, line, line_num, byte_off) result(success)
202 !> Get next line from memory-mapped file
203 type(mmap_file_t), intent(inout) :: mfile
204 character(len=:), allocatable, intent(out) :: line
205 integer, intent(out) :: line_num
206 integer(i64), intent(out) :: byte_off
207 logical :: success
208
209 success = mfile%read_line(line, line_num, byte_off)
210 end function mmap_get_line
211
212 function mmap_read_line(this, line, line_num, byte_off) result(success)
213 !> Read next line from memory-mapped file (SIMD-accelerated newline scanning)
214 class(mmap_file_t), intent(inout) :: this
215 character(len=:), allocatable, intent(out) :: line
216 integer, intent(out) :: line_num
217 integer(i64), intent(out) :: byte_off
218 logical :: success
219
220 character(len=1, kind=c_char), pointer :: file_data(:)
221 integer(c_size_t) :: start_pos, end_pos, line_len
222 integer(c_int64_t) :: newline_pos
223 integer :: i
224
225 success = .false.
226 line_num = 0
227 byte_off = 0
228 if (allocated(line)) deallocate(line)
229
230 if (.not. this%is_open) return
231 if (this%pos >= this%size) return
232
233 ! Map the C pointer to a Fortran character array
234 call c_f_pointer(this%data, file_data, [this%size])
235
236 ! Find start and end of line
237 start_pos = this%pos + 1 ! 1-based for Fortran
238
239 ! Use SIMD to find newline (16 bytes at a time on ARM64)
240 newline_pos = simd_find_char_ptr(this%data, int(this%size, c_int64_t), &
241 int(this%pos, c_int64_t), char(10))
242
243 if (newline_pos < 0) then
244 ! No newline found - rest of file is the line
245 end_pos = this%size + 1
246 else
247 ! Found newline - convert from 0-indexed to 1-indexed
248 end_pos = int(newline_pos, c_size_t) + 1
249 end if
250
251 ! Calculate line length (excluding newline, but preserving CR like grep)
252 line_len = end_pos - start_pos
253
254 ! Allocate and copy line
255 if (line_len > 0) then
256 allocate(character(len=line_len) :: line)
257 do i = 1, int(line_len)
258 line(i:i) = file_data(start_pos + i - 1)
259 end do
260 else
261 line = ''
262 end if
263
264 ! Update state
265 this%line_number = this%line_number + 1
266 line_num = this%line_number
267 byte_off = int(this%pos, i64)
268
269 ! Move past the newline
270 if (end_pos <= this%size) then
271 this%pos = end_pos ! Position after newline (0-based)
272 else
273 this%pos = this%size
274 end if
275 this%byte_offset = int(this%pos, i64)
276
277 success = .true.
278
279 end function mmap_read_line
280
281 function mmap_read_lines_batch(this, batch) result(success)
282 !> Read up to BATCH_SIZE lines from memory-mapped file
283 !> Returns line positions without copying data (zero-copy batch read)
284 class(mmap_file_t), intent(inout) :: this
285 type(line_batch_t), intent(out) :: batch
286 logical :: success
287
288 character(len=1, kind=c_char), pointer :: file_data(:)
289 integer(c_size_t) :: start_pos, end_pos, line_len
290 integer(c_int64_t) :: newline_pos
291 integer :: i
292
293 success = .false.
294 batch%count = 0
295
296 if (.not. this%is_open) return
297 if (this%pos >= this%size) return
298 if (.not. c_associated(this%data)) return
299
300 ! Map the C pointer to a Fortran character array
301 call c_f_pointer(this%data, file_data, [this%size])
302
303 ! Read up to BATCH_SIZE lines
304 do i = 1, BATCH_SIZE
305 if (this%pos >= this%size) exit
306
307 start_pos = this%pos ! 0-based position
308
309 ! Use SIMD to find newline
310 newline_pos = simd_find_char_ptr(this%data, int(this%size, c_int64_t), &
311 int(this%pos, c_int64_t), char(10))
312
313 if (newline_pos < 0) then
314 ! No newline found - rest of file is the line
315 end_pos = this%size
316 else
317 end_pos = int(newline_pos, c_size_t)
318 end if
319
320 ! Calculate line length (excluding newline, but preserving CR like grep)
321 line_len = end_pos - start_pos
322
323 ! Store line info
324 batch%count = batch%count + 1
325 batch%lines(batch%count)%start_pos = start_pos
326 batch%lines(batch%count)%length = line_len
327 this%line_number = this%line_number + 1
328 batch%lines(batch%count)%line_num = this%line_number
329 batch%lines(batch%count)%byte_off = int(start_pos, i64)
330
331 ! Move past the newline
332 if (end_pos < this%size) then
333 this%pos = end_pos + 1 ! Position after newline (0-based)
334 else
335 this%pos = this%size
336 end if
337 this%byte_offset = int(this%pos, i64)
338 end do
339
340 success = (batch%count > 0)
341
342 end function mmap_read_lines_batch
343
344 function mmap_get_line_text(this, info) result(line)
345 !> Extract line text from mmap'd memory given line info
346 class(mmap_file_t), intent(in) :: this
347 type(line_info_t), intent(in) :: info
348 character(len=:), allocatable :: line
349
350 character(len=1, kind=c_char), pointer :: file_data(:)
351 integer :: i
352
353 if (.not. this%is_open .or. .not. c_associated(this%data)) then
354 line = ''
355 return
356 end if
357
358 if (info%length == 0) then
359 line = ''
360 return
361 end if
362
363 ! Map the C pointer to a Fortran character array
364 call c_f_pointer(this%data, file_data, [this%size])
365
366 ! Allocate and copy line (start_pos is 0-based, array is 1-based)
367 allocate(character(len=info%length) :: line)
368 do i = 1, int(info%length)
369 line(i:i) = file_data(info%start_pos + i)
370 end do
371
372 end function mmap_get_line_text
373
374 end module ferp_mmap
375