Fortran · 11779 bytes Raw Blame History
1 module ferp_io
2 !> File I/O handling for FERP
3 !> Supports dynamic line length (no fixed limit)
4 !> Uses memory-mapped I/O for improved performance on files
5 use ferp_kinds
6 use ferp_mmap
7 use, intrinsic :: iso_fortran_env, only: input_unit, error_unit, iostat_end, iostat_eor
8 implicit none
9 private
10
11 public :: input_source
12 public :: SOURCE_STDIN, SOURCE_FILE, SOURCE_MMAP
13 public :: check_binary_file
14 ! Re-export batch types from ferp_mmap
15 public :: line_info_t, line_batch_t, BATCH_SIZE
16
17 integer, parameter :: SOURCE_STDIN = 1
18 integer, parameter :: SOURCE_FILE = 2
19 integer, parameter :: SOURCE_MMAP = 3
20
21 type :: input_source
22 integer :: source_type = SOURCE_STDIN
23 integer :: unit_num = input_unit
24 character(len=max_path_len) :: filename = '(standard input)'
25 logical :: is_open = .false.
26 integer(i64) :: byte_offset = 0
27 integer :: line_number = 0
28 logical :: is_binary = .false.
29 logical :: eof_reached = .false.
30 logical :: null_data_mode = .false.
31 type(mmap_file_t) :: mmap_file ! Memory-mapped file handle
32 contains
33 procedure :: open => source_open
34 procedure :: close => source_close
35 procedure :: read_line_dynamic => source_read_line_dynamic
36 procedure :: read_line_null_dynamic => source_read_line_null_dynamic
37 procedure :: read_lines_batch => source_read_lines_batch
38 procedure :: get_line_text => source_get_line_text
39 procedure :: check_binary => source_check_binary
40 end type input_source
41
42 contains
43
44 function source_open(this, filename, suppress_errors, null_data) result(success)
45 !> Open a file or stdin for reading
46 class(input_source), intent(inout) :: this
47 character(len=*), intent(in) :: filename
48 logical, intent(in), optional :: suppress_errors
49 logical, intent(in), optional :: null_data
50 logical :: success
51
52 integer :: ios
53 character(len=256) :: errmsg
54 logical :: quiet
55
56 quiet = .false.
57 if (present(suppress_errors)) quiet = suppress_errors
58
59 this%null_data_mode = .false.
60 if (present(null_data)) this%null_data_mode = null_data
61
62 success = .false.
63
64 ! Reset state (but preserve is_binary if already set by caller)
65 this%byte_offset = 0
66 this%line_number = 0
67 this%eof_reached = .false.
68
69 ! Handle stdin
70 if (filename == '-' .or. len_trim(filename) == 0) then
71 this%source_type = SOURCE_STDIN
72 this%unit_num = input_unit
73 this%filename = '(standard input)'
74 this%is_open = .true.
75 success = .true.
76 return
77 end if
78
79 this%filename = filename
80
81 ! For null-data mode, use stream access (can't use mmap easily)
82 if (this%null_data_mode) then
83 this%source_type = SOURCE_FILE
84 open(newunit=this%unit_num, file=filename, status='old', action='read', &
85 access='stream', form='unformatted', iostat=ios, iomsg=errmsg)
86 if (ios /= 0) then
87 if (.not. quiet) then
88 write(error_unit, '(A)') 'ferp: ' // trim(filename) // ': ' // trim(errmsg)
89 end if
90 return
91 end if
92 this%is_open = .true.
93 success = .true.
94 return
95 end if
96
97 ! Try memory-mapped I/O first (fastest for regular files)
98 if (this%mmap_file%open(filename)) then
99 this%source_type = SOURCE_MMAP
100 this%is_open = .true.
101 success = .true.
102 return
103 end if
104
105 ! Fall back to standard Fortran I/O
106 this%source_type = SOURCE_FILE
107 open(newunit=this%unit_num, file=filename, status='old', action='read', &
108 iostat=ios, iomsg=errmsg)
109
110 if (ios /= 0) then
111 if (.not. quiet) then
112 write(error_unit, '(A)') 'ferp: ' // trim(filename) // ': ' // trim(errmsg)
113 end if
114 return
115 end if
116
117 this%is_open = .true.
118 success = .true.
119
120 end function source_open
121
122 subroutine source_close(this)
123 !> Close the input source
124 class(input_source), intent(inout) :: this
125
126 if (this%is_open) then
127 if (this%source_type == SOURCE_FILE) then
128 close(this%unit_num)
129 else if (this%source_type == SOURCE_MMAP) then
130 call this%mmap_file%close()
131 end if
132 end if
133
134 this%is_open = .false.
135 end subroutine source_close
136
137 function source_read_line_dynamic(this, line, line_num, byte_off) result(success)
138 !> Read a line from the input source with dynamic allocation
139 !> Uses mmap for files, standard I/O for stdin
140 class(input_source), intent(inout) :: this
141 character(len=:), allocatable, intent(out) :: line
142 integer, intent(out) :: line_num
143 integer(i64), intent(out) :: byte_off
144 logical :: success
145
146 integer :: ios
147 integer :: line_len
148 ! Use a generous fixed buffer for reading (64KB handles most lines)
149 integer, parameter :: READ_BUFFER_SIZE = 65536
150 character(len=READ_BUFFER_SIZE) :: buffer
151
152 success = .false.
153 line_num = 0
154 byte_off = 0
155 if (allocated(line)) deallocate(line)
156
157 if (.not. this%is_open .or. this%eof_reached) return
158
159 ! Use mmap for memory-mapped files (fastest path)
160 if (this%source_type == SOURCE_MMAP) then
161 success = this%mmap_file%read_line(line, line_num, byte_off)
162 if (.not. success) this%eof_reached = .true.
163 return
164 end if
165
166 ! Standard Fortran I/O for stdin and fallback
167 read(this%unit_num, '(A)', iostat=ios) buffer
168
169 if (ios == iostat_end) then
170 this%eof_reached = .true.
171 return
172 end if
173
174 if (ios /= 0) then
175 ! Read error
176 return
177 end if
178
179 ! Get line length (preserve CR like grep)
180 line_len = len_trim(buffer)
181
182 ! Allocate result string trimmed to actual length
183 if (line_len > 0) then
184 line = buffer(1:line_len)
185 else
186 line = ''
187 end if
188
189 ! Update state
190 this%line_number = this%line_number + 1
191 line_num = this%line_number
192 byte_off = this%byte_offset
193
194 ! Update byte offset (line length + newline)
195 this%byte_offset = this%byte_offset + int(line_len, i64) + 1_i64
196
197 success = .true.
198
199 end function source_read_line_dynamic
200
201 function source_read_line_null_dynamic(this, line, line_num, byte_off) result(success)
202 !> Read a NUL-terminated line from the input source (for -z mode)
203 !> Line buffer grows automatically to accommodate any line length
204 class(input_source), intent(inout) :: this
205 character(len=:), allocatable, intent(out) :: line
206 integer, intent(out) :: line_num
207 integer(i64), intent(out) :: byte_off
208 logical :: success
209
210 integer :: ios, pos, capacity
211 character(len=1) :: ch
212 character(len=:), allocatable :: new_buf
213
214 success = .false.
215 line_num = 0
216 byte_off = 0
217 if (allocated(line)) deallocate(line)
218
219 if (.not. this%is_open .or. this%eof_reached) return
220
221 ! Start with initial buffer
222 capacity = initial_line_len
223 allocate(character(len=capacity) :: line)
224 pos = 0
225
226 ! Read byte by byte until NUL or EOF
227 do
228 ! Use formatted read for stdin, unformatted for files
229 if (this%source_type == SOURCE_STDIN) then
230 read(this%unit_num, '(A1)', iostat=ios, advance='no') ch
231 else
232 read(this%unit_num, iostat=ios) ch
233 end if
234
235 if (ios == iostat_end) then
236 this%eof_reached = .true.
237 if (pos > 0) then
238 exit ! Return what we have
239 else
240 deallocate(line)
241 return
242 end if
243 end if
244
245 if (ios /= 0) then
246 deallocate(line)
247 return
248 end if
249
250 ! Check for NUL terminator
251 if (ch == char(0)) exit
252
253 ! Skip carriage returns (for Windows line endings in data)
254 if (ch == char(13)) cycle
255
256 ! Convert embedded newlines to space
257 if (ch == char(10)) ch = ' '
258
259 ! Grow buffer if needed
260 if (pos >= capacity) then
261 capacity = capacity * 2
262 allocate(character(len=capacity) :: new_buf)
263 if (pos > 0) new_buf(1:pos) = line(1:pos)
264 call move_alloc(new_buf, line)
265 end if
266
267 ! Add character to line
268 pos = pos + 1
269 line(pos:pos) = ch
270 end do
271
272 ! Trim to actual length
273 if (pos > 0) then
274 new_buf = line(1:pos)
275 call move_alloc(new_buf, line)
276 else
277 line = ''
278 end if
279
280 ! Update state
281 this%line_number = this%line_number + 1
282 line_num = this%line_number
283 byte_off = this%byte_offset
284
285 ! Update byte offset (record length + NUL)
286 this%byte_offset = this%byte_offset + int(len(line), i64) + 1_i64
287
288 success = .true.
289
290 end function source_read_line_null_dynamic
291
292 function check_binary_file(filename) result(is_binary)
293 !> Check if a file is binary by looking for NUL bytes or non-text chars
294 !> This must be called BEFORE the file is opened for reading
295 character(len=*), intent(in) :: filename
296 logical :: is_binary
297
298 integer, parameter :: CHECK_SIZE = 8192
299 character(len=CHECK_SIZE) :: buffer
300 integer :: ios, i, check_unit, bytes_read
301 integer :: char_code
302 logical :: file_exists
303
304 is_binary = .false.
305
306 ! Open file in stream mode to check for binary content
307 inquire(file=filename, exist=file_exists)
308 if (.not. file_exists) return
309
310 open(newunit=check_unit, file=filename, status='old', action='read', &
311 access='stream', form='unformatted', iostat=ios)
312 if (ios /= 0) return
313
314 ! Initialize buffer to spaces
315 buffer = ''
316
317 read(check_unit, iostat=ios) buffer
318 close(check_unit)
319
320 ! Determine how many bytes were actually read
321 bytes_read = CHECK_SIZE
322 if (ios == iostat_end) then
323 ! File was smaller than buffer - find actual length
324 do i = CHECK_SIZE, 1, -1
325 if (buffer(i:i) /= char(0)) then
326 bytes_read = i
327 exit
328 end if
329 end do
330 else if (ios /= 0) then
331 return
332 end if
333
334 ! Check each byte for binary indicators
335 do i = 1, bytes_read
336 char_code = ichar(buffer(i:i))
337
338 ! NUL byte is definitive binary indicator
339 if (char_code == 0) then
340 is_binary = .true.
341 return
342 end if
343
344 ! Non-printable control chars (except common text ones)
345 ! Allow: tab (9), newline (10), carriage return (13), form feed (12)
346 if (char_code < 32 .and. char_code /= 9 .and. char_code /= 10 &
347 .and. char_code /= 13 .and. char_code /= 12) then
348 is_binary = .true.
349 return
350 end if
351 end do
352
353 end function check_binary_file
354
355 subroutine source_check_binary(this)
356 !> Check if the source is binary (wrapper that calls check_binary_file)
357 !> NOTE: This only works if called BEFORE the file is opened
358 class(input_source), intent(inout) :: this
359
360 this%is_binary = .false.
361 if (this%source_type == SOURCE_STDIN) return
362
363 this%is_binary = check_binary_file(trim(this%filename))
364
365 end subroutine source_check_binary
366
367 function source_read_lines_batch(this, batch) result(success)
368 !> Read multiple lines as a batch (wrapper for mmap batch read)
369 !> Only works for mmap sources; returns false for stdin/file
370 class(input_source), intent(inout) :: this
371 type(line_batch_t), intent(out) :: batch
372 logical :: success
373
374 success = .false.
375 batch%count = 0
376
377 if (.not. this%is_open .or. this%eof_reached) return
378
379 ! Only mmap sources support batch reading
380 if (this%source_type == SOURCE_MMAP) then
381 success = this%mmap_file%read_lines_batch(batch)
382 if (.not. success) this%eof_reached = .true.
383 end if
384
385 end function source_read_lines_batch
386
387 function source_get_line_text(this, info) result(line)
388 !> Get line text from mmap given line info (wrapper)
389 class(input_source), intent(in) :: this
390 type(line_info_t), intent(in) :: info
391 character(len=:), allocatable :: line
392
393 if (this%source_type == SOURCE_MMAP) then
394 line = this%mmap_file%get_line_text(info)
395 else
396 line = ''
397 end if
398
399 end function source_get_line_text
400
401 end module ferp_io
402