Fortran · 33038 bytes Raw Blame History
1 module ferp_matcher
2 !> Pattern matching orchestration for FERP
3 !> Thread-safe: no SAVE variables, all buffers are dynamically allocated
4 use ferp_kinds, only: i64, max_pattern_len, pattern_len
5 use ferp_options
6 use ferp_io
7 use ferp_output
8 use ferp_search
9 use regex_api
10 use pcre_api
11 implicit none
12 private
13
14 public :: match_line, match_fixed_string
15 public :: process_source, to_lower
16 public :: compiled_patterns_t, compile_patterns, free_patterns
17 public :: find_matches
18
!> Container for the compiled form of the search patterns, so compilation
!> happens once and the result is reused for every input line.
!> compile_patterns allocates only the array that belongs to the selected engine.
type :: compiled_patterns_t
    type(regex_t), dimension(:), allocatable :: regexes         ! compiled BRE/ERE patterns
    type(pcre_t), dimension(:), allocatable :: pcres            ! PCRE compiled patterns
    type(bm_pattern_t), dimension(:), allocatable :: bm_pats    ! Boyer-Moore patterns for fixed strings
    integer :: count = 0              ! number of pattern slots in use (set by compile_patterns)
    logical :: compiled = .false.     ! .true. once compile_patterns finished without error
    logical :: is_pcre = .false.      ! True if using PCRE
    logical :: is_fixed = .false.     ! True if using Boyer-Moore fixed strings
end type compiled_patterns_t
29
!> One slot of the before-context buffer: the text of a line stored together
!> with the line number and byte offset it was read at.
type :: context_entry_t
    character(:), allocatable :: text     ! contents of the buffered line
    integer :: line_num = 0               ! line number of the buffered line
    integer(kind=i64) :: byte_off = 0     ! byte offset of the buffered line
end type context_entry_t
36
37 contains
38
subroutine compile_patterns(patterns, opts, compiled, ierr)
    !> Compile all patterns once for reuse.
    !>
    !> The engine is selected from opts%pattern_type:
    !>   * PATTERN_FIXED - Boyer-Moore; each pattern is first split on newlines
    !>   * PATTERN_PERL  - PCRE; -w becomes \b...\b and -x becomes ^...$
    !>   * otherwise     - regex_compile (BRE/ERE); -w becomes \<...\>, -x becomes ^...$
    !>
    !> patterns : raw patterns; the significant length comes from pattern_len()
    !> opts     : reads pattern_type, ignore_case, word_regexp, line_regexp
    !> compiled : receives the compiled patterns; compiled%compiled is .true.
    !>            only when every pattern compiled successfully
    !> ierr     : 0 on success, otherwise the value set by the failing
    !>            pcre_compile / regex_compile call
    character(len=max_pattern_len), intent(in) :: patterns(:)
    type(grep_options), intent(in) :: opts
    type(compiled_patterns_t), intent(out) :: compiled
    integer, intent(out) :: ierr

    integer :: i, j, n, plen, total_subpats, filled
    logical :: is_ere
    ! Deferred length: the decorated pattern (\b..\b, ^..$) can be longer than
    ! max_pattern_len, which a fixed-length buffer would silently truncate.
    character(len=:), allocatable :: pattern
    character(len=max_pattern_len), allocatable :: subpatterns(:)

    ierr = 0
    n = size(patterns)
    compiled%count = n
    compiled%compiled = .false.
    compiled%is_pcre = (opts%pattern_type == PATTERN_PERL)
    compiled%is_fixed = (opts%pattern_type == PATTERN_FIXED)

    ! ---- Fixed strings: Boyer-Moore ---------------------------------------
    if (compiled%is_fixed) then
        ! A pattern containing newlines is a list of alternatives.  Reserve at
        ! least one slot per pattern so an empty pattern is not lost.
        total_subpats = 0
        do i = 1, n
            total_subpats = total_subpats + max(1, count_subpatterns(patterns(i)))
        end do

        allocate(compiled%bm_pats(total_subpats))
        allocate(subpatterns(total_subpats))

        j = 0
        do i = 1, n
            filled = j
            call split_pattern_on_newlines(patterns(i), subpatterns, j)
            if (j == filled) then
                ! The splitter produced nothing, i.e. the pattern is empty.
                ! Keep it as an empty subpattern (same NUL-only encoding the
                ! splitter uses) so that it matches every line, as the
                ! fixed-string matchers document for an empty pattern.
                j = j + 1
                subpatterns(j) = char(0)
            end if
        end do

        ! Number of alternatives actually produced (may differ from n).
        compiled%count = j

        do i = 1, j
            plen = pattern_len(subpatterns(i))
            call bm_compile(compiled%bm_pats(i), subpatterns(i)(1:plen), opts%ignore_case)
        end do

        deallocate(subpatterns)
        compiled%compiled = .true.
        return
    end if

    ! ---- Perl-compatible patterns: PCRE -----------------------------------
    if (compiled%is_pcre) then
        allocate(compiled%pcres(n))

        do i = 1, n
            plen = pattern_len(patterns(i))

            if (opts%word_regexp) then
                ! -w: wrap in PCRE word boundaries
                pattern = '\b' // patterns(i)(1:plen) // '\b'
            else if (opts%line_regexp) then
                ! -x: anchor to the whole line
                pattern = '^' // patterns(i)(1:plen) // '$'
            else
                pattern = patterns(i)(1:plen)
            end if

            call pcre_compile(compiled%pcres(i), pattern, opts%ignore_case, ierr)
            if (ierr /= 0) return   ! compiled%compiled stays .false.
        end do

        compiled%compiled = .true.
        return
    end if

    ! ---- BRE / ERE ---------------------------------------------------------
    allocate(compiled%regexes(n))
    is_ere = (opts%pattern_type == PATTERN_ERE)

    do i = 1, n
        plen = pattern_len(patterns(i))

        if (opts%word_regexp) then
            ! -w: wrap in word-start / word-end anchors
            pattern = '\<' // patterns(i)(1:plen) // '\>'
        else if (opts%line_regexp) then
            ! -x: anchor to the whole line
            pattern = '^' // patterns(i)(1:plen) // '$'
        else
            pattern = patterns(i)(1:plen)
        end if

        call regex_compile(compiled%regexes(i), pattern, is_ere, ierr)
        if (ierr /= 0) return   ! compiled%compiled stays .false.
    end do

    compiled%compiled = .true.

end subroutine compile_patterns
154
subroutine free_patterns(compiled)
    !> Release every compiled pattern held by `compiled` and return the
    !> container to its empty state (count 0, all flags .false.).
    type(compiled_patterns_t), intent(inout) :: compiled

    integer :: k, n_slots

    ! The same slot count applies to whichever engine array is allocated.
    n_slots = compiled%count

    if (allocated(compiled%regexes)) then
        do k = 1, n_slots
            call regex_free(compiled%regexes(k))
        end do
        deallocate(compiled%regexes)
    end if

    if (allocated(compiled%pcres)) then
        do k = 1, n_slots
            call pcre_free(compiled%pcres(k))
        end do
        deallocate(compiled%pcres)
    end if

    if (allocated(compiled%bm_pats)) then
        do k = 1, n_slots
            call bm_free(compiled%bm_pats(k))
        end do
        deallocate(compiled%bm_pats)
    end if

    ! Back to the "nothing compiled" state.
    compiled%is_fixed = .false.
    compiled%is_pcre = .false.
    compiled%compiled = .false.
    compiled%count = 0

end subroutine free_patterns
187
function match_line(line, patterns, opts, compiled) result(matches)
    !> Check if line matches any pattern according to options.
    !>
    !> line     : text of the line to test
    !> patterns : raw patterns (used directly only on the fallback paths)
    !> opts     : reads pattern_type, ignore_case and invert_match here; -w/-x
    !>            are applied by the fixed-string helpers
    !> compiled : optional pre-compiled patterns; inout for DFA cache
    !> result   : .true. when some pattern matches, negated when
    !>            opts%invert_match is set
    character(len=*), intent(in) :: line
    character(len=max_pattern_len), intent(in) :: patterns(:)
    type(grep_options), intent(in) :: opts
    type(compiled_patterns_t), intent(inout), optional :: compiled ! inout for DFA cache
    logical :: matches

    integer :: i, num_patterns
    logical :: have_compiled, use_bm, use_pcre
    character(len=:), allocatable :: search_line
    character(len=max_pattern_len) :: search_pattern

    matches = .false.

    ! Fortran does not guarantee short-circuit evaluation of .and., so an
    ! absent optional must never appear in the same expression as present().
    ! Resolve everything that depends on `compiled` once, behind a real guard.
    have_compiled = .false.
    use_bm = .false.
    use_pcre = .false.
    if (present(compiled)) then
        have_compiled = compiled%compiled
        use_bm = have_compiled .and. compiled%is_fixed
        use_pcre = have_compiled .and. compiled%is_pcre
    end if

    ! For -F mode with compiled patterns, use compiled%count (may differ from
    ! size(patterns) because of newline splitting).
    if (use_bm) then
        num_patterns = compiled%count
    else
        num_patterns = size(patterns)
    end if

    ! Only the fixed-string fallback needs a (possibly lower-cased) copy of
    ! the line; every other path works on `line` itself.
    if (opts%pattern_type == PATTERN_FIXED .and. .not. use_bm) then
        if (opts%ignore_case) then
            search_line = to_lower(line)
        else
            search_line = line
        end if
    end if

    ! Try each pattern until one matches
    do i = 1, num_patterns
        select case (opts%pattern_type)
        case (PATTERN_FIXED)
            if (use_bm) then
                matches = match_fixed_bm(line, compiled%bm_pats(i), opts)
            else
                ! Fallback to simple index search
                if (opts%ignore_case) then
                    search_pattern = to_lower(patterns(i))
                else
                    search_pattern = patterns(i)
                end if
                matches = match_fixed_string(search_line, search_pattern, opts)
            end if

        case (PATTERN_BRE, PATTERN_ERE)
            if (have_compiled) then
                matches = regex_match(compiled%regexes(i), line, opts%ignore_case)
            else
                ! Fallback if no compiled patterns (shouldn't happen in normal use)
                matches = match_regex_inline(line, patterns(i), opts)
            end if

        case (PATTERN_PERL)
            if (use_pcre) then
                ! ignore_case is handled at compile time for PCRE
                matches = pcre_match(compiled%pcres(i), line)
            else if (have_compiled) then
                ! Fallback if PCRE not available (shouldn't happen normally)
                matches = regex_match(compiled%regexes(i), line, opts%ignore_case)
            else
                matches = match_fixed_string(line, patterns(i), opts)
            end if
        end select

        if (matches) exit
    end do

    ! Apply invert match
    if (opts%invert_match) matches = .not. matches

end function match_line
267
function match_regex_inline(line, pattern, opts) result(matches)
    !> Compile and match regex inline (less efficient, for fallback).
    !>
    !> line    : text to test
    !> pattern : raw pattern; its significant length is taken from
    !>           pattern_len(), the same way compile_patterns measures it
    !> opts    : reads pattern_type, word_regexp, line_regexp, ignore_case
    !> result  : .true. on a match; .false. when there is no match or the
    !>           pattern fails to compile
    character(len=*), intent(in) :: line
    character(len=*), intent(in) :: pattern
    type(grep_options), intent(in) :: opts
    logical :: matches

    type(regex_t) :: re
    integer :: ierr, plen
    logical :: is_ere
    ! Deferred length so that adding \< \> ^ $ can never truncate the pattern.
    character(len=:), allocatable :: pat

    matches = .false.
    is_ere = (opts%pattern_type == PATTERN_ERE)

    ! Use pattern_len rather than trim so trailing blanks that belong to the
    ! pattern are kept and nothing past its logical end is compiled.
    plen = pattern_len(pattern)
    pat = pattern(1:plen)

    if (opts%word_regexp) pat = '\<' // pat // '\>'
    if (opts%line_regexp) pat = '^' // pat // '$'

    call regex_compile(re, pat, is_ere, ierr)
    if (ierr /= 0) return

    matches = regex_match(re, line, opts%ignore_case)
    call regex_free(re)

end function match_regex_inline
298
function match_fixed_string(line, pattern, opts) result(matches)
    !> Fixed string matching (for -F mode).
    !>
    !> line    : text to search; trailing blanks are ignored (len_trim)
    !> pattern : fixed string; its length comes from pattern_len() so that
    !>           whitespace-only patterns are preserved
    !> opts    : reads word_regexp (-w) and line_regexp (-x)
    !> result  : .true. when the pattern occurs in the line and the occurrence
    !>           satisfies -w / -x if they are set
    character(len=*), intent(in) :: line
    character(len=*), intent(in) :: pattern
    type(grep_options), intent(in) :: opts
    logical :: matches

    integer :: start, rel, pos
    integer :: line_len, pat_len

    matches = .false.
    line_len = len_trim(line)
    pat_len = pattern_len(pattern) ! Use pattern_len to preserve whitespace patterns

    if (pat_len == 0) then
        ! Empty pattern matches everything
        matches = .true.
        return
    end if

    ! -x: the pattern must be the entire line.  A whole-line occurrence
    ! trivially satisfies -w as well, so no further check is needed.
    if (opts%line_regexp) then
        if (pat_len == line_len) matches = (line(1:line_len) == pattern(1:pat_len))
        return
    end if

    ! Walk over every occurrence: with -w an early occurrence embedded in a
    ! longer word must not hide a later stand-alone one ("foobar foo").
    start = 1
    do while (start + pat_len - 1 <= line_len)
        rel = index(line(start:line_len), pattern(1:pat_len))
        if (rel == 0) return
        pos = start + rel - 1   ! back to an index into the full line

        if (.not. opts%word_regexp) then
            matches = .true.
            return
        end if
        if (is_word_match(line, pos, pat_len)) then
            matches = .true.
            return
        end if

        start = pos + 1
    end do

end function match_fixed_string
337
function match_fixed_bm(line, bm_pat, opts) result(matches)
    !> Fixed string matching using Boyer-Moore algorithm.
    !>
    !> line   : text to search; trailing blanks are ignored (len_trim)
    !> bm_pat : pattern compiled by bm_compile
    !> opts   : reads word_regexp (-w) and line_regexp (-x)
    !> result : .true. when the pattern occurs in the line and the occurrence
    !>          satisfies -w / -x if they are set
    !>
    !> bm_search is used exactly as before: it is handed a substring and its
    !> result is treated as a 1-based position in that substring, 0 meaning
    !> "not found".
    character(len=*), intent(in) :: line
    type(bm_pattern_t), intent(in) :: bm_pat
    type(grep_options), intent(in) :: opts
    logical :: matches

    integer :: start, rel, pos
    integer :: line_len, pat_len

    matches = .false.
    line_len = len_trim(line)
    pat_len = bm_pat%pattern_len

    if (pat_len == 0) then
        ! Empty pattern matches everything
        matches = .true.
        return
    end if

    ! -x: the pattern must be the entire line.  A whole-line occurrence
    ! trivially satisfies -w as well, so no further check is needed.
    if (opts%line_regexp) then
        if (pat_len == line_len) matches = (bm_search(line(1:line_len), bm_pat) == 1)
        return
    end if

    ! Walk over every occurrence: with -w an early occurrence embedded in a
    ! longer word must not hide a later stand-alone one ("foobar foo").
    start = 1
    do while (start + pat_len - 1 <= line_len)
        rel = bm_search(line(start:line_len), bm_pat)
        if (rel == 0) return
        pos = start + rel - 1   ! back to an index into the full line

        if (.not. opts%word_regexp) then
            matches = .true.
            return
        end if
        if (is_word_match(line, pos, pat_len)) then
            matches = .true.
            return
        end if

        start = pos + 1
    end do

end function match_fixed_bm
376
function is_word_match(line, pos, pat_len) result(is_word)
    !> Decide whether the match of length pat_len that starts at pos is a
    !> whole word: neither the character just before it nor the character
    !> just after it may be a word character.  The start of the line and the
    !> end of the trimmed line both count as boundaries.
    character(len=*), intent(in) :: line
    integer, intent(in) :: pos, pat_len
    logical :: is_word

    integer :: last_col, after_col
    logical :: left_free, right_free

    last_col = len_trim(line)
    after_col = pos + pat_len          ! column right behind the match

    ! Left edge: free at line start, otherwise inspect the preceding character
    left_free = .true.
    if (pos /= 1) left_free = .not. is_word_char(line(pos-1:pos-1))

    ! Right edge: free when the match reaches the end of the trimmed line
    right_free = .true.
    if (after_col - 1 < last_col) right_free = .not. is_word_char(line(after_col:after_col))

    is_word = left_free .and. right_free

end function is_word_match
406
pure function is_word_char(c) result(is_word)
    !> Tell whether c is a word character: an ASCII letter, a decimal digit
    !> or the underscore.
    character(len=1), intent(in) :: c
    logical :: is_word

    ! Complete set of characters that count as part of a word.
    character(len=*), parameter :: word_chars = &
        'abcdefghijklmnopqrstuvwxyz' // &
        'ABCDEFGHIJKLMNOPQRSTUVWXYZ' // &
        '0123456789' // '_'

    is_word = index(word_chars, c) > 0

end function is_word_char
421
pure function to_lower(str) result(lower)
    !> Return a copy of str in which the letters A-Z are replaced by a-z.
    !> All other characters are copied unchanged; the result has the same
    !> length as the input.
    character(len=*), intent(in) :: str
    character(len=len(str)) :: lower

    character(len=26), parameter :: upper_case = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    character(len=26), parameter :: lower_case = 'abcdefghijklmnopqrstuvwxyz'
    integer :: col, slot

    lower = str
    do col = 1, len(str)
        ! slot is the letter's position in the alphabet, 0 for non-letters
        slot = index(upper_case, str(col:col))
        if (slot > 0) lower(col:col) = lower_case(slot:slot)
    end do

end function to_lower
439
subroutine match_lines_batch(src, batch, patterns, opts, compiled, match_results)
    !> Run match_line over every line of a batch.
    !> match_results(k) receives the verdict for batch%lines(k); entries past
    !> batch%count are set to .false.
    type(input_source), intent(in) :: src
    type(line_batch_t), intent(in) :: batch
    character(len=max_pattern_len), intent(in) :: patterns(:)
    type(grep_options), intent(in) :: opts
    type(compiled_patterns_t), intent(inout) :: compiled
    logical, intent(out) :: match_results(BATCH_SIZE)

    integer :: k
    character(len=:), allocatable :: text

    match_results(:) = .false.

    do k = 1, batch%count
        ! Pull the text of this line out of the source, then test it
        text = src%get_line_text(batch%lines(k))
        match_results(k) = match_line(text, patterns, opts, compiled)
    end do

end subroutine match_lines_batch
463
function can_use_batch_mode(src, opts) result(can_batch)
    !> Decide whether the optimized batch path may be used.
    !> It is allowed only for an mmap source (file held in memory) and only
    !> when none of the modes that need per-line bookkeeping is active:
    !> context lines, invert match, only-matching, files-without-match,
    !> null-data.
    type(input_source), intent(in) :: src
    type(grep_options), intent(in) :: opts
    logical :: can_batch

    logical :: needs_line_by_line

    needs_line_by_line = opts%before_context > 0 &
                    .or. opts%after_context > 0 &
                    .or. opts%invert_match &
                    .or. opts%only_matching &
                    .or. opts%files_without_match &
                    .or. opts%null_data

    can_batch = (src%source_type == SOURCE_MMAP) .and. .not. needs_line_by_line

end function can_use_batch_mode
494
function process_source_batch(src, patterns, opts, compiled) result(found_match)
    !> Fast path for simple search modes: read the source batch by batch,
    !> match each batch in one go and emit the output for the matching lines.
    !> Returns .true. as soon as at least one line matched.
    type(input_source), intent(inout) :: src
    character(len=max_pattern_len), intent(in) :: patterns(:)
    type(grep_options), intent(inout) :: opts
    type(compiled_patterns_t), intent(inout) :: compiled
    logical :: found_match

    ! Upper bound on highlighted spans per line (color mode)
    integer, parameter :: MAX_MATCHES_PER_LINE = 100

    type(line_batch_t) :: batch
    logical :: hit(BATCH_SIZE)
    character(len=:), allocatable :: text
    integer :: k, n_hits
    integer :: span_first(MAX_MATCHES_PER_LINE)
    integer :: span_last(MAX_MATCHES_PER_LINE)
    integer :: n_spans

    found_match = .false.
    n_hits = 0

    ! Process batches until EOF
    batches: do while (src%read_lines_batch(batch))
        call match_lines_batch(src, batch, patterns, opts, compiled, hit)

        lines: do k = 1, batch%count
            if (.not. hit(k)) cycle lines

            found_match = .true.
            n_hits = n_hits + 1

            ! Binary file without text mode: one notice, then stop
            if (src%is_binary .and. .not. opts%text_mode) then
                call print_binary_match(src%filename, opts)
                return
            end if

            ! Quiet mode: the first match settles the result
            if (opts%quiet) return

            ! Files-with-matches mode: the file name is all that is printed
            if (opts%files_with_matches) then
                call print_filename(src%filename, opts)
                return
            end if

            ! Regular output, skipped when only a count is wanted
            if (.not. opts%count_only) then
                text = src%get_line_text(batch%lines(k))
                if (opts%color_mode == COLOR_ALWAYS) then
                    call find_matches(text, patterns, opts, compiled, span_first, span_last, n_spans)
                    call print_match_colored(text, src%filename, batch%lines(k)%line_num, &
                                             batch%lines(k)%byte_off, opts, span_first, span_last, n_spans)
                else
                    call print_match(text, src%filename, batch%lines(k)%line_num, &
                                     batch%lines(k)%byte_off, opts)
                end if
            end if

            ! Stop once the maximum number of matches is reached
            if (opts%max_count > 0 .and. n_hits >= opts%max_count) then
                if (opts%count_only) call print_count(n_hits, src%filename, opts)
                return
            end if
        end do lines
    end do batches

    ! Count mode: report after the whole source has been read
    if (opts%count_only) then
        if (opts%files_with_matches) then
            ! When -l is also set, only print filename for files with matches
            if (n_hits > 0) call print_filename(src%filename, opts)
        else
            call print_count(n_hits, src%filename, opts)
        end if
    end if

end function process_source_batch
583
subroutine find_matches(line, patterns, opts, compiled, match_starts, match_ends, num_matches)
    !> Find all matches in a line, returning their positions.
    !> For -o mode (and color highlighting) this collects the non-overlapping
    !> matches of each pattern in turn.
    !>
    !> line         : text to search; trailing blanks are ignored
    !> patterns     : raw patterns (searched directly in fixed-string mode)
    !> opts         : reads pattern_type, ignore_case, word_regexp, line_regexp
    !> compiled     : optional compiled patterns; required for PCRE and
    !>                BRE/ERE, otherwise no match is reported; inout for DFA cache
    !> match_starts : 1-based start column of each match found
    !> match_ends   : 1-based end column of each match found
    !> num_matches  : number of valid entries; never exceeds the size of the
    !>                two output arrays
    character(len=*), intent(in) :: line
    character(len=max_pattern_len), intent(in) :: patterns(:)
    type(grep_options), intent(in) :: opts
    type(compiled_patterns_t), intent(inout), optional :: compiled ! inout for DFA cache
    integer, intent(out) :: match_starts(:), match_ends(:)
    integer, intent(out) :: num_matches

    integer :: i, start, rel, pos, line_len, pat_len, capacity
    logical :: have_compiled, have_pcre, accept
    type(match_result_t) :: res
    type(pcre_match_result_t) :: pcre_res
    character(len=:), allocatable :: search_line
    character(len=max_pattern_len) :: search_pattern

    num_matches = 0
    line_len = len_trim(line)
    if (line_len == 0) return

    capacity = min(size(match_starts), size(match_ends))
    if (capacity <= 0) return

    ! Fortran does not guarantee short-circuit evaluation, so `compiled` is
    ! only dereferenced inside a real present() guard.
    have_compiled = .false.
    have_pcre = .false.
    if (present(compiled)) then
        have_compiled = compiled%compiled
        have_pcre = have_compiled .and. compiled%is_pcre
    end if

    select case (opts%pattern_type)

    case (PATTERN_FIXED)
        ! ---- Fixed strings: plain substring search -------------------------
        if (opts%ignore_case) then
            search_line = to_lower(line)
        else
            search_line = line
        end if

        do i = 1, size(patterns)
            ! Get pattern length (preserving whitespace patterns)
            pat_len = pattern_len(patterns(i))
            if (pat_len == 0) cycle

            if (opts%ignore_case) then
                search_pattern = to_lower(patterns(i)(1:pat_len))
            else
                search_pattern = patterns(i)(1:pat_len)
            end if

            start = 1
            do while (start + pat_len - 1 <= line_len)
                rel = index(search_line(start:line_len), search_pattern(1:pat_len))
                if (rel == 0) exit

                ! index() is relative to the substring; convert to a column
                ! of the full line.
                pos = start + rel - 1

                ! Apply the same -w / -x rules the fixed-string matchers use.
                accept = .true.
                if (opts%line_regexp) accept = (pos == 1 .and. pat_len == line_len)
                if (accept .and. opts%word_regexp) accept = is_word_match(line, pos, pat_len)

                if (accept) then
                    call record(pos, pos + pat_len - 1)
                    if (num_matches >= capacity) return
                    start = pos + pat_len   ! continue after this match
                else
                    start = pos + 1         ! rejected: try the next occurrence
                end if
            end do
        end do

    case (PATTERN_PERL)
        ! ---- PCRE: positions are reported relative to the full line --------
        if (.not. have_pcre) return

        do i = 1, size(patterns)
            start = 1
            do while (start <= line_len)
                pcre_res = pcre_search(compiled%pcres(i), line, start_offset=start)
                if (.not. pcre_res%matched) exit

                call record(pcre_res%match_start, pcre_res%match_end)
                if (num_matches >= capacity) return

                if (pcre_res%match_end >= pcre_res%match_start) then
                    ! max() only guards against a search that makes no progress
                    start = max(start, pcre_res%match_end) + 1
                else
                    ! Empty match: step past the place it was found, not just
                    ! one column past the search start, otherwise the same
                    ! empty match is found again and recorded repeatedly.
                    start = max(start, pcre_res%match_start) + 1
                end if
            end do
        end do

    case default
        ! ---- BRE/ERE: positions are relative to the searched substring -----
        if (.not. have_compiled) return

        do i = 1, size(patterns)
            start = 1
            do while (start <= line_len)
                res = regex_search(compiled%regexes(i), line(start:), opts%ignore_case)
                if (.not. res%matched) exit

                ! Adjust for the substring offset
                call record(start + res%match_start - 1, start + res%match_end - 1)
                if (num_matches >= capacity) return

                if (res%match_end >= res%match_start) then
                    start = start + res%match_end
                else
                    ! Empty match: continue right behind the place it was
                    ! found (at least one column forward).
                    start = start + max(res%match_start, 1)
                end if
            end do
        end do

    end select

contains

    subroutine record(first, last)
        !> Append one match span to the output arrays if there is room left.
        integer, intent(in) :: first, last

        if (num_matches >= capacity) return
        num_matches = num_matches + 1
        match_starts(num_matches) = first
        match_ends(num_matches) = last
    end subroutine record

end subroutine find_matches
700
function process_source(src, patterns, opts, compiled) result(found_match)
    !> Process a single input source, return true if any matches found.
    !> Thread-safe: all buffers are locally allocated (no SAVE variables).
    !>
    !> src      : input source; read line by line, or in batches on the fast path
    !> patterns : raw search patterns
    !> opts     : search/output options; opts%line_number_width is updated
    !>            here when opts%initial_tab is set
    !> compiled : optional pre-compiled patterns; inout for DFA cache
    !> result   : .true. when at least one line matched
    !>
    !> Output is produced through the print_* routines according to the
    !> quiet, files-with-matches, only-matching, count, context and
    !> files-without-match options.
    type(input_source), intent(inout) :: src
    character(len=max_pattern_len), intent(in) :: patterns(:)
    type(grep_options), intent(inout) :: opts
    type(compiled_patterns_t), intent(inout), optional :: compiled ! inout for DFA cache
    logical :: found_match

    ! Upper bound on match spans collected per line (-o and color modes)
    integer, parameter :: MAX_MATCHES_PER_LINE = 100
    ! Widest line-number column used for -T; also the default for sources
    ! whose size is unknown
    integer, parameter :: MAX_LINE_NUMBER_WIDTH = 7

    character(len=:), allocatable :: line
    integer :: line_num
    integer(i64) :: byte_off
    integer(i64) :: remaining
    integer :: match_count, width
    logical :: line_matched, have_compiled, more_input

    ! For -o mode
    integer :: match_starts(MAX_MATCHES_PER_LINE)
    integer :: match_ends(MAX_MATCHES_PER_LINE)
    integer :: num_matches, j

    ! For context lines - dynamically allocated (thread-safe); released
    ! automatically on every return path because it is a local allocatable
    type(context_entry_t), allocatable :: before_buffer(:)
    integer :: buf_start, buf_count, buf_idx
    integer :: after_remaining     ! Lines of after-context still to print
    integer :: last_printed_line   ! Last line number we printed
    integer :: first_line          ! First line number of the group about to be printed
    logical :: need_separator      ! Need to print -- before next output
    logical :: use_context
    integer :: k

    ! Line number width for -T alignment: the number of decimal digits of the
    ! file size, capped at MAX_LINE_NUMBER_WIDTH.
    if (opts%initial_tab) then
        ! Stdin or unknown size: use the large default for alignment
        width = MAX_LINE_NUMBER_WIDTH
        if (src%source_type == SOURCE_MMAP) then
            if (src%mmap_file%size > 0) then
                width = 1
                remaining = src%mmap_file%size
                do while (remaining >= 10 .and. width < MAX_LINE_NUMBER_WIDTH)
                    remaining = remaining / 10
                    width = width + 1
                end do
            end if
        end if
        opts%line_number_width = width
    end if

    ! Fortran does not guarantee short-circuit evaluation, so an absent
    ! optional must not be referenced in the same expression as present().
    have_compiled = .false.
    if (present(compiled)) have_compiled = compiled%compiled

    ! Try optimized batch mode for simple cases
    if (have_compiled) then
        if (can_use_batch_mode(src, opts)) then
            found_match = process_source_batch(src, patterns, opts, compiled)
            return
        end if
    end if

    found_match = .false.
    match_count = 0
    buf_start = 1
    buf_count = 0
    after_remaining = 0
    last_printed_line = 0
    need_separator = .false.
    use_context = (opts%before_context > 0 .or. opts%after_context > 0)

    ! Allocate context buffer if needed
    if (use_context .and. opts%before_context > 0) then
        allocate(before_buffer(opts%before_context))
    end if

    ! Process lines (line-by-line fallback)
    do
        ! Read next line with dynamic allocation (no length limit)
        if (opts%null_data) then
            more_input = src%read_line_null_dynamic(line, line_num, byte_off)
        else
            more_input = src%read_line_dynamic(line, line_num, byte_off)
        end if
        if (.not. more_input) exit

        ! An absent optional may be forwarded to another optional dummy, so
        ! `compiled` is passed on as is.
        line_matched = match_line(line, patterns, opts, compiled)

        if (line_matched) then
            found_match = .true.
            match_count = match_count + 1

            ! Handle binary files: one notice, then stop reading
            if (src%is_binary .and. .not. opts%text_mode) then
                call print_binary_match(src%filename, opts)
                exit
            end if

            ! Handle different output modes
            if (opts%quiet) then
                return
            else if (opts%files_with_matches) then
                call print_filename(src%filename, opts)
                return
            else if (opts%only_matching .and. .not. opts%count_only) then
                ! -o mode: print each match (but -c takes priority)
                call find_matches(line, patterns, opts, compiled, match_starts, match_ends, num_matches)
                do j = 1, num_matches
                    call print_only_match(line, match_starts(j), match_ends(j), &
                                          src%filename, line_num, byte_off, opts)
                end do
            else if (.not. opts%count_only .and. .not. opts%files_without_match) then
                ! Context and normal mode
                if (use_context) then
                    ! Determine first line we'll print (for separator check)
                    if (buf_count > 0 .and. opts%before_context > 0) then
                        buf_idx = mod(buf_start - 1, buf_count) + 1
                        first_line = before_buffer(buf_idx)%line_num
                    else
                        first_line = line_num
                    end if

                    ! Print separator if there's a gap between context groups
                    if (need_separator .and. last_printed_line > 0 .and. &
                        first_line > last_printed_line + 1) then
                        call print_separator(opts)
                    end if
                    need_separator = .true.

                    ! Print before-context from buffer (oldest to newest)
                    if (buf_count > 0 .and. opts%before_context > 0) then
                        do k = 0, buf_count - 1
                            buf_idx = mod(buf_start + k - 1, buf_count) + 1
                            ! Skip lines that were already printed
                            if (before_buffer(buf_idx)%line_num > last_printed_line) then
                                call print_context_line(before_buffer(buf_idx)%text, src%filename, &
                                    before_buffer(buf_idx)%line_num, before_buffer(buf_idx)%byte_off, opts)
                                last_printed_line = before_buffer(buf_idx)%line_num
                            end if
                        end do
                        ! Clear buffer after printing
                        buf_count = 0
                        buf_start = 1
                    end if
                end if

                ! Print the matching line
                if (line_num > last_printed_line) then
                    if (opts%color_mode == COLOR_ALWAYS) then
                        ! Get match positions for color highlighting
                        call find_matches(line, patterns, opts, compiled, match_starts, match_ends, num_matches)
                        call print_match_colored(line, src%filename, line_num, byte_off, opts, &
                                                 match_starts, match_ends, num_matches)
                    else
                        call print_match(line, src%filename, line_num, byte_off, opts)
                    end if
                    last_printed_line = line_num
                end if

                ! Reset after-context counter
                after_remaining = opts%after_context
            end if

            ! Check max count
            if (opts%max_count > 0 .and. match_count >= opts%max_count) exit

        else
            ! Non-matching line: only relevant for context output
            if (use_context .and. .not. opts%count_only .and. .not. opts%quiet .and. &
                .not. opts%files_with_matches .and. .not. opts%files_without_match .and. &
                .not. opts%only_matching) then

                if (after_remaining > 0 .and. line_num > last_printed_line) then
                    ! Print as after-context
                    call print_context_line(line, src%filename, line_num, byte_off, opts)
                    last_printed_line = line_num
                    after_remaining = after_remaining - 1
                else if (opts%before_context > 0) then
                    ! Store in before-context buffer (circular buffer)
                    if (buf_count < opts%before_context) then
                        buf_count = buf_count + 1
                        buf_idx = buf_count
                    else
                        ! Buffer is full, overwrite oldest entry
                        buf_idx = buf_start
                        buf_start = mod(buf_start, opts%before_context) + 1
                    end if
                    before_buffer(buf_idx)%text = line
                    before_buffer(buf_idx)%line_num = line_num
                    before_buffer(buf_idx)%byte_off = byte_off
                end if
            end if
        end if
    end do

    ! Handle -c (count) mode
    if (opts%count_only) then
        if (opts%files_with_matches) then
            ! When -l is also set, only print filename for files with matches
            if (match_count > 0) call print_filename(src%filename, opts)
        else
            call print_count(match_count, src%filename, opts)
        end if
    end if

    ! Handle -L (files without match) mode
    if (opts%files_without_match .and. .not. found_match) then
        call print_filename(src%filename, opts)
    end if

end function process_source
939
function count_subpatterns(pattern) result(n_sub)
    !> Number of pieces obtained when the pattern is split on newline
    !> characters: 0 for an empty pattern, otherwise one more than the number
    !> of newlines within its first pattern_len(pattern) characters.
    character(len=*), intent(in) :: pattern
    integer :: n_sub

    character(len=1), parameter :: newline = char(10)
    integer :: col, plen

    plen = pattern_len(pattern)

    if (plen == 0) then
        n_sub = 0
    else
        ! One piece to begin with, plus one for every separator found
        n_sub = 1 + count([(pattern(col:col) == newline, col = 1, plen)])
    end if

end function count_subpatterns
962
subroutine split_pattern_on_newlines(pattern, subpatterns, next_idx)
    !> Split a pattern on newline characters into multiple subpatterns.
    !> Appends subpatterns to the array starting at next_idx+1.
    !>
    !> pattern     : pattern to split; only its first pattern_len(pattern)
    !>               characters are considered
    !> subpatterns : receives the pieces; each piece is followed by a NUL
    !>               character when there is room for one.  An empty piece
    !>               (consecutive newlines or newline at start) is stored as a
    !>               lone NUL.
    !> next_idx    : on entry the index of the last slot in use, on return the
    !>               index of the last slot written
    !>
    !> A newline at the very end of the pattern does not create a further
    !> (empty) piece.  Pieces that would not fit into subpatterns are dropped
    !> rather than written out of bounds.
    character(len=*), intent(in) :: pattern
    character(len=max_pattern_len), intent(inout) :: subpatterns(:)
    integer, intent(inout) :: next_idx

    character(len=1), parameter :: newline = char(10)
    character(len=1), parameter :: nul = char(0)
    integer :: i, plen, seg_start

    plen = pattern_len(pattern)
    if (plen == 0) return

    seg_start = 1
    do i = 1, plen
        if (pattern(i:i) /= newline) cycle
        ! Found a newline - everything since seg_start is one subpattern
        call append_segment(seg_start, i - 1)
        seg_start = i + 1
    end do

    ! Handle last segment (after final newline or entire string if no newlines)
    if (seg_start <= plen) call append_segment(seg_start, plen)

contains

    subroutine append_segment(first, last)
        !> Store pattern(first:last) in the next free slot and NUL-terminate it.
        !> last = first - 1 denotes an empty segment.
        integer, intent(in) :: first, last
        integer :: seg_len

        ! Never write past the end of the caller's array
        if (next_idx >= size(subpatterns)) return
        next_idx = next_idx + 1

        ! A zero-length substring is valid and blank-fills the slot
        subpatterns(next_idx) = pattern(first:last)

        ! Add null terminator, but only when it fits inside the slot
        seg_len = max(last - first + 1, 0)
        if (seg_len < max_pattern_len) then
            subpatterns(next_idx)(seg_len+1:seg_len+1) = nul
        end if
    end subroutine append_segment

end subroutine split_pattern_on_newlines
1006
1007 end module ferp_matcher
1008