@@ -43,9 +43,10 @@ contains |
| 43 | 43 | type(compiled_patterns_t), intent(out) :: compiled |
| 44 | 44 | integer, intent(out) :: ierr |
| 45 | 45 | |
| 46 | | - integer :: i, n, plen |
| 46 | + integer :: i, j, n, plen, total_subpats, subpat_count |
| 47 | 47 | logical :: is_ere |
| 48 | 48 | character(len=max_pattern_len) :: pattern |
| 49 | + character(len=max_pattern_len), allocatable :: subpatterns(:) |
| 49 | 50 | |
| 50 | 51 | ierr = 0 |
| 51 | 52 | n = size(patterns) |
@@ -55,18 +56,36 @@ contains |
| 55 | 56 | |
| 56 | 57 | ! Use Boyer-Moore for fixed string patterns |
| 57 | 58 | if (compiled%is_fixed) then |
| 58 | | - allocate(compiled%bm_pats(n)) |
| 59 | + ! For -F mode, patterns containing newlines should be split into multiple patterns |
| 60 | + ! First count total subpatterns |
| 61 | + total_subpats = 0 |
| 62 | + do i = 1, n |
| 63 | + total_subpats = total_subpats + count_subpatterns(patterns(i)) |
| 64 | + end do |
| 65 | + |
| 66 | + allocate(compiled%bm_pats(total_subpats)) |
| 67 | + allocate(subpatterns(total_subpats)) |
| 59 | 68 | |
| 69 | + ! Split patterns on newlines |
| 70 | + j = 0 |
| 60 | 71 | do i = 1, n |
| 61 | | - plen = pattern_len(patterns(i)) |
| 72 | + call split_pattern_on_newlines(patterns(i), subpatterns, j) |
| 73 | + end do |
| 74 | + |
| 75 | + compiled%count = j |
| 76 | + |
| 77 | + ! Compile each subpattern |
| 78 | + do i = 1, j |
| 79 | + plen = pattern_len(subpatterns(i)) |
| 62 | 80 | ! For case-insensitive, convert pattern to lowercase |
| 63 | 81 | if (opts%ignore_case) then |
| 64 | | - call bm_compile(compiled%bm_pats(i), patterns(i)(1:plen), .true.) |
| 82 | + call bm_compile(compiled%bm_pats(i), subpatterns(i)(1:plen), .true.) |
| 65 | 83 | else |
| 66 | | - call bm_compile(compiled%bm_pats(i), patterns(i)(1:plen), .false.) |
| 84 | + call bm_compile(compiled%bm_pats(i), subpatterns(i)(1:plen), .false.) |
| 67 | 85 | end if |
| 68 | 86 | end do |
| 69 | 87 | |
| 88 | + deallocate(subpatterns) |
| 70 | 89 | compiled%compiled = .true. |
| 71 | 90 | return |
| 72 | 91 | end if |
@@ -174,7 +193,7 @@ contains |
| 174 | 193 | type(compiled_patterns_t), intent(inout), optional :: compiled ! inout for DFA cache |
| 175 | 194 | logical :: matches |
| 176 | 195 | |
| 177 | | - integer :: i |
| 196 | + integer :: i, num_patterns |
| 178 | 197 | character(len=:), allocatable :: search_line |
| 179 | 198 | character(len=max_pattern_len) :: search_pattern |
| 180 | 199 | |
@@ -187,8 +206,16 @@ contains |
| 187 | 206 | search_line = line |
| 188 | 207 | end if |
| 189 | 208 | |
| 209 | + ! Determine number of patterns to try |
| 210 | + ! For -F mode with compiled patterns, use compiled%count (may differ due to newline splitting) |
| 211 | + if (present(compiled) .and. compiled%compiled .and. compiled%is_fixed) then |
| 212 | + num_patterns = compiled%count |
| 213 | + else |
| 214 | + num_patterns = size(patterns) |
| 215 | + end if |
| 216 | + |
| 190 | 217 | ! Try each pattern |
| 191 | | - do i = 1, size(patterns) |
| 218 | + do i = 1, num_patterns |
| 192 | 219 | ! Match based on pattern type |
| 193 | 220 | select case (opts%pattern_type) |
| 194 | 221 | case (PATTERN_FIXED) |
@@ -542,7 +569,14 @@ contains |
| 542 | 569 | |
| 543 | 570 | ! Handle count mode |
| 544 | 571 | if (opts%count_only) then |
| 545 | | - call print_count(match_count, src%filename, opts) |
| 572 | + ! When -l is also set, only print filename for files with matches |
| 573 | + if (opts%files_with_matches) then |
| 574 | + if (match_count > 0) then |
| 575 | + call print_filename(src%filename, opts) |
| 576 | + end if |
| 577 | + else |
| 578 | + call print_count(match_count, src%filename, opts) |
| 579 | + end if |
| 546 | 580 | end if |
| 547 | 581 | |
| 548 | 582 | end function process_source_batch |
@@ -694,6 +728,32 @@ contains |
| 694 | 728 | logical :: need_separator ! Need to print -- before next output |
| 695 | 729 | logical :: use_context |
| 696 | 730 | integer :: k |
| 731 | + integer(i64) :: file_size |
| 732 | + |
| 733 | + ! Calculate line number width for -T alignment (based on file size) |
| 734 | + if (opts%initial_tab) then |
| 735 | + if (src%source_type == SOURCE_MMAP .and. src%mmap_file%size > 0) then |
| 736 | + file_size = src%mmap_file%size |
| 737 | + if (file_size < 10) then |
| 738 | + opts%line_number_width = 1 |
| 739 | + else if (file_size < 100) then |
| 740 | + opts%line_number_width = 2 |
| 741 | + else if (file_size < 1000) then |
| 742 | + opts%line_number_width = 3 |
| 743 | + else if (file_size < 10000) then |
| 744 | + opts%line_number_width = 4 |
| 745 | + else if (file_size < 100000) then |
| 746 | + opts%line_number_width = 5 |
| 747 | + else if (file_size < 1000000) then |
| 748 | + opts%line_number_width = 6 |
| 749 | + else |
| 750 | + opts%line_number_width = 7 |
| 751 | + end if |
| 752 | + else |
| 753 | + ! Stdin or unknown: use large default for alignment |
| 754 | + opts%line_number_width = 7 |
| 755 | + end if |
| 756 | + end if |
| 697 | 757 | |
| 698 | 758 | ! Try optimized batch mode for simple cases |
| 699 | 759 | if (present(compiled) .and. compiled%compiled .and. can_use_batch_mode(src, opts)) then |
@@ -752,7 +812,8 @@ contains |
| 752 | 812 | call print_filename(src%filename, opts) |
| 753 | 813 | if (allocated(before_buffer)) deallocate(before_buffer) |
| 754 | 814 | return |
| 755 | | - else if (opts%only_matching) then |
| 815 | + else if (opts%only_matching .and. .not. opts%count_only) then |
| 816 | + ! -o mode: print each match (but -c takes priority) |
| 756 | 817 | if (present(compiled)) then |
| 757 | 818 | call find_matches(line, patterns, opts, compiled, match_starts, match_ends, num_matches) |
| 758 | 819 | else |
@@ -856,7 +917,14 @@ contains |
| 856 | 917 | |
| 857 | 918 | ! Handle -c (count) mode |
| 858 | 919 | if (opts%count_only) then |
| 859 | | - call print_count(match_count, src%filename, opts) |
| 920 | + ! When -l is also set, only print filename for files with matches |
| 921 | + if (opts%files_with_matches) then |
| 922 | + if (match_count > 0) then |
| 923 | + call print_filename(src%filename, opts) |
| 924 | + end if |
| 925 | + else |
| 926 | + call print_count(match_count, src%filename, opts) |
| 927 | + end if |
| 860 | 928 | end if |
| 861 | 929 | |
| 862 | 930 | ! Handle -L (files without match) mode |
@@ -869,4 +937,71 @@ contains |
| 869 | 937 | |
| 870 | 938 | end function process_source |
| 871 | 939 | |
function count_subpatterns(pattern) result(count)
  !> Count the subpatterns obtained by splitting `pattern` on newlines.
  !>
  !> The result equals the number of entries split_pattern_on_newlines
  !> appends for the same pattern, so it can be used to size arrays exactly:
  !>   - an empty pattern (pattern_len == 0) yields 0;
  !>   - every newline terminates one subpattern (possibly an empty one);
  !>   - text after the last newline forms one more subpattern;
  !>   - a trailing newline does NOT start an additional empty subpattern.
  !> Returns at least 1 for any non-empty pattern.
  character(len=*), intent(in) :: pattern
  integer :: count

  character(len=1), parameter :: newline = char(10)
  integer :: i, plen

  count = 0
  plen = pattern_len(pattern)
  if (plen <= 0) return

  ! One subpattern is closed by each newline
  do i = 1, plen
    if (pattern(i:i) == newline) count = count + 1
  end do

  ! Text following the last newline (or the whole pattern when it has
  ! no newline) is the final subpattern; a trailing newline leaves nothing
  ! behind it, so nothing is added in that case.
  if (pattern(plen:plen) /= newline) count = count + 1

end function count_subpatterns
| 962 | + |
subroutine split_pattern_on_newlines(pattern, subpatterns, next_idx)
  !> Split a pattern on newline characters and append the pieces to `subpatterns`.
  !>
  !> Pieces are stored at subpatterns(next_idx+1), subpatterns(next_idx+2), ...
  !> and next_idx is advanced to the index of the last piece written.
  !> Each stored piece is followed by a char(0) terminator when there is room
  !> for it (piece shorter than max_pattern_len).
  !> An empty piece (newline at the start, or consecutive newlines) is stored
  !> as a lone char(0). A trailing newline only terminates the piece before
  !> it; no empty piece is appended after it.
  !> Pieces that would not fit in `subpatterns` are dropped instead of being
  !> written out of bounds.
  character(len=*), intent(in) :: pattern
  character(len=max_pattern_len), intent(inout) :: subpatterns(:)
  integer, intent(inout) :: next_idx

  character(len=1), parameter :: newline = char(10)
  integer :: i, plen, start_pos

  plen = pattern_len(pattern)
  if (plen <= 0) return

  start_pos = 1
  do i = 1, plen
    if (pattern(i:i) == newline) then
      ! Piece runs from start_pos up to (not including) the newline;
      ! it is zero-length when start_pos == i.
      call append_segment(start_pos, i - 1)
      start_pos = i + 1
    end if
  end do

  ! Remaining text after the final newline, or the entire pattern if it
  ! contains no newline at all
  if (start_pos <= plen) call append_segment(start_pos, plen)

contains

  subroutine append_segment(first, last)
    !> Store pattern(first:last) as the next subpattern, null-terminated
    !> when the terminator fits.
    integer, intent(in) :: first, last

    integer :: seg_len

    ! Refuse to write past the end of the caller-supplied array
    if (next_idx >= size(subpatterns)) return

    seg_len = last - first + 1
    next_idx = next_idx + 1

    ! Assignment blank-pads (or truncates) to max_pattern_len
    subpatterns(next_idx) = pattern(first:last)

    ! Only place the terminator when it lies inside the fixed-length slot
    if (seg_len < max_pattern_len) then
      subpatterns(next_idx)(seg_len+1:seg_len+1) = char(0)
    end if
  end subroutine append_segment

end subroutine split_pattern_on_newlines
| 1006 | + |
| 872 | 1007 | end module ferp_matcher |