@@ -43,9 +43,10 @@ contains |
| 43 | type(compiled_patterns_t), intent(out) :: compiled | 43 | type(compiled_patterns_t), intent(out) :: compiled |
| 44 | integer, intent(out) :: ierr | 44 | integer, intent(out) :: ierr |
| 45 | | 45 | |
| 46 | - integer :: i, n, plen | 46 | + integer :: i, j, n, plen, total_subpats, subpat_count |
| 47 | logical :: is_ere | 47 | logical :: is_ere |
| 48 | character(len=max_pattern_len) :: pattern | 48 | character(len=max_pattern_len) :: pattern |
| | 49 | + character(len=max_pattern_len), allocatable :: subpatterns(:) |
| 49 | | 50 | |
| 50 | ierr = 0 | 51 | ierr = 0 |
| 51 | n = size(patterns) | 52 | n = size(patterns) |
@@ -55,18 +56,36 @@ contains |
| 55 | | 56 | |
| 56 | ! Use Boyer-Moore for fixed string patterns | 57 | ! Use Boyer-Moore for fixed string patterns |
| 57 | if (compiled%is_fixed) then | 58 | if (compiled%is_fixed) then |
| 58 | - allocate(compiled%bm_pats(n)) | 59 | + ! For -F mode, patterns containing newlines should be split into multiple patterns |
| | 60 | + ! First count total subpatterns |
| | 61 | + total_subpats = 0 |
| | 62 | + do i = 1, n |
| | 63 | + total_subpats = total_subpats + count_subpatterns(patterns(i)) |
| | 64 | + end do |
| | 65 | + |
| | 66 | + allocate(compiled%bm_pats(total_subpats)) |
| | 67 | + allocate(subpatterns(total_subpats)) |
| 59 | | 68 | |
| | 69 | + ! Split patterns on newlines |
| | 70 | + j = 0 |
| 60 | do i = 1, n | 71 | do i = 1, n |
| 61 | - plen = pattern_len(patterns(i)) | 72 | + call split_pattern_on_newlines(patterns(i), subpatterns, j) |
| | 73 | + end do |
| | 74 | + |
| | 75 | + compiled%count = j |
| | 76 | + |
| | 77 | + ! Compile each subpattern |
| | 78 | + do i = 1, j |
| | 79 | + plen = pattern_len(subpatterns(i)) |
| 62 | ! For case-insensitive, convert pattern to lowercase | 80 | ! For case-insensitive, convert pattern to lowercase |
| 63 | if (opts%ignore_case) then | 81 | if (opts%ignore_case) then |
| 64 | - call bm_compile(compiled%bm_pats(i), patterns(i)(1:plen), .true.) | 82 | + call bm_compile(compiled%bm_pats(i), subpatterns(i)(1:plen), .true.) |
| 65 | else | 83 | else |
| 66 | - call bm_compile(compiled%bm_pats(i), patterns(i)(1:plen), .false.) | 84 | + call bm_compile(compiled%bm_pats(i), subpatterns(i)(1:plen), .false.) |
| 67 | end if | 85 | end if |
| 68 | end do | 86 | end do |
| 69 | | 87 | |
| | 88 | + deallocate(subpatterns) |
| 70 | compiled%compiled = .true. | 89 | compiled%compiled = .true. |
| 71 | return | 90 | return |
| 72 | end if | 91 | end if |
@@ -174,7 +193,7 @@ contains |
| 174 | type(compiled_patterns_t), intent(inout), optional :: compiled ! inout for DFA cache | 193 | type(compiled_patterns_t), intent(inout), optional :: compiled ! inout for DFA cache |
| 175 | logical :: matches | 194 | logical :: matches |
| 176 | | 195 | |
| 177 | - integer :: i | 196 | + integer :: i, num_patterns |
| 178 | character(len=:), allocatable :: search_line | 197 | character(len=:), allocatable :: search_line |
| 179 | character(len=max_pattern_len) :: search_pattern | 198 | character(len=max_pattern_len) :: search_pattern |
| 180 | | 199 | |
@@ -187,8 +206,16 @@ contains |
| 187 | search_line = line | 206 | search_line = line |
| 188 | end if | 207 | end if |
| 189 | | 208 | |
| | 209 | + ! Determine number of patterns to try |
| | 210 | + ! For -F mode with compiled patterns, use compiled%count (may differ due to newline splitting) |
| | 211 | + if (present(compiled) .and. compiled%compiled .and. compiled%is_fixed) then |
| | 212 | + num_patterns = compiled%count |
| | 213 | + else |
| | 214 | + num_patterns = size(patterns) |
| | 215 | + end if |
| | 216 | + |
| 190 | ! Try each pattern | 217 | ! Try each pattern |
| 191 | - do i = 1, size(patterns) | 218 | + do i = 1, num_patterns |
| 192 | ! Match based on pattern type | 219 | ! Match based on pattern type |
| 193 | select case (opts%pattern_type) | 220 | select case (opts%pattern_type) |
| 194 | case (PATTERN_FIXED) | 221 | case (PATTERN_FIXED) |
@@ -542,7 +569,14 @@ contains |
| 542 | | 569 | |
| 543 | ! Handle count mode | 570 | ! Handle count mode |
| 544 | if (opts%count_only) then | 571 | if (opts%count_only) then |
| 545 | - call print_count(match_count, src%filename, opts) | 572 | + ! When -l is also set, only print filename for files with matches |
| | 573 | + if (opts%files_with_matches) then |
| | 574 | + if (match_count > 0) then |
| | 575 | + call print_filename(src%filename, opts) |
| | 576 | + end if |
| | 577 | + else |
| | 578 | + call print_count(match_count, src%filename, opts) |
| | 579 | + end if |
| 546 | end if | 580 | end if |
| 547 | | 581 | |
| 548 | end function process_source_batch | 582 | end function process_source_batch |
@@ -694,6 +728,32 @@ contains |
| 694 | logical :: need_separator ! Need to print -- before next output | 728 | logical :: need_separator ! Need to print -- before next output |
| 695 | logical :: use_context | 729 | logical :: use_context |
| 696 | integer :: k | 730 | integer :: k |
| | 731 | + integer(i64) :: file_size |
| | 732 | + |
| | 733 | + ! Calculate line number width for -T alignment (based on file size) |
| | 734 | + if (opts%initial_tab) then |
| | 735 | + if (src%source_type == SOURCE_MMAP .and. src%mmap_file%size > 0) then |
| | 736 | + file_size = src%mmap_file%size |
| | 737 | + if (file_size < 10) then |
| | 738 | + opts%line_number_width = 1 |
| | 739 | + else if (file_size < 100) then |
| | 740 | + opts%line_number_width = 2 |
| | 741 | + else if (file_size < 1000) then |
| | 742 | + opts%line_number_width = 3 |
| | 743 | + else if (file_size < 10000) then |
| | 744 | + opts%line_number_width = 4 |
| | 745 | + else if (file_size < 100000) then |
| | 746 | + opts%line_number_width = 5 |
| | 747 | + else if (file_size < 1000000) then |
| | 748 | + opts%line_number_width = 6 |
| | 749 | + else |
| | 750 | + opts%line_number_width = 7 |
| | 751 | + end if |
| | 752 | + else |
| | 753 | + ! Stdin or unknown: use large default for alignment |
| | 754 | + opts%line_number_width = 7 |
| | 755 | + end if |
| | 756 | + end if |
| 697 | | 757 | |
| 698 | ! Try optimized batch mode for simple cases | 758 | ! Try optimized batch mode for simple cases |
| 699 | if (present(compiled) .and. compiled%compiled .and. can_use_batch_mode(src, opts)) then | 759 | if (present(compiled) .and. compiled%compiled .and. can_use_batch_mode(src, opts)) then |
@@ -752,7 +812,8 @@ contains |
| 752 | call print_filename(src%filename, opts) | 812 | call print_filename(src%filename, opts) |
| 753 | if (allocated(before_buffer)) deallocate(before_buffer) | 813 | if (allocated(before_buffer)) deallocate(before_buffer) |
| 754 | return | 814 | return |
| 755 | - else if (opts%only_matching) then | 815 | + else if (opts%only_matching .and. .not. opts%count_only) then |
| | 816 | + ! -o mode: print each match (but -c takes priority) |
| 756 | if (present(compiled)) then | 817 | if (present(compiled)) then |
| 757 | call find_matches(line, patterns, opts, compiled, match_starts, match_ends, num_matches) | 818 | call find_matches(line, patterns, opts, compiled, match_starts, match_ends, num_matches) |
| 758 | else | 819 | else |
@@ -856,7 +917,14 @@ contains |
| 856 | | 917 | |
| 857 | ! Handle -c (count) mode | 918 | ! Handle -c (count) mode |
| 858 | if (opts%count_only) then | 919 | if (opts%count_only) then |
| 859 | - call print_count(match_count, src%filename, opts) | 920 | + ! When -l is also set, only print filename for files with matches |
| | 921 | + if (opts%files_with_matches) then |
| | 922 | + if (match_count > 0) then |
| | 923 | + call print_filename(src%filename, opts) |
| | 924 | + end if |
| | 925 | + else |
| | 926 | + call print_count(match_count, src%filename, opts) |
| | 927 | + end if |
| 860 | end if | 928 | end if |
| 861 | | 929 | |
| 862 | ! Handle -L (files without match) mode | 930 | ! Handle -L (files without match) mode |
@@ -869,4 +937,71 @@ contains |
| 869 | | 937 | |
| 870 | end function process_source | 938 | end function process_source |
| 871 | | 939 | |
function count_subpatterns(pattern) result(n_subpatterns)
  !> Count the subpatterns obtained by splitting `pattern` on newline
  !> characters (char(10)).
  !>
  !> The count follows the segmentation performed by
  !> split_pattern_on_newlines: every newline closes one segment (possibly
  !> an empty one), and the text after the last newline forms one more
  !> segment only when it is non-empty.  A pattern that ends in a newline
  !> therefore does NOT contribute a trailing empty segment.
  !>
  !> pattern : the raw pattern; its effective length is taken from
  !>           pattern_len(pattern).
  !> returns : 0 when pattern_len reports a length of zero, otherwise the
  !>           number of segments (always >= 1).
  character(len=*), intent(in) :: pattern
  integer :: n_subpatterns

  character(len=1), parameter :: newline = char(10)
  integer :: k, plen

  plen = pattern_len(pattern)
  n_subpatterns = 0
  if (plen == 0) return

  ! One segment is closed by each newline inside the effective length.
  n_subpatterns = count([(pattern(k:k) == newline, k = 1, plen)])

  ! Text following the last newline (or the whole pattern when there is
  ! no newline at all) is a segment of its own.  If the pattern ends with
  ! a newline there is nothing left over, so nothing is added.
  if (pattern(plen:plen) /= newline) n_subpatterns = n_subpatterns + 1

end function count_subpatterns
| | 962 | + |
subroutine split_pattern_on_newlines(pattern, subpatterns, next_idx)
  !> Split `pattern` on newline characters (char(10)) and append the
  !> resulting segments to `subpatterns`.
  !>
  !> pattern     : the raw pattern; only the first pattern_len(pattern)
  !>               characters are examined.
  !> subpatterns : destination array.  Each stored segment is followed by
  !>               a char(0) terminator when the element has room for it.
  !> next_idx    : on entry, index of the last slot already in use; on
  !>               exit, index of the last slot written by this call.
  !>               Segments are stored at next_idx+1, next_idx+2, ...
  !>
  !> Segmentation rules:
  !>   * every newline closes a segment, so a leading newline or two
  !>     consecutive newlines yield an empty segment (stored as a lone
  !>     char(0));
  !>   * text after the last newline is stored only when non-empty, so a
  !>     pattern that ends in a newline adds no trailing empty segment;
  !>   * a pattern whose pattern_len is zero adds nothing.
  !>
  !> The caller must size `subpatterns` for every segment appended; no
  !> check against size(subpatterns) is made here.
  character(len=*), intent(in) :: pattern
  character(len=max_pattern_len), intent(inout) :: subpatterns(:)
  integer, intent(inout) :: next_idx

  character(len=1), parameter :: newline = char(10)
  integer :: plen, seg_start, offset

  plen = pattern_len(pattern)
  if (plen == 0) return

  seg_start = 1
  do
    if (seg_start > plen) exit
    ! offset is relative to seg_start; 0 means no further newline.
    offset = index(pattern(seg_start:plen), newline)
    if (offset == 0) exit
    ! The segment ends just before the newline; it is empty when offset == 1.
    call append_segment(seg_start, seg_start + offset - 2)
    seg_start = seg_start + offset
  end do

  ! Remainder after the final newline, or the whole pattern if none was found.
  if (seg_start <= plen) call append_segment(seg_start, plen)

contains

  subroutine append_segment(first, last)
    !> Store pattern(first:last) in the next free slot and NUL-terminate it.
    !> last == first - 1 denotes an empty segment.
    integer, intent(in) :: first, last
    integer :: seg_len

    seg_len = last - first + 1
    next_idx = next_idx + 1
    ! Assignment blank-pads a short segment and truncates an over-long one.
    subpatterns(next_idx) = pattern(first:last)
    ! Write the terminator only when it fits inside the element; a segment
    ! that fills the element completely is left unterminated rather than
    ! indexing past the end of the string.
    if (seg_len < len(subpatterns)) then
      subpatterns(next_idx)(seg_len+1:seg_len+1) = char(0)
    end if
  end subroutine append_segment

end subroutine split_pattern_on_newlines
| | 1006 | + |
| 872 | end module ferp_matcher | 1007 | end module ferp_matcher |