fortrangoingonforty/ferp / 7d332b5

Browse files

Fix -F newline splitting and flag interactions

- Split -F patterns on newlines (each line is an OR alternative)
- Add count_subpatterns() and split_pattern_on_newlines() helpers
- Use compiled%count for split pattern count in match_line
- Fix -c with -l: only print filename when count > 0
- Fix -o with -c: count mode takes priority over only-matching
Authored by mfwolffe <wolffemf@dukes.jmu.edu>
SHA
7d332b5abc3dbd6b2ffc8b62724498a3cd0eee24
Parents
a3c0ed6
Tree
f5fbf8a

1 changed file

Status | File | + | -
M src/ferp_matcher.f90 145 10
src/ferp_matcher.f90 (modified)
@@ -43,9 +43,10 @@ contains
4343
     type(compiled_patterns_t), intent(out) :: compiled
4444
     integer, intent(out) :: ierr
4545
 
46
-    integer :: i, n, plen
46
+    integer :: i, j, n, plen, total_subpats, subpat_count
4747
     logical :: is_ere
4848
     character(len=max_pattern_len) :: pattern
49
+    character(len=max_pattern_len), allocatable :: subpatterns(:)
4950
 
5051
     ierr = 0
5152
     n = size(patterns)
@@ -55,18 +56,36 @@ contains
5556
 
5657
     ! Use Boyer-Moore for fixed string patterns
5758
     if (compiled%is_fixed) then
58
-      allocate(compiled%bm_pats(n))
59
+      ! For -F mode, patterns containing newlines should be split into multiple patterns
60
+      ! First count total subpatterns
61
+      total_subpats = 0
62
+      do i = 1, n
63
+        total_subpats = total_subpats + count_subpatterns(patterns(i))
64
+      end do
65
+
66
+      allocate(compiled%bm_pats(total_subpats))
67
+      allocate(subpatterns(total_subpats))
5968
 
69
+      ! Split patterns on newlines
70
+      j = 0
6071
       do i = 1, n
61
-        plen = pattern_len(patterns(i))
72
+        call split_pattern_on_newlines(patterns(i), subpatterns, j)
73
+      end do
74
+
75
+      compiled%count = j
76
+
77
+      ! Compile each subpattern
78
+      do i = 1, j
79
+        plen = pattern_len(subpatterns(i))
6280
         ! For case-insensitive, convert pattern to lowercase
6381
         if (opts%ignore_case) then
64
-          call bm_compile(compiled%bm_pats(i), patterns(i)(1:plen), .true.)
82
+          call bm_compile(compiled%bm_pats(i), subpatterns(i)(1:plen), .true.)
6583
         else
66
-          call bm_compile(compiled%bm_pats(i), patterns(i)(1:plen), .false.)
84
+          call bm_compile(compiled%bm_pats(i), subpatterns(i)(1:plen), .false.)
6785
         end if
6886
       end do
6987
 
88
+      deallocate(subpatterns)
7089
       compiled%compiled = .true.
7190
       return
7291
     end if
@@ -174,7 +193,7 @@ contains
174193
     type(compiled_patterns_t), intent(inout), optional :: compiled  ! inout for DFA cache
175194
     logical :: matches
176195
 
177
-    integer :: i
196
+    integer :: i, num_patterns
178197
     character(len=:), allocatable :: search_line
179198
     character(len=max_pattern_len) :: search_pattern
180199
 
@@ -187,8 +206,16 @@ contains
187206
       search_line = line
188207
     end if
189208
 
209
+    ! Determine number of patterns to try
210
+    ! For -F mode with compiled patterns, use compiled%count (may differ due to newline splitting)
211
+    if (present(compiled) .and. compiled%compiled .and. compiled%is_fixed) then
212
+      num_patterns = compiled%count
213
+    else
214
+      num_patterns = size(patterns)
215
+    end if
216
+
190217
     ! Try each pattern
191
-    do i = 1, size(patterns)
218
+    do i = 1, num_patterns
192219
       ! Match based on pattern type
193220
       select case (opts%pattern_type)
194221
         case (PATTERN_FIXED)
@@ -542,7 +569,14 @@ contains
542569
 
543570
     ! Handle count mode
544571
     if (opts%count_only) then
545
-      call print_count(match_count, src%filename, opts)
572
+      ! When -l is also set, only print filename for files with matches
573
+      if (opts%files_with_matches) then
574
+        if (match_count > 0) then
575
+          call print_filename(src%filename, opts)
576
+        end if
577
+      else
578
+        call print_count(match_count, src%filename, opts)
579
+      end if
546580
     end if
547581
 
548582
   end function process_source_batch
@@ -694,6 +728,32 @@ contains
694728
     logical :: need_separator  ! Need to print -- before next output
695729
     logical :: use_context
696730
     integer :: k
731
+    integer(i64) :: file_size
732
+
733
+    ! Calculate line number width for -T alignment (based on file size)
734
+    if (opts%initial_tab) then
735
+      if (src%source_type == SOURCE_MMAP .and. src%mmap_file%size > 0) then
736
+        file_size = src%mmap_file%size
737
+        if (file_size < 10) then
738
+          opts%line_number_width = 1
739
+        else if (file_size < 100) then
740
+          opts%line_number_width = 2
741
+        else if (file_size < 1000) then
742
+          opts%line_number_width = 3
743
+        else if (file_size < 10000) then
744
+          opts%line_number_width = 4
745
+        else if (file_size < 100000) then
746
+          opts%line_number_width = 5
747
+        else if (file_size < 1000000) then
748
+          opts%line_number_width = 6
749
+        else
750
+          opts%line_number_width = 7
751
+        end if
752
+      else
753
+        ! Stdin or unknown: use large default for alignment
754
+        opts%line_number_width = 7
755
+      end if
756
+    end if
697757
 
698758
     ! Try optimized batch mode for simple cases
699759
     if (present(compiled) .and. compiled%compiled .and. can_use_batch_mode(src, opts)) then
@@ -752,7 +812,8 @@ contains
752812
           call print_filename(src%filename, opts)
753813
           if (allocated(before_buffer)) deallocate(before_buffer)
754814
           return
755
-        else if (opts%only_matching) then
815
+        else if (opts%only_matching .and. .not. opts%count_only) then
816
+          ! -o mode: print each match (but -c takes priority)
756817
           if (present(compiled)) then
757818
             call find_matches(line, patterns, opts, compiled, match_starts, match_ends, num_matches)
758819
           else
@@ -856,7 +917,14 @@ contains
856917
 
857918
     ! Handle -c (count) mode
858919
     if (opts%count_only) then
859
-      call print_count(match_count, src%filename, opts)
920
+      ! When -l is also set, only print filename for files with matches
921
+      if (opts%files_with_matches) then
922
+        if (match_count > 0) then
923
+          call print_filename(src%filename, opts)
924
+        end if
925
+      else
926
+        call print_count(match_count, src%filename, opts)
927
+      end if
860928
     end if
861929
 
862930
     ! Handle -L (files without match) mode
@@ -869,4 +937,71 @@ contains
869937
 
870938
   end function process_source
871939
 
940
function count_subpatterns(pattern) result(count)
  !> Count the subpatterns obtained by splitting a pattern on newlines.
  !>
  !> The result is exactly the number of entries that
  !> split_pattern_on_newlines appends for the same pattern:
  !>   * one entry per newline character (empty segments included), and
  !>   * one more entry when text follows the last newline (or when the
  !>     pattern contains no newline at all).
  !> A pattern ending in a newline therefore gets no extra phantom
  !> entry, so arrays sized from this count are filled completely.
  !>
  !> Returns 0 when pattern_len reports an empty pattern, and at least 1
  !> for any non-empty pattern.
  character(len=*), intent(in) :: pattern
  integer :: count

  integer :: i, plen

  count = 0
  plen = pattern_len(pattern)
  if (plen == 0) return

  ! Every newline terminates one segment.
  do i = 1, plen
    if (pattern(i:i) == char(10)) count = count + 1
  end do

  ! Text after the final newline (or the whole pattern when it has no
  ! newline) forms one additional segment.
  if (pattern(plen:plen) /= char(10)) count = count + 1

end function count_subpatterns
962
+
963
subroutine split_pattern_on_newlines(pattern, subpatterns, next_idx)
  !> Split a pattern on newline characters into multiple subpatterns.
  !>
  !> Each segment is appended to subpatterns starting at next_idx+1, and
  !> next_idx is advanced to the index of the last entry written.
  !> Segments are blank-padded by assignment and, when there is room,
  !> followed by a char(0) terminator.
  !>
  !> Empty segments (a leading newline or consecutive newlines) are
  !> stored as entries holding only the terminator. A trailing newline
  !> does not produce an extra entry. Nothing is appended when
  !> pattern_len reports an empty pattern.
  !>
  !> pattern      pattern text, possibly containing char(10) separators
  !> subpatterns  destination array; the caller must size it to hold
  !>              every appended segment
  !> next_idx     on entry, index of the last used slot; on exit, index
  !>              of the last slot written by this call
  character(len=*), intent(in) :: pattern
  character(len=max_pattern_len), intent(inout) :: subpatterns(:)
  integer, intent(inout) :: next_idx

  integer :: i, plen, start_pos

  plen = pattern_len(pattern)
  if (plen == 0) return

  start_pos = 1
  do i = 1, plen
    if (pattern(i:i) == char(10)) then
      ! Segment ends just before the newline; it may be empty.
      call append_segment(start_pos, i - 1)
      start_pos = i + 1
    end if
  end do

  ! Last segment: text after the final newline, or the entire pattern
  ! when it contains no newline.
  if (start_pos <= plen) call append_segment(start_pos, plen)

contains

  subroutine append_segment(first, last)
    !> Store pattern(first:last) in the next free slot and terminate it.
    !> last = first - 1 denotes an empty segment (zero-length substring).
    integer, intent(in) :: first, last

    integer :: seg_len

    seg_len = last - first + 1
    next_idx = next_idx + 1
    subpatterns(next_idx) = pattern(first:last)
    ! Write the terminator only when it fits; a segment that fills the
    ! whole slot is left unterminated instead of writing past the end.
    if (seg_len < max_pattern_len) then
      subpatterns(next_idx)(seg_len+1:seg_len+1) = char(0)
    end if
  end subroutine append_segment

end subroutine split_pattern_on_newlines
1006
+
8721007
 end module ferp_matcher