| 1 | module disk_scanner |
| 2 | use, intrinsic :: iso_c_binding |
| 3 | use types |
| 4 | use file_system |
| 5 | use iso_fortran_env, only: int64 |
| 6 | use g, only: g_main_context_default, g_main_context_iteration |
| 7 | implicit none |
| 8 | private |
| 9 | |
| 10 | public :: scan_directory, build_tree, calculate_sizes, dump_tree_debug, set_progress_callback, & |
| 11 | set_show_hidden_files, group_small_files |
| 12 | |
| 13 | ! Directories to skip (reduce scan time and avoid issues) |
| 14 | character(len=*), parameter, dimension(7) :: SKIP_DIRS = & |
| 15 | [character(len=20) :: '.git', '.svn', '.hg', 'node_modules', '__pycache__', 'build', '.claude'] |
| 16 | |
| 17 | ! Hidden files visibility setting |
| 18 | logical, save :: show_hidden_files = .true. |
| 19 | |
| 20 | ! Small file grouping thresholds |
| 21 | real, parameter :: SMALL_FILE_THRESHOLD = 0.005 ! 0.5% of parent size |
| 22 | integer, parameter :: MIN_SMALL_FILES = 10 ! Minimum count to trigger grouping |
| 23 | |
| 24 | ! UI responsiveness - process GTK events every N directories |
| 25 | integer, parameter :: DIRS_PER_UI_UPDATE = 10 |
| 26 | integer, save :: dir_scan_counter = 0 |
| 27 | |
| 28 | ! Progress callback interface |
| 29 | abstract interface |
| 30 | subroutine progress_update_callback(fraction, message) |
| 31 | use, intrinsic :: iso_c_binding |
| 32 | real(c_double), intent(in) :: fraction |
| 33 | character(len=*), intent(in) :: message |
| 34 | end subroutine progress_update_callback |
| 35 | end interface |
| 36 | |
| 37 | ! Progress callback pointer |
| 38 | procedure(progress_update_callback), pointer, save :: progress_cb => null() |
| 39 | |
| 40 | contains |
| 41 | |
| 42 | ! Set progress callback |
| 43 | subroutine set_progress_callback(callback) |
| 44 | procedure(progress_update_callback) :: callback |
| 45 | progress_cb => callback |
| 46 | end subroutine set_progress_callback |
| 47 | |
| 48 | ! Check if directory should be skipped |
| 49 | function should_skip_dir(dirname) result(skip) |
| 50 | character(len=*), intent(in) :: dirname |
| 51 | logical :: skip |
| 52 | integer :: i |
| 53 | |
| 54 | skip = .false. |
| 55 | do i = 1, size(SKIP_DIRS) |
| 56 | if (trim(dirname) == trim(SKIP_DIRS(i))) then |
| 57 | skip = .true. |
| 58 | return |
| 59 | end if |
| 60 | end do |
| 61 | end function should_skip_dir |
| 62 | |
| 63 | ! Group small files into a synthetic node |
| 64 | ! This modifies node%children in-place if grouping occurs |
| 65 | subroutine group_small_files(node) |
| 66 | type(file_node), intent(inout) :: node |
| 67 | integer(int64) :: total_size, threshold_size, small_total |
| 68 | integer :: i, small_count, large_count |
| 69 | type(file_node), dimension(:), allocatable :: new_children |
| 70 | integer :: large_idx, small_idx |
| 71 | character(len=50) :: count_str |
| 72 | |
| 73 | ! Only group files in directories with children |
| 74 | if (.not. node%is_directory .or. .not. allocated(node%children)) return |
| 75 | if (node%num_children < MIN_SMALL_FILES) return |
| 76 | |
| 77 | ! Calculate total size of all children |
| 78 | total_size = 0_int64 |
| 79 | do i = 1, node%num_children |
| 80 | total_size = total_size + node%children(i)%size |
| 81 | end do |
| 82 | |
| 83 | ! Calculate threshold (0.5% of total) |
| 84 | threshold_size = int(real(total_size) * SMALL_FILE_THRESHOLD, int64) |
| 85 | |
| 86 | ! Count small files |
| 87 | small_count = 0 |
| 88 | small_total = 0_int64 |
| 89 | do i = 1, node%num_children |
| 90 | if (node%children(i)%size < threshold_size) then |
| 91 | small_count = small_count + 1 |
| 92 | small_total = small_total + node%children(i)%size |
| 93 | end if |
| 94 | end do |
| 95 | |
| 96 | ! Only group if we have enough small files |
| 97 | if (small_count < MIN_SMALL_FILES) return |
| 98 | |
| 99 | large_count = node%num_children - small_count |
| 100 | print *, "Grouping ", small_count, " small files in: ", trim(node%name) |
| 101 | |
| 102 | ! Allocate new children array: large files + 1 synthetic node |
| 103 | allocate(new_children(large_count + 1)) |
| 104 | |
| 105 | ! Copy large files and build small files array |
| 106 | large_idx = 0 |
| 107 | small_idx = 0 |
| 108 | |
| 109 | ! First pass: collect large files |
| 110 | do i = 1, node%num_children |
| 111 | if (node%children(i)%size >= threshold_size) then |
| 112 | large_idx = large_idx + 1 |
| 113 | ! Transfer ownership using move_alloc |
| 114 | call move_file_node(node%children(i), new_children(large_idx)) |
| 115 | end if |
| 116 | end do |
| 117 | |
| 118 | ! Create synthetic node for small files |
| 119 | large_idx = large_idx + 1 ! This is where synthetic node goes |
| 120 | new_children(large_idx)%is_directory = .true. |
| 121 | new_children(large_idx)%access_denied = .false. |
| 122 | new_children(large_idx)%size = small_total |
| 123 | new_children(large_idx)%original_size = small_total ! Initialize backup size |
| 124 | new_children(large_idx)%num_children = small_count |
| 125 | |
| 126 | ! Format name with count |
| 127 | write(count_str, '(A,I0,A)') "[", small_count, " small files]" |
| 128 | new_children(large_idx)%name = trim(count_str) |
| 129 | new_children(large_idx)%path = trim(node%path) // get_path_separator() // trim(count_str) |
| 130 | |
| 131 | ! Allocate children for synthetic node |
| 132 | allocate(new_children(large_idx)%children(small_count)) |
| 133 | |
| 134 | ! Second pass: collect small files into synthetic node |
| 135 | small_idx = 0 |
| 136 | do i = 1, node%num_children |
| 137 | if (node%children(i)%size < threshold_size) then |
| 138 | small_idx = small_idx + 1 |
| 139 | call move_file_node(node%children(i), new_children(large_idx)%children(small_idx)) |
| 140 | end if |
| 141 | end do |
| 142 | |
| 143 | ! Replace old children array with new one |
| 144 | ! Note: move_alloc automatically deallocates node%children if allocated |
| 145 | call move_alloc(new_children, node%children) |
| 146 | node%num_children = large_count + 1 |
| 147 | |
| 148 | end subroutine group_small_files |
| 149 | |
| 150 | ! Helper to move a file_node without deep copying |
| 151 | subroutine move_file_node(from, to) |
| 152 | type(file_node), intent(inout) :: from, to |
| 153 | |
| 154 | ! Move allocatable components |
| 155 | if (allocated(from%name)) call move_alloc(from%name, to%name) |
| 156 | if (allocated(from%path)) call move_alloc(from%path, to%path) |
| 157 | if (allocated(from%children)) call move_alloc(from%children, to%children) |
| 158 | |
| 159 | ! Copy simple components |
| 160 | to%size = from%size |
| 161 | to%original_size = from%original_size |
| 162 | to%is_directory = from%is_directory |
| 163 | to%access_denied = from%access_denied |
| 164 | to%num_children = from%num_children |
| 165 | to%bounds = from%bounds |
| 166 | to%color = from%color |
| 167 | to%cushion = from%cushion |
| 168 | to%is_selected = from%is_selected |
| 169 | to%is_hovered = from%is_hovered |
| 170 | end subroutine move_file_node |
| 171 | |
| 172 | ! Recursively deallocate a file tree node |
| 173 | recursive subroutine deallocate_child_tree(node) |
| 174 | type(file_node), intent(inout) :: node |
| 175 | integer :: i |
| 176 | |
| 177 | ! Deallocate children recursively |
| 178 | if (allocated(node%children)) then |
| 179 | do i = 1, node%num_children |
| 180 | call deallocate_child_tree(node%children(i)) |
| 181 | end do |
| 182 | deallocate(node%children) |
| 183 | end if |
| 184 | |
| 185 | ! Deallocate strings |
| 186 | if (allocated(node%name)) deallocate(node%name) |
| 187 | if (allocated(node%path)) deallocate(node%path) |
| 188 | end subroutine deallocate_child_tree |
| 189 | |
| 190 | ! Scan a directory and build a file tree (with optional depth limiting) |
| 191 | recursive subroutine scan_directory(path, node, current_depth) |
| 192 | character(len=*), intent(in) :: path |
| 193 | type(file_node), intent(inout) :: node |
| 194 | integer, intent(in), optional :: current_depth |
| 195 | character(len=256), dimension(:), allocatable :: entries |
| 196 | integer :: num_entries, i, valid_children, depth |
| 197 | character(len=512) :: child_path |
| 198 | integer, parameter :: MAX_DEPTH = 100 |
| 199 | integer, parameter :: MAX_FILES_PER_DIR = 10000 |
| 200 | type(c_ptr) :: context |
| 201 | |
| 202 | ! Handle depth parameter |
| 203 | if (present(current_depth)) then |
| 204 | depth = current_depth |
| 205 | else |
| 206 | depth = 0 |
| 207 | ! Reset counter at start of new top-level scan |
| 208 | dir_scan_counter = 0 |
| 209 | end if |
| 210 | |
| 211 | ! Process GTK events periodically to keep UI responsive |
| 212 | ! Only at top levels to avoid excessive overhead |
| 213 | if (depth == 0 .or. (depth <= 3 .and. mod(dir_scan_counter, DIRS_PER_UI_UPDATE) == 0)) then |
| 214 | context = g_main_context_default() |
| 215 | do while (g_main_context_iteration(context, 0_c_int) /= 0_c_int) |
| 216 | end do |
| 217 | |
| 218 | ! Update progress - estimate based on directories scanned |
| 219 | ! Progress range: 0.3 to 0.80 (before color assignment at 0.85) |
| 220 | if (associated(progress_cb) .and. dir_scan_counter > 0) then |
| 221 | ! Logarithmic progress for better perceived speed |
| 222 | ! log(60000)/11 ≈ 1.0, so this reaches ~80% after 60000 directories |
| 223 | ! This prevents saturation at 80% for large directories like ~ |
| 224 | call progress_cb(0.3_c_double + 0.50_c_double * min(1.0_c_double, & |
| 225 | log(real(dir_scan_counter, c_double)) / 11.0_c_double), & |
| 226 | 'Scanning directories...') |
| 227 | end if |
| 228 | end if |
| 229 | |
| 230 | ! Increment directory counter |
| 231 | dir_scan_counter = dir_scan_counter + 1 |
| 232 | |
| 233 | ! Set node properties |
| 234 | node%path = path |
| 235 | node%name = extract_filename(path) |
| 236 | node%num_children = 0 |
| 237 | node%access_denied = .false. |
| 238 | |
| 239 | ! Check if this is a symbolic link - skip if so |
| 240 | if (is_symlink(path)) then |
| 241 | node%is_directory = .false. |
| 242 | node%size = 0_int64 |
| 243 | node%original_size = 0_int64 |
| 244 | return |
| 245 | end if |
| 246 | |
| 247 | node%is_directory = is_directory(path) |
| 248 | |
| 249 | if (node%is_directory) then |
| 250 | ! Check depth limit |
| 251 | if (depth >= MAX_DEPTH) then |
| 252 | node%access_denied = .true. |
| 253 | node%size = 0_int64 |
| 254 | node%original_size = 0_int64 |
| 255 | return |
| 256 | end if |
| 257 | |
| 258 | ! Allocate entries array on heap instead of stack |
| 259 | allocate(entries(MAX_FILES_PER_DIR)) |
| 260 | |
| 261 | ! List directory contents (returns 0 on error/permission denied) |
| 262 | num_entries = list_directory(path, entries, MAX_FILES_PER_DIR) |
| 263 | print *, "DEBUG: list_directory('", trim(path), "') returned ", num_entries, " entries" |
| 264 | |
| 265 | ! If we got entries, scan them |
| 266 | if (num_entries > 0) then |
| 267 | ! First pass: count valid children |
| 268 | valid_children = 0 |
| 269 | do i = 1, num_entries |
| 270 | if (should_skip_dir(entries(i))) cycle |
| 271 | ! Skip hidden files if show_hidden_files is false |
| 272 | if (.not. show_hidden_files .and. is_hidden_file(entries(i))) cycle |
| 273 | child_path = trim(path) // get_path_separator() // trim(entries(i)) |
| 274 | if (is_symlink(child_path)) cycle |
| 275 | valid_children = valid_children + 1 |
| 276 | end do |
| 277 | |
| 278 | ! Allocate exact size needed |
| 279 | if (valid_children > 0) then |
| 280 | ! Deallocate old children array if it exists (from previous scan) |
| 281 | if (allocated(node%children)) then |
| 282 | ! Recursively deallocate each child first |
| 283 | do i = 1, node%num_children |
| 284 | call deallocate_child_tree(node%children(i)) |
| 285 | end do |
| 286 | deallocate(node%children) |
| 287 | end if |
| 288 | allocate(node%children(valid_children)) |
| 289 | node%num_children = 0 |
| 290 | |
| 291 | ! Second pass: scan children |
| 292 | do i = 1, num_entries |
| 293 | if (should_skip_dir(entries(i))) cycle |
| 294 | ! Skip hidden files if show_hidden_files is false |
| 295 | if (.not. show_hidden_files .and. is_hidden_file(entries(i))) cycle |
| 296 | child_path = trim(path) // get_path_separator() // trim(entries(i)) |
| 297 | if (is_symlink(child_path)) cycle |
| 298 | |
| 299 | ! Scan directly into node%children |
| 300 | node%num_children = node%num_children + 1 |
| 301 | call scan_directory(child_path, node%children(node%num_children), depth + 1) |
| 302 | end do |
| 303 | end if |
| 304 | end if |
| 305 | |
| 306 | ! Calculate directory size as sum of children |
| 307 | node%size = 0_int64 |
| 308 | if (allocated(node%children)) then |
| 309 | do i = 1, node%num_children |
| 310 | node%size = node%size + node%children(i)%size |
| 311 | end do |
| 312 | end if |
| 313 | |
| 314 | ! Group small files into synthetic node if applicable |
| 315 | call group_small_files(node) |
| 316 | |
| 317 | ! Initialize original_size backup (for filter restoration) |
| 318 | node%original_size = node%size |
| 319 | |
| 320 | ! Deallocate entries array |
| 321 | if (allocated(entries)) deallocate(entries) |
| 322 | else |
| 323 | ! File - get size directly |
| 324 | node%size = get_file_size(path) |
| 325 | ! Initialize original_size backup (for filter restoration) |
| 326 | node%original_size = node%size |
| 327 | end if |
| 328 | end subroutine scan_directory |
| 329 | |
| 330 | ! Build tree from a root path |
| 331 | subroutine build_tree(root_path, root_node) |
| 332 | character(len=*), intent(in) :: root_path |
| 333 | type(file_node), intent(inout) :: root_node ! Changed from intent(out) to avoid auto-deallocation |
| 334 | |
| 335 | call scan_directory(root_path, root_node) |
| 336 | end subroutine build_tree |
| 337 | |
| 338 | ! Calculate cumulative sizes (stub - already done in scan_directory) |
| 339 | recursive subroutine calculate_sizes(node) |
| 340 | type(file_node), intent(inout) :: node |
| 341 | integer :: i |
| 342 | |
| 343 | if (node%is_directory .and. allocated(node%children)) then |
| 344 | node%size = 0_int64 |
| 345 | do i = 1, node%num_children |
| 346 | call calculate_sizes(node%children(i)) |
| 347 | node%size = node%size + node%children(i)%size |
| 348 | end do |
| 349 | end if |
| 350 | end subroutine calculate_sizes |
| 351 | |
| 352 | ! Extract filename from path |
| 353 | function extract_filename(path) result(filename) |
| 354 | character(len=*), intent(in) :: path |
| 355 | character(len=:), allocatable :: filename |
| 356 | integer :: last_sep, i |
| 357 | character(len=1) :: sep |
| 358 | |
| 359 | sep = get_path_separator() |
| 360 | last_sep = 0 |
| 361 | |
| 362 | do i = len_trim(path), 1, -1 |
| 363 | if (path(i:i) == sep) then |
| 364 | last_sep = i |
| 365 | exit |
| 366 | end if |
| 367 | end do |
| 368 | |
| 369 | if (last_sep > 0 .and. last_sep < len_trim(path)) then |
| 370 | filename = trim(path(last_sep+1:)) |
| 371 | else |
| 372 | filename = trim(path) |
| 373 | end if |
| 374 | end function extract_filename |
| 375 | |
| 376 | ! Debug function to dump tree structure to file |
| 377 | subroutine dump_tree_debug(node, filename) |
| 378 | type(file_node), intent(in) :: node |
| 379 | character(len=*), intent(in) :: filename |
| 380 | integer :: unit, ios |
| 381 | |
| 382 | open(newunit=unit, file=filename, status='replace', iostat=ios) |
| 383 | if (ios /= 0) then |
| 384 | print *, "Warning: Could not open debug file: ", trim(filename) |
| 385 | return |
| 386 | end if |
| 387 | |
| 388 | write(unit, '(A)') '=== TREE STRUCTURE DEBUG ===' |
| 389 | call dump_node_recursive(node, unit, 0) |
| 390 | close(unit) |
| 391 | end subroutine dump_tree_debug |
| 392 | |
| 393 | ! Recursive helper for dump_tree_debug |
| 394 | recursive subroutine dump_node_recursive(node, unit, depth) |
| 395 | type(file_node), intent(in) :: node |
| 396 | integer, intent(in) :: unit, depth |
| 397 | character(len=512) :: indent |
| 398 | integer :: i |
| 399 | character(len=20) :: size_str |
| 400 | |
| 401 | ! Build indent string |
| 402 | indent = '' |
| 403 | do i = 1, depth * 2 |
| 404 | indent(i:i) = ' ' |
| 405 | end do |
| 406 | |
| 407 | ! Format size |
| 408 | if (node%size < 1024_int64) then |
| 409 | write(size_str, '(I0,A)') node%size, 'B' |
| 410 | else if (node%size < 1024_int64**2) then |
| 411 | write(size_str, '(F0.2,A)') real(node%size)/1024.0, 'KB' |
| 412 | else if (node%size < 1024_int64**3) then |
| 413 | write(size_str, '(F0.2,A)') real(node%size)/(1024.0**2), 'MB' |
| 414 | else |
| 415 | write(size_str, '(F0.2,A)') real(node%size)/(1024.0**3), 'GB' |
| 416 | end if |
| 417 | |
| 418 | ! Write node info |
| 419 | if (node%is_directory) then |
| 420 | write(unit, '(A,A,A,A,A,A,I0,A)') trim(indent(1:depth*2)), '[DIR] ', & |
| 421 | trim(node%name), ' (', trim(size_str), ', ', node%num_children, ' children)' |
| 422 | else |
| 423 | write(unit, '(A,A,A,A,A,A)') trim(indent(1:depth*2)), '[FILE] ', & |
| 424 | trim(node%name), ' (', trim(size_str), ')' |
| 425 | end if |
| 426 | |
| 427 | ! Recurse into children |
| 428 | if (allocated(node%children)) then |
| 429 | do i = 1, node%num_children |
| 430 | call dump_node_recursive(node%children(i), unit, depth + 1) |
| 431 | end do |
| 432 | end if |
| 433 | end subroutine dump_node_recursive |
| 434 | |
| 435 | ! Set whether to show hidden files (dotfiles) |
| 436 | subroutine set_show_hidden_files(show) |
| 437 | logical, intent(in) :: show |
| 438 | show_hidden_files = show |
| 439 | print *, "Disk scanner: show_hidden_files = ", show_hidden_files |
| 440 | end subroutine set_show_hidden_files |
| 441 | |
| 442 | ! Check if a filename is hidden (starts with '.') |
| 443 | pure function is_hidden_file(filename) result(is_hidden) |
| 444 | character(len=*), intent(in) :: filename |
| 445 | logical :: is_hidden |
| 446 | |
| 447 | is_hidden = .false. |
| 448 | if (len_trim(filename) > 0) then |
| 449 | if (filename(1:1) == '.') then |
| 450 | is_hidden = .true. |
| 451 | end if |
| 452 | end if |
| 453 | end function is_hidden_file |
| 454 | |
| 455 | end module disk_scanner |
| 456 |