Fortran · 15611 bytes Raw Blame History
1 module disk_scanner
2 use, intrinsic :: iso_c_binding
3 use types
4 use file_system
5 use iso_fortran_env, only: int64
6 use g, only: g_main_context_default, g_main_context_iteration
7 implicit none
8 private
9
10 public :: scan_directory, build_tree, calculate_sizes, dump_tree_debug, set_progress_callback, &
11 set_show_hidden_files, group_small_files
12
13 ! Directories to skip (reduce scan time and avoid issues)
14 character(len=*), parameter, dimension(7) :: SKIP_DIRS = &
15 [character(len=20) :: '.git', '.svn', '.hg', 'node_modules', '__pycache__', 'build', '.claude']
16
17 ! Hidden files visibility setting
18 logical, save :: show_hidden_files = .true.
19
20 ! Small file grouping thresholds
21 real, parameter :: SMALL_FILE_THRESHOLD = 0.005 ! 0.5% of parent size
22 integer, parameter :: MIN_SMALL_FILES = 10 ! Minimum count to trigger grouping
23
24 ! UI responsiveness - process GTK events every N directories
25 integer, parameter :: DIRS_PER_UI_UPDATE = 10
26 integer, save :: dir_scan_counter = 0
27
28 ! Progress callback interface
29 abstract interface
30 subroutine progress_update_callback(fraction, message)
31 use, intrinsic :: iso_c_binding
32 real(c_double), intent(in) :: fraction
33 character(len=*), intent(in) :: message
34 end subroutine progress_update_callback
35 end interface
36
37 ! Progress callback pointer
38 procedure(progress_update_callback), pointer, save :: progress_cb => null()
39
40 contains
41
42 ! Set progress callback
43 subroutine set_progress_callback(callback)
44 procedure(progress_update_callback) :: callback
45 progress_cb => callback
46 end subroutine set_progress_callback
47
48 ! Check if directory should be skipped
49 function should_skip_dir(dirname) result(skip)
50 character(len=*), intent(in) :: dirname
51 logical :: skip
52 integer :: i
53
54 skip = .false.
55 do i = 1, size(SKIP_DIRS)
56 if (trim(dirname) == trim(SKIP_DIRS(i))) then
57 skip = .true.
58 return
59 end if
60 end do
61 end function should_skip_dir
62
63 ! Group small files into a synthetic node
64 ! This modifies node%children in-place if grouping occurs
65 subroutine group_small_files(node)
66 type(file_node), intent(inout) :: node
67 integer(int64) :: total_size, threshold_size, small_total
68 integer :: i, small_count, large_count
69 type(file_node), dimension(:), allocatable :: new_children
70 integer :: large_idx, small_idx
71 character(len=50) :: count_str
72
73 ! Only group files in directories with children
74 if (.not. node%is_directory .or. .not. allocated(node%children)) return
75 if (node%num_children < MIN_SMALL_FILES) return
76
77 ! Calculate total size of all children
78 total_size = 0_int64
79 do i = 1, node%num_children
80 total_size = total_size + node%children(i)%size
81 end do
82
83 ! Calculate threshold (0.5% of total)
84 threshold_size = int(real(total_size) * SMALL_FILE_THRESHOLD, int64)
85
86 ! Count small files
87 small_count = 0
88 small_total = 0_int64
89 do i = 1, node%num_children
90 if (node%children(i)%size < threshold_size) then
91 small_count = small_count + 1
92 small_total = small_total + node%children(i)%size
93 end if
94 end do
95
96 ! Only group if we have enough small files
97 if (small_count < MIN_SMALL_FILES) return
98
99 large_count = node%num_children - small_count
100 print *, "Grouping ", small_count, " small files in: ", trim(node%name)
101
102 ! Allocate new children array: large files + 1 synthetic node
103 allocate(new_children(large_count + 1))
104
105 ! Copy large files and build small files array
106 large_idx = 0
107 small_idx = 0
108
109 ! First pass: collect large files
110 do i = 1, node%num_children
111 if (node%children(i)%size >= threshold_size) then
112 large_idx = large_idx + 1
113 ! Transfer ownership using move_alloc
114 call move_file_node(node%children(i), new_children(large_idx))
115 end if
116 end do
117
118 ! Create synthetic node for small files
119 large_idx = large_idx + 1 ! This is where synthetic node goes
120 new_children(large_idx)%is_directory = .true.
121 new_children(large_idx)%access_denied = .false.
122 new_children(large_idx)%size = small_total
123 new_children(large_idx)%original_size = small_total ! Initialize backup size
124 new_children(large_idx)%num_children = small_count
125
126 ! Format name with count
127 write(count_str, '(A,I0,A)') "[", small_count, " small files]"
128 new_children(large_idx)%name = trim(count_str)
129 new_children(large_idx)%path = trim(node%path) // get_path_separator() // trim(count_str)
130
131 ! Allocate children for synthetic node
132 allocate(new_children(large_idx)%children(small_count))
133
134 ! Second pass: collect small files into synthetic node
135 small_idx = 0
136 do i = 1, node%num_children
137 if (node%children(i)%size < threshold_size) then
138 small_idx = small_idx + 1
139 call move_file_node(node%children(i), new_children(large_idx)%children(small_idx))
140 end if
141 end do
142
143 ! Replace old children array with new one
144 ! Note: move_alloc automatically deallocates node%children if allocated
145 call move_alloc(new_children, node%children)
146 node%num_children = large_count + 1
147
148 end subroutine group_small_files
149
150 ! Helper to move a file_node without deep copying
151 subroutine move_file_node(from, to)
152 type(file_node), intent(inout) :: from, to
153
154 ! Move allocatable components
155 if (allocated(from%name)) call move_alloc(from%name, to%name)
156 if (allocated(from%path)) call move_alloc(from%path, to%path)
157 if (allocated(from%children)) call move_alloc(from%children, to%children)
158
159 ! Copy simple components
160 to%size = from%size
161 to%original_size = from%original_size
162 to%is_directory = from%is_directory
163 to%access_denied = from%access_denied
164 to%num_children = from%num_children
165 to%bounds = from%bounds
166 to%color = from%color
167 to%cushion = from%cushion
168 to%is_selected = from%is_selected
169 to%is_hovered = from%is_hovered
170 end subroutine move_file_node
171
172 ! Recursively deallocate a file tree node
173 recursive subroutine deallocate_child_tree(node)
174 type(file_node), intent(inout) :: node
175 integer :: i
176
177 ! Deallocate children recursively
178 if (allocated(node%children)) then
179 do i = 1, node%num_children
180 call deallocate_child_tree(node%children(i))
181 end do
182 deallocate(node%children)
183 end if
184
185 ! Deallocate strings
186 if (allocated(node%name)) deallocate(node%name)
187 if (allocated(node%path)) deallocate(node%path)
188 end subroutine deallocate_child_tree
189
190 ! Scan a directory and build a file tree (with optional depth limiting)
191 recursive subroutine scan_directory(path, node, current_depth)
192 character(len=*), intent(in) :: path
193 type(file_node), intent(inout) :: node
194 integer, intent(in), optional :: current_depth
195 character(len=256), dimension(:), allocatable :: entries
196 integer :: num_entries, i, valid_children, depth
197 character(len=512) :: child_path
198 integer, parameter :: MAX_DEPTH = 100
199 integer, parameter :: MAX_FILES_PER_DIR = 10000
200 type(c_ptr) :: context
201
202 ! Handle depth parameter
203 if (present(current_depth)) then
204 depth = current_depth
205 else
206 depth = 0
207 ! Reset counter at start of new top-level scan
208 dir_scan_counter = 0
209 end if
210
211 ! Process GTK events periodically to keep UI responsive
212 ! Only at top levels to avoid excessive overhead
213 if (depth == 0 .or. (depth <= 3 .and. mod(dir_scan_counter, DIRS_PER_UI_UPDATE) == 0)) then
214 context = g_main_context_default()
215 do while (g_main_context_iteration(context, 0_c_int) /= 0_c_int)
216 end do
217
218 ! Update progress - estimate based on directories scanned
219 ! Progress range: 0.3 to 0.80 (before color assignment at 0.85)
220 if (associated(progress_cb) .and. dir_scan_counter > 0) then
221 ! Logarithmic progress for better perceived speed
222 ! log(60000)/11 ≈ 1.0, so this reaches ~80% after 60000 directories
223 ! This prevents saturation at 80% for large directories like ~
224 call progress_cb(0.3_c_double + 0.50_c_double * min(1.0_c_double, &
225 log(real(dir_scan_counter, c_double)) / 11.0_c_double), &
226 'Scanning directories...')
227 end if
228 end if
229
230 ! Increment directory counter
231 dir_scan_counter = dir_scan_counter + 1
232
233 ! Set node properties
234 node%path = path
235 node%name = extract_filename(path)
236 node%num_children = 0
237 node%access_denied = .false.
238
239 ! Check if this is a symbolic link - skip if so
240 if (is_symlink(path)) then
241 node%is_directory = .false.
242 node%size = 0_int64
243 node%original_size = 0_int64
244 return
245 end if
246
247 node%is_directory = is_directory(path)
248
249 if (node%is_directory) then
250 ! Check depth limit
251 if (depth >= MAX_DEPTH) then
252 node%access_denied = .true.
253 node%size = 0_int64
254 node%original_size = 0_int64
255 return
256 end if
257
258 ! Allocate entries array on heap instead of stack
259 allocate(entries(MAX_FILES_PER_DIR))
260
261 ! List directory contents (returns 0 on error/permission denied)
262 num_entries = list_directory(path, entries, MAX_FILES_PER_DIR)
263 print *, "DEBUG: list_directory('", trim(path), "') returned ", num_entries, " entries"
264
265 ! If we got entries, scan them
266 if (num_entries > 0) then
267 ! First pass: count valid children
268 valid_children = 0
269 do i = 1, num_entries
270 if (should_skip_dir(entries(i))) cycle
271 ! Skip hidden files if show_hidden_files is false
272 if (.not. show_hidden_files .and. is_hidden_file(entries(i))) cycle
273 child_path = trim(path) // get_path_separator() // trim(entries(i))
274 if (is_symlink(child_path)) cycle
275 valid_children = valid_children + 1
276 end do
277
278 ! Allocate exact size needed
279 if (valid_children > 0) then
280 ! Deallocate old children array if it exists (from previous scan)
281 if (allocated(node%children)) then
282 ! Recursively deallocate each child first
283 do i = 1, node%num_children
284 call deallocate_child_tree(node%children(i))
285 end do
286 deallocate(node%children)
287 end if
288 allocate(node%children(valid_children))
289 node%num_children = 0
290
291 ! Second pass: scan children
292 do i = 1, num_entries
293 if (should_skip_dir(entries(i))) cycle
294 ! Skip hidden files if show_hidden_files is false
295 if (.not. show_hidden_files .and. is_hidden_file(entries(i))) cycle
296 child_path = trim(path) // get_path_separator() // trim(entries(i))
297 if (is_symlink(child_path)) cycle
298
299 ! Scan directly into node%children
300 node%num_children = node%num_children + 1
301 call scan_directory(child_path, node%children(node%num_children), depth + 1)
302 end do
303 end if
304 end if
305
306 ! Calculate directory size as sum of children
307 node%size = 0_int64
308 if (allocated(node%children)) then
309 do i = 1, node%num_children
310 node%size = node%size + node%children(i)%size
311 end do
312 end if
313
314 ! Group small files into synthetic node if applicable
315 call group_small_files(node)
316
317 ! Initialize original_size backup (for filter restoration)
318 node%original_size = node%size
319
320 ! Deallocate entries array
321 if (allocated(entries)) deallocate(entries)
322 else
323 ! File - get size directly
324 node%size = get_file_size(path)
325 ! Initialize original_size backup (for filter restoration)
326 node%original_size = node%size
327 end if
328 end subroutine scan_directory
329
330 ! Build tree from a root path
331 subroutine build_tree(root_path, root_node)
332 character(len=*), intent(in) :: root_path
333 type(file_node), intent(inout) :: root_node ! Changed from intent(out) to avoid auto-deallocation
334
335 call scan_directory(root_path, root_node)
336 end subroutine build_tree
337
338 ! Calculate cumulative sizes (stub - already done in scan_directory)
339 recursive subroutine calculate_sizes(node)
340 type(file_node), intent(inout) :: node
341 integer :: i
342
343 if (node%is_directory .and. allocated(node%children)) then
344 node%size = 0_int64
345 do i = 1, node%num_children
346 call calculate_sizes(node%children(i))
347 node%size = node%size + node%children(i)%size
348 end do
349 end if
350 end subroutine calculate_sizes
351
352 ! Extract filename from path
353 function extract_filename(path) result(filename)
354 character(len=*), intent(in) :: path
355 character(len=:), allocatable :: filename
356 integer :: last_sep, i
357 character(len=1) :: sep
358
359 sep = get_path_separator()
360 last_sep = 0
361
362 do i = len_trim(path), 1, -1
363 if (path(i:i) == sep) then
364 last_sep = i
365 exit
366 end if
367 end do
368
369 if (last_sep > 0 .and. last_sep < len_trim(path)) then
370 filename = trim(path(last_sep+1:))
371 else
372 filename = trim(path)
373 end if
374 end function extract_filename
375
376 ! Debug function to dump tree structure to file
377 subroutine dump_tree_debug(node, filename)
378 type(file_node), intent(in) :: node
379 character(len=*), intent(in) :: filename
380 integer :: unit, ios
381
382 open(newunit=unit, file=filename, status='replace', iostat=ios)
383 if (ios /= 0) then
384 print *, "Warning: Could not open debug file: ", trim(filename)
385 return
386 end if
387
388 write(unit, '(A)') '=== TREE STRUCTURE DEBUG ==='
389 call dump_node_recursive(node, unit, 0)
390 close(unit)
391 end subroutine dump_tree_debug
392
393 ! Recursive helper for dump_tree_debug
394 recursive subroutine dump_node_recursive(node, unit, depth)
395 type(file_node), intent(in) :: node
396 integer, intent(in) :: unit, depth
397 character(len=512) :: indent
398 integer :: i
399 character(len=20) :: size_str
400
401 ! Build indent string
402 indent = ''
403 do i = 1, depth * 2
404 indent(i:i) = ' '
405 end do
406
407 ! Format size
408 if (node%size < 1024_int64) then
409 write(size_str, '(I0,A)') node%size, 'B'
410 else if (node%size < 1024_int64**2) then
411 write(size_str, '(F0.2,A)') real(node%size)/1024.0, 'KB'
412 else if (node%size < 1024_int64**3) then
413 write(size_str, '(F0.2,A)') real(node%size)/(1024.0**2), 'MB'
414 else
415 write(size_str, '(F0.2,A)') real(node%size)/(1024.0**3), 'GB'
416 end if
417
418 ! Write node info
419 if (node%is_directory) then
420 write(unit, '(A,A,A,A,A,A,I0,A)') trim(indent(1:depth*2)), '[DIR] ', &
421 trim(node%name), ' (', trim(size_str), ', ', node%num_children, ' children)'
422 else
423 write(unit, '(A,A,A,A,A,A)') trim(indent(1:depth*2)), '[FILE] ', &
424 trim(node%name), ' (', trim(size_str), ')'
425 end if
426
427 ! Recurse into children
428 if (allocated(node%children)) then
429 do i = 1, node%num_children
430 call dump_node_recursive(node%children(i), unit, depth + 1)
431 end do
432 end if
433 end subroutine dump_node_recursive
434
435 ! Set whether to show hidden files (dotfiles)
436 subroutine set_show_hidden_files(show)
437 logical, intent(in) :: show
438 show_hidden_files = show
439 print *, "Disk scanner: show_hidden_files = ", show_hidden_files
440 end subroutine set_show_hidden_files
441
442 ! Check if a filename is hidden (starts with '.')
443 pure function is_hidden_file(filename) result(is_hidden)
444 character(len=*), intent(in) :: filename
445 logical :: is_hidden
446
447 is_hidden = .false.
448 if (len_trim(filename) > 0) then
449 if (filename(1:1) == '.') then
450 is_hidden = .true.
451 end if
452 end if
453 end function is_hidden_file
454
455 end module disk_scanner
456