Fortran · 37641 bytes Raw Blame History
1 ! ==============================================================================
2 ! Module: pipeline_helpers
3 ! ==============================================================================
4 ! Shared helpers for command expansion used by both executor and ast_executor.
5 ! Extracted from executor.f90 and parser.f90 to break the ast_executor → executor
6 ! dependency for pipeline execution.
7 !
8 ! Functions:
9 ! - expand_tokens: Variable expansion, IFS field splitting, quote handling
10 ! - expand_command_globs: Glob pattern expansion for command tokens
11 ! - process_command_escapes: Post-glob backslash escape processing
12 ! - has_escaped_spaces: Check for backslash-escaped spaces
13 ! - interpret_ifs_escapes: Convert IFS escape sequences (\t, \n, \\)
14 module pipeline_helpers
15 use shell_types
16 use iso_fortran_env, only: error_unit
17 implicit none
18 private
19
20 public :: expand_tokens
21 public :: expand_command_globs
22 public :: process_command_escapes
23 public :: has_escaped_spaces
24 public :: interpret_ifs_escapes
25
26 contains
27
! Report whether STR contains a backslash immediately followed by a blank.
!
! Only the significant part of STR (positions 1..len_trim(str)) is searched,
! so the blank padding that follows a trailing backslash is never counted
! as an escaped space.
!
!   str          - text to inspect (may be blank-padded)
!   has_escaped  - .true. when a backslash-blank pair occurs inside the
!                  trimmed text, .false. otherwise
function has_escaped_spaces(str) result(has_escaped)
  character(len=*), intent(in) :: str
  logical :: has_escaped
  character(len=2) :: escaped_blank

  ! char(92) is the backslash; built at run time so the source does not
  ! depend on how the compiler treats backslashes in literals.
  escaped_blank = char(92) // ' '

  has_escaped = index(str(1:len_trim(str)), escaped_blank) > 0
end function has_escaped_spaces
45
! Interpret escape sequences in an IFS string.
!
! Recognised sequences:  \t -> tab,  \n -> newline,  \\ -> backslash.
! A backslash followed by any other character, or a backslash that is the
! last character of INPUT, is copied through unchanged.
!
!   input       - raw IFS text; its full length (len, not len_trim) is
!                 processed because IFS may consist solely of blanks
!   output      - receives the interpreted text, blank-padded
!   output_len  - number of significant characters stored in OUTPUT
!
! The result is never longer than INPUT.  If OUTPUT is shorter than the
! interpreted text, the result is truncated to len(output) instead of being
! written past the end of the buffer; OUTPUT_LEN never exceeds len(output).
subroutine interpret_ifs_escapes(input, output, output_len)
  character(len=*), intent(in) :: input
  character(len=*), intent(out) :: output
  integer, intent(out) :: output_len

  character(len=1) :: backslash, ch
  integer :: pos, consumed, input_len, capacity

  backslash = char(92)          ! ASCII code for backslash
  input_len = len(input)
  capacity = len(output)
  output = ''
  output_len = 0

  pos = 1
  do while (pos <= input_len)
    ! Stop before writing outside the caller's buffer.
    if (output_len >= capacity) exit

    ch = input(pos:pos)
    consumed = 1
    if (ch == backslash) then
      ! Only look at the following character when there is one.
      if (pos < input_len) then
        if (input(pos+1:pos+1) == 't') then
          ch = char(9)
          consumed = 2
        else if (input(pos+1:pos+1) == 'n') then
          ch = char(10)
          consumed = 2
        else if (input(pos+1:pos+1) == backslash) then
          ! ch already holds the single backslash to emit
          consumed = 2
        end if
        ! Unknown escape: keep the backslash; the next character is
        ! copied on the following iteration.
      end if
    end if

    output_len = output_len + 1
    output(output_len:output_len) = ch
    pos = pos + consumed
  end do
end subroutine interpret_ifs_escapes
93
! Expand the tokens of CMD in place.
!
! For every input token, in order:
!   * a double-quoted token that is exactly $@ becomes one quoted token per
!     positional parameter;
!   * a single-quoted token is taken literally (outer quotes and the
!     char(2)/char(3) sentinels are removed);
!   * a double-quoted token is variable-expanded without brace expansion;
!   * an unquoted token containing { and } (but not ${) is brace-expanded,
!     each resulting word being variable-expanded separately;
!   * any other token is variable-expanded.
! The expansion result is then split on IFS characters unless the original
! token was quoted, the result contains '=', the original token contains an
! escaped space, the result starts with '%', or the command is [[ ... ]].
! Empty results of unquoted tokens are dropped.
!
! On return cmd%tokens, cmd%num_tokens, cmd%token_lengths and
! cmd%token_quoted describe the expanded token list; cmd%token_quote_type
! and cmd%token_escaped are enlarged (zero / .false. filled) when the token
! count grew.
!
!   cmd    - command whose token list is rewritten
!   shell  - shell state (variables, IFS, positional parameters); passed on
!            to expand_variables
subroutine expand_tokens(cmd, shell)
  use expansion, only: field_split, expand_braces_to_words
  use parser, only: expand_variables
  type(command_t), intent(inout) :: cmd
  type(shell_state_t), intent(inout) :: shell

  ! Growable output buffers (heap-allocated; see grow_temp_arrays)
  type(string_t), allocatable :: temp_tokens(:)
  integer, allocatable :: temp_token_lengths(:)   ! significant length of each output token
  logical, allocatable :: temp_token_quoted(:)    ! output token came from a quoted source
  ! Heap-allocated to avoid static storage in recursive context
  character(len=:), allocatable :: split_words(:)
  character(len=:), allocatable :: expanded
  character(len=256) :: ifs_to_use
  integer :: i, j, total_tokens, temp_cap, max_tok_len
  integer :: split_cap, split_char_len, word_count, ifs_len_to_use
  logical :: should_split, has_equals, has_escaped, is_format_string
  logical :: is_double_bracket_cmd, was_originally_quoted, handled, have_len

  split_cap = 256
  split_char_len = MAX_TOKEN_LEN
  allocate(character(len=split_char_len) :: split_words(split_cap))

  temp_cap = max(cmd%num_tokens * 10, 256)
  allocate(temp_tokens(temp_cap))
  allocate(temp_token_lengths(temp_cap))
  allocate(temp_token_quoted(temp_cap))
  temp_token_lengths = 0
  temp_token_quoted = .false.
  total_tokens = 0

  ! Word splitting is NOT performed inside [[ ]]
  is_double_bracket_cmd = .false.
  if (cmd%num_tokens > 0) is_double_bracket_cmd = (trim(cmd%tokens(1)) == '[[')

  call resolve_ifs()

  token_loop: do i = 1, cmd%num_tokens
    ! Remember the quoting of the source token (used to preserve trailing whitespace)
    was_originally_quoted = original_quoted_flag(i)

    ! "$@": each positional parameter becomes its own quoted token
    if (is_double_quoted(i)) then
      if (trim(cmd%tokens(i)) == '$@') then
        do j = 1, shell%num_positional
          call append_token(trim(shell%positional_params(j)%str), &
                            len_trim(shell%positional_params(j)%str), .true.)
        end do
        cycle token_loop
      end if
    end if

    if (is_single_quoted(i)) then
      ! Single quotes: no expansion at all
      expanded = strip_single_quoted(i)
    else if (is_double_quoted(i)) then
      ! Double quotes: no brace expansion; use token_lengths (when present)
      ! so that trailing whitespace inside the quotes survives.
      have_len = .false.
      if (allocated(cmd%token_lengths)) have_len = (i <= size(cmd%token_lengths))
      if (.not. have_len) then
        call expand_variables(cmd%tokens(i), expanded, shell, was_quoted_in=.true.)
      else if (cmd%token_lengths(i) > 0) then
        call expand_variables( &
          cmd%tokens(i)(1:min(cmd%token_lengths(i), len(cmd%tokens(i)))), &
          expanded, shell, was_quoted_in=.true.)
      else
        expanded = ''
      end if
    else if (index(cmd%tokens(i), '{') > 0 .and. index(cmd%tokens(i), '}') > 0 .and. &
             index(cmd%tokens(i), '${') == 0) then
      ! Unquoted token with braces (not ${...}): brace expansion first,
      ! then variable expansion of each produced word.
      call expand_brace_range(i, handled)
      if (handled) cycle token_loop
      call expand_brace_list(i, handled)
      if (handled) cycle token_loop
      ! Otherwise a single word was left in "expanded"; continue below.
    else
      ! No braces: standard variable expansion
      call expand_variables(cmd%tokens(i), expanded, shell, was_quoted_in=.false.)
    end if

    ! Treat a missing expansion result as the empty string so that the
    ! len()/assignment operations below are well defined.
    if (.not. allocated(expanded)) expanded = ''

    ! Decide whether to field-split.  An empty IFS (ifs_len_to_use == 0)
    ! disables splitting altogether.
    should_split = .false.
    if (ifs_len_to_use > 0) then
      if (scan(expanded, ifs_to_use(1:ifs_len_to_use)) > 0) then
        has_equals = (index(expanded, '=') > 0)                ! assignment-like word
        has_escaped = has_escaped_spaces(cmd%tokens(i))        ! backslash-space in source
        is_format_string = (expanded(1:1) == '%')              ! printf-style format
        should_split = .not. (token_looks_quoted(i) .or. has_equals .or. has_escaped &
                              .or. is_format_string .or. is_double_bracket_cmd)
      end if
    end if

    if (should_split) then
      ! Make sure split_words can hold every field and the longest field
      if (len(expanded) / 2 + 1 > split_cap .or. len(expanded) > split_char_len) then
        deallocate(split_words)
        split_cap = max(split_cap, len(expanded) / 2 + 1)
        split_char_len = max(split_char_len, len(expanded))
        allocate(character(len=split_char_len) :: split_words(split_cap))
      end if
      word_count = 0
      ! Pass the IFS with its exact length so trailing blanks are not added
      call field_split(expanded, ifs_to_use(1:ifs_len_to_use), split_words, word_count)
      do j = 1, word_count
        call append_token(split_words(j), len_trim(split_words(j)), .false.)
      end do
    else
      ! Drop empty results unless the source token was quoted
      if (len_trim(expanded) == 0) then
        if (.not. token_looks_quoted(i)) cycle token_loop
      end if
      call append_token(expanded, len(expanded), was_originally_quoted)
    end if
  end do token_loop

  ! Copy the string_t buffer into cmd%tokens, a uniform-width character
  ! array wide enough for the longest token (never narrower than MAX_TOKEN_LEN).
  max_tok_len = MAX_TOKEN_LEN
  do j = 1, total_tokens
    if (allocated(temp_tokens(j)%str)) max_tok_len = max(max_tok_len, len(temp_tokens(j)%str))
  end do
  if (allocated(cmd%tokens)) deallocate(cmd%tokens)
  allocate(character(len=max_tok_len) :: cmd%tokens(total_tokens))
  do j = 1, total_tokens
    if (allocated(temp_tokens(j)%str)) then
      cmd%tokens(j) = temp_tokens(j)%str
    else
      cmd%tokens(j) = ''
    end if
  end do
  deallocate(temp_tokens)
  cmd%num_tokens = total_tokens

  ! Hand the metadata buffers over without copying (move_alloc releases
  ! any previous allocation of the target).
  call move_alloc(temp_token_lengths, cmd%token_lengths)
  call move_alloc(temp_token_quoted, cmd%token_quoted)

  ! Enlarge token_quote_type and token_escaped when the token count grew.
  ! Existing entries keep their positions; new entries are 0 / .false.
  if (allocated(cmd%token_quote_type)) then
    block
      integer, allocatable :: old_qt(:)
      integer :: old_qt_size
      old_qt_size = size(cmd%token_quote_type)
      if (old_qt_size < total_tokens) then
        allocate(old_qt(old_qt_size))
        old_qt = cmd%token_quote_type
        deallocate(cmd%token_quote_type)
        allocate(cmd%token_quote_type(total_tokens))
        cmd%token_quote_type = 0
        cmd%token_quote_type(1:old_qt_size) = old_qt(1:old_qt_size)
      end if
    end block
  end if
  if (allocated(cmd%token_escaped)) then
    block
      logical, allocatable :: old_esc(:)
      integer :: old_esc_size
      old_esc_size = size(cmd%token_escaped)
      if (old_esc_size < total_tokens) then
        allocate(old_esc(old_esc_size))
        old_esc = cmd%token_escaped
        deallocate(cmd%token_escaped)
        allocate(cmd%token_escaped(total_tokens))
        cmd%token_escaped = .false.
        cmd%token_escaped(1:old_esc_size) = old_esc(1:old_esc_size)
      end if
    end block
  end if

contains

  ! Determine the IFS characters to split on (ifs_to_use / ifs_len_to_use).
  ! IFS counts as explicitly set when a variable named IFS exists in the
  ! global variable table or in any active local scope; then shell%ifs is
  ! used (escape sequences interpreted, empty value disables splitting).
  ! Otherwise the default space/tab/newline is used.
  subroutine resolve_ifs()
    integer :: v, depth
    logical :: ifs_explicitly_set

    ifs_explicitly_set = .false.
    do v = 1, shell%num_variables
      if (trim(shell%variables(v)%name) == 'IFS') then
        ifs_explicitly_set = .true.
        exit
      end if
    end do

    if (.not. ifs_explicitly_set .and. shell%function_depth > 0) then
      scope_loop: do depth = shell%function_depth, 1, -1
        if (depth > size(shell%local_var_counts)) cycle scope_loop
        do v = 1, shell%local_var_counts(depth)
          if (trim(shell%local_vars(depth, v)%name) == 'IFS') then
            ifs_explicitly_set = .true.
            exit scope_loop
          end if
        end do
      end do scope_loop
    end if

    if (.not. ifs_explicitly_set) then
      ifs_to_use = ' '//char(9)//char(10)   ! space, tab, newline (default IFS)
      ifs_len_to_use = 3
    else if (shell%ifs_len > 0) then
      ! ifs_len preserves trailing whitespace (e.g. IFS=" ")
      call interpret_ifs_escapes(shell%ifs(1:shell%ifs_len), ifs_to_use, ifs_len_to_use)
      ! Never index past the local buffer
      ifs_len_to_use = min(ifs_len_to_use, len(ifs_to_use))
    else
      ifs_to_use = ''                        ! empty IFS disables field splitting
      ifs_len_to_use = 0
    end if
  end subroutine resolve_ifs

  ! .true. when quote-type metadata marks token IDX as single-quoted.
  ! Nested tests: Fortran does not guarantee short-circuit evaluation.
  logical function is_single_quoted(idx) result(matches)
    integer, intent(in) :: idx
    matches = .false.
    if (.not. allocated(cmd%token_quote_type)) return
    if (idx > size(cmd%token_quote_type)) return
    matches = (cmd%token_quote_type(idx) == QUOTE_SINGLE)
  end function is_single_quoted

  ! .true. when quote-type metadata marks token IDX as double-quoted.
  logical function is_double_quoted(idx) result(matches)
    integer, intent(in) :: idx
    matches = .false.
    if (.not. allocated(cmd%token_quote_type)) return
    if (idx > size(cmd%token_quote_type)) return
    matches = (cmd%token_quote_type(idx) == QUOTE_DOUBLE)
  end function is_double_quoted

  ! Quoted status of source token IDX from metadata only: token_quoted when
  ! it covers IDX, else derived from token_quote_type, else .false.
  logical function original_quoted_flag(idx) result(quoted)
    integer, intent(in) :: idx
    if (allocated(cmd%token_quoted)) then
      if (idx <= size(cmd%token_quoted)) then
        quoted = cmd%token_quoted(idx)
        return
      end if
    end if
    quoted = is_single_quoted(idx) .or. is_double_quoted(idx)
  end function original_quoted_flag

  ! Quoted status of source token IDX: token_quoted when it covers IDX,
  ! otherwise whether the raw token text contains a quote character.
  logical function token_looks_quoted(idx) result(quoted)
    integer, intent(in) :: idx
    if (allocated(cmd%token_quoted)) then
      if (idx <= size(cmd%token_quoted)) then
        quoted = cmd%token_quoted(idx)
        return
      end if
    end if
    quoted = (index(cmd%tokens(idx), '"') > 0 .or. index(cmd%tokens(idx), "'") > 0)
  end function token_looks_quoted

  ! Literal value of single-quoted token IDX: a matching pair of outer
  ! quote characters is removed and char(2)/char(3) sentinels are skipped.
  ! token_lengths (when positive) supplies the length so trailing blanks
  ! are kept.
  function strip_single_quoted(idx) result(literal)
    integer, intent(in) :: idx
    character(len=:), allocatable :: literal
    character(len=:), allocatable :: buffer
    character(len=1) :: ch
    integer :: raw_len, first, last, p, n

    raw_len = 0
    if (allocated(cmd%token_lengths)) then
      if (idx <= size(cmd%token_lengths)) raw_len = cmd%token_lengths(idx)
    end if
    if (raw_len <= 0) raw_len = len_trim(cmd%tokens(idx))
    raw_len = min(raw_len, len(cmd%tokens(idx)))

    first = 1
    last = raw_len
    if (raw_len >= 2) then
      if (cmd%tokens(idx)(1:1) == "'" .and. cmd%tokens(idx)(raw_len:raw_len) == "'") then
        first = 2
        last = raw_len - 1
      end if
    end if

    allocate(character(len=max(last - first + 1, 0)) :: buffer)
    n = 0
    do p = first, last
      ch = cmd%tokens(idx)(p:p)
      if (ch == char(2) .or. ch == char(3)) cycle
      n = n + 1
      buffer(n:n) = ch
    end do
    literal = buffer(1:n)
  end function strip_single_quoted

  ! Fast path for a simple range: prefix{A..B}suffix or prefix{A..B..S}suffix
  ! with integer or single-character endpoints.  Generated words are appended
  ! directly as unquoted tokens and HANDLED is set; otherwise HANDLED is
  ! .false. and nothing is appended.
  subroutine expand_brace_range(idx, handled)
    integer, intent(in) :: idx
    logical, intent(out) :: handled
    character(len=:), allocatable :: tok, prefix, suffix, first_text, last_text
    character(len=16) :: num
    integer :: p_open, p_close, p_dots, p_dots2, last_stop
    integer :: step, stride, first, last, cur, n_items, ios

    handled = .false.
    tok = trim(cmd%tokens(idx))

    ! Words that still need variable or command substitution must go
    ! through the general path, which expands each produced word.
    if (scan(tok, '$`') > 0) return

    p_open = index(tok, '{')
    p_close = index(tok, '}', back=.true.)
    if (p_open <= 0 .or. p_close <= p_open) return
    ! No nesting and no comma list inside the braces
    if (scan(tok(p_open+1:p_close-1), '{,') > 0) return

    p_dots = index(tok(p_open+1:p_close-1), '..')
    if (p_dots <= 0) return
    p_dots = p_open + p_dots                 ! absolute position of the first '.'

    prefix = tok(1:p_open-1)
    suffix = tok(p_close+1:)

    ! Optional step after a second '..'
    step = 1
    last_stop = p_close
    p_dots2 = index(tok(p_dots+2:p_close-1), '..')
    if (p_dots2 > 0) then
      p_dots2 = p_dots + 1 + p_dots2         ! absolute position
      read(tok(p_dots2+2:p_close-1), *, iostat=ios) step
      if (ios /= 0) step = 1
      last_stop = p_dots2
    end if
    ! Direction comes from the endpoints; a zero step would never advance.
    step = abs(step)
    if (step == 0) step = 1

    first_text = tok(p_open+1:p_dots-1)
    last_text = tok(p_dots+2:last_stop-1)

    read(first_text, *, iostat=ios) first
    if (ios == 0) read(last_text, *, iostat=ios) last

    if (ios == 0) then
      ! Numeric range
      stride = merge(step, -step, first <= last)
      n_items = abs(last - first) / step + 1
      ! Pre-grow once instead of doubling repeatedly
      if (total_tokens + n_items > temp_cap) call grow_temp_arrays(total_tokens + n_items)
      do cur = first, last, stride
        write(num, '(I0)') cur
        call append_token(prefix // trim(num) // suffix, &
                          len(prefix) + len_trim(num) + len(suffix), .false.)
      end do
      handled = .true.
    else if (len_trim(first_text) == 1 .and. len_trim(last_text) == 1) then
      ! Single-character range, stepping through character codes
      first = ichar(tok(p_open+1:p_open+1))
      last = ichar(tok(p_dots+2:p_dots+2))
      stride = merge(step, -step, first <= last)
      n_items = abs(last - first) / step + 1
      if (total_tokens + n_items > temp_cap) call grow_temp_arrays(total_tokens + n_items)
      do cur = first, last, stride
        call append_token(prefix // char(cur) // suffix, &
                          len(prefix) + 1 + len(suffix), .false.)
      end do
      handled = .true.
    end if
  end subroutine expand_brace_range

  ! General brace expansion through expand_braces_to_words.  When more than
  ! one word results, each is variable-expanded (if it contains $ or a
  ! backtick) and appended as an unquoted token, and HANDLED is .true..
  ! With a single word, that word (variable-expanded) is left in the host
  ! variable "expanded" and HANDLED is .false. so the caller continues with
  ! the normal splitting logic.
  subroutine expand_brace_list(idx, handled)
    integer, intent(in) :: idx
    logical, intent(out) :: handled
    character(len=MAX_TOKEN_LEN), allocatable :: brace_words(:)
    character(len=:), allocatable :: var_expanded
    integer :: bw_count, w

    handled = .false.
    call expand_braces_to_words(trim(cmd%tokens(idx)), brace_words, bw_count)

    if (bw_count > 1) then
      do w = 1, bw_count
        ! Start clean so a stale value is never mistaken for this word's result
        if (allocated(var_expanded)) deallocate(var_expanded)
        if (scan(brace_words(w), '$`') > 0) then
          call expand_variables(trim(brace_words(w)), var_expanded, shell, was_quoted_in=.false.)
        end if
        if (allocated(var_expanded)) then
          call append_token(var_expanded, len(var_expanded), .false.)
        else
          call append_token(brace_words(w), len_trim(brace_words(w)), .false.)
        end if
      end do
      handled = .true.
    else
      expanded = trim(brace_words(1))
      if (scan(expanded, '$`') > 0) then
        call expand_variables(expanded, var_expanded, shell, was_quoted_in=.false.)
        if (allocated(var_expanded)) expanded = var_expanded
      end if
    end if
  end subroutine expand_brace_list

  ! Append one output token with its significant length and quoted flag,
  ! growing the buffers when they are full.
  subroutine append_token(text, text_len, is_quoted)
    character(len=*), intent(in) :: text
    integer, intent(in) :: text_len
    logical, intent(in) :: is_quoted

    total_tokens = total_tokens + 1
    if (total_tokens > temp_cap) call grow_temp_arrays()
    temp_tokens(total_tokens)%str = text
    temp_token_lengths(total_tokens) = text_len
    temp_token_quoted(total_tokens) = is_quoted
  end subroutine append_token

  ! Double the capacity of the output buffers (repeatedly, until at least
  ! MIN_CAP when given), keeping the existing entries.
  subroutine grow_temp_arrays(min_cap)
    integer, intent(in), optional :: min_cap
    type(string_t), allocatable :: new_tokens(:)
    integer, allocatable :: new_lengths(:)
    logical, allocatable :: new_quoted(:)
    integer :: new_cap, gk

    new_cap = temp_cap * 2
    if (present(min_cap)) then
      do while (new_cap < min_cap)
        new_cap = new_cap * 2
      end do
    end if

    allocate(new_tokens(new_cap))
    allocate(new_lengths(new_cap))
    allocate(new_quoted(new_cap))
    new_lengths = 0
    new_quoted = .false.

    ! Transfer each string's allocation instead of copying its contents
    do gk = 1, temp_cap
      if (allocated(temp_tokens(gk)%str)) then
        call move_alloc(temp_tokens(gk)%str, new_tokens(gk)%str)
      end if
    end do
    new_lengths(1:temp_cap) = temp_token_lengths(1:temp_cap)
    new_quoted(1:temp_cap) = temp_token_quoted(1:temp_cap)

    call move_alloc(new_tokens, temp_tokens)
    call move_alloc(new_lengths, temp_token_lengths)
    call move_alloc(new_quoted, temp_token_quoted)
    temp_cap = new_cap
  end subroutine grow_temp_arrays

end subroutine expand_tokens
704
! Replace the tokens of CMD by their glob (pathname) expansion.
!
! Nothing is done when CMD has no tokens, when the noglob option is set
! (set -f), or when no candidate token contains a glob character (* ? [).
! A token is a candidate unless it is marked quoted, is marked escaped, or
! (when no escape metadata exists) contains a backslash.
!
! When expand_glob_patterns returns at least one token, cmd%tokens,
! cmd%num_tokens and cmd%token_lengths are replaced by the expanded list and
! cmd%token_quoted / cmd%token_escaped are reset to .false. for every token.
! When it returns none, CMD is left unchanged.
!
!   cmd    - command whose token list is expanded in place
!   shell  - shell state; only option_noglob is read here
subroutine expand_command_globs(cmd, shell)
  use glob, only: expand_glob_patterns
  type(command_t), intent(inout) :: cmd
  type(shell_state_t), intent(in) :: shell

  character(len=MAX_TOKEN_LEN), allocatable :: expanded_tokens(:)
  character(len=:), allocatable :: original_tokens(:)
  integer :: expanded_count, i, tok_char_len
  logical :: has_expandable

  if (.not. allocated(cmd%tokens)) return
  if (cmd%num_tokens <= 0) return

  ! Skip glob expansion if noglob option is enabled (set -f)
  if (shell%option_noglob) return

  ! Look for at least one token that may be glob-expanded before doing
  ! any copying.
  has_expandable = .false.
  scan_loop: do i = 1, cmd%num_tokens
    ! Quoted tokens are never glob-expanded
    if (allocated(cmd%token_quoted)) then
      if (i <= size(cmd%token_quoted)) then
        if (cmd%token_quoted(i)) cycle scan_loop
      end if
    end if

    ! Escaped tokens: use metadata when available, otherwise treat any
    ! backslash (char(92)) in the token as an escape.
    if (allocated(cmd%token_escaped)) then
      if (i <= size(cmd%token_escaped)) then
        if (cmd%token_escaped(i)) cycle scan_loop
      end if
    else if (index(cmd%tokens(i), char(92)) > 0) then
      cycle scan_loop
    end if

    if (scan(cmd%tokens(i), '*?[') > 0) then
      has_expandable = .true.
      exit scan_loop
    end if
  end do scan_loop

  if (.not. has_expandable) return

  ! Preserve the character width from expand_tokens (may be > MAX_TOKEN_LEN)
  tok_char_len = len(cmd%tokens)

  ! Input copy for the glob routine, same character width as the tokens
  allocate(character(len=tok_char_len) :: original_tokens(cmd%num_tokens))
  do i = 1, cmd%num_tokens
    original_tokens(i) = cmd%tokens(i)
  end do

  ! Pass token_quoted (when present) so quoted tokens are left alone
  if (allocated(cmd%token_quoted)) then
    call expand_glob_patterns(original_tokens, cmd%num_tokens, expanded_tokens, &
                              expanded_count, cmd%token_quoted)
  else
    call expand_glob_patterns(original_tokens, cmd%num_tokens, expanded_tokens, expanded_count)
  end if

  ! No expansion result: keep the command exactly as it was
  if (expanded_count <= 0) return

  ! Replace command tokens with expanded ones
  deallocate(cmd%tokens)
  allocate(character(len=max(tok_char_len, MAX_TOKEN_LEN)) :: cmd%tokens(expanded_count))
  do i = 1, expanded_count
    cmd%tokens(i) = expanded_tokens(i)
  end do
  cmd%num_tokens = expanded_count

  ! Token lengths follow the new tokens (trimmed length)
  if (allocated(cmd%token_lengths)) deallocate(cmd%token_lengths)
  allocate(cmd%token_lengths(expanded_count))
  do i = 1, expanded_count
    cmd%token_lengths(i) = len_trim(expanded_tokens(i))
  end do

  ! Reset token_quoted and token_escaped for the expanded token list
  if (allocated(cmd%token_quoted)) deallocate(cmd%token_quoted)
  allocate(cmd%token_quoted(expanded_count))
  cmd%token_quoted = .false.

  if (allocated(cmd%token_escaped)) deallocate(cmd%token_escaped)
  allocate(cmd%token_escaped(expanded_count))
  cmd%token_escaped = .false.

  ! expanded_tokens and original_tokens are released automatically on return
end subroutine expand_command_globs
811
! Remove structural backslash escapes from every token of CMD.
!
! Outside quotes, a backslash followed by a blank, '*', '?' or '[' is
! dropped and the following character is kept.  Any other backslash pair
! (for example \n or \t) is kept unchanged, as is a backslash that is the
! last significant character of the token.  Inside a quoted region (opened
! by " or ' and closed by the same character) everything is copied
! verbatim; the quote characters themselves are kept.
!
! Tokens are processed up to len_trim; the result is stored back into
! cmd%tokens(i), blank-padded.  Does nothing when CMD has no tokens.
!
!   cmd  - command whose tokens are rewritten in place
subroutine process_command_escapes(cmd)
  type(command_t), intent(inout) :: cmd

  character(len=:), allocatable :: unescaped   ! work buffer, one token wide
  character(len=1) :: backslash, quote_char, ch
  integer :: i, j, k, token_len
  logical :: in_quotes

  ! Nothing to do (and no token width to query) without tokens
  if (.not. allocated(cmd%tokens)) return
  if (cmd%num_tokens <= 0) return

  backslash = char(92)   ! ASCII for backslash

  ! The output is never longer than the input, so one buffer of the
  ! token character width serves every token.
  allocate(character(len=len(cmd%tokens)) :: unescaped)

  do i = 1, cmd%num_tokens
    token_len = len_trim(cmd%tokens(i))
    k = 0                  ! characters written to the buffer
    j = 1                  ! read position in the token
    in_quotes = .false.
    quote_char = ' '

    do while (j <= token_len)
      ch = cmd%tokens(i)(j:j)

      if (in_quotes) then
        ! Only the matching quote character closes the region
        if (ch == quote_char) in_quotes = .false.
      else if (ch == '"' .or. ch == "'") then
        in_quotes = .true.
        quote_char = ch
      else if (ch == backslash .and. j < token_len) then
        if (scan(cmd%tokens(i)(j+1:j+1), ' *?[') == 0) then
          ! Non-structural escape: keep the backslash as well
          k = k + 1
          unescaped(k:k) = backslash
        end if
        ! In both cases the escaped character itself is copied below,
        ! without taking part in quote tracking.
        j = j + 1
        ch = cmd%tokens(i)(j:j)
      end if

      k = k + 1
      unescaped(k:k) = ch
      j = j + 1
    end do

    ! Store only the k characters produced; assignment blank-pads the rest
    cmd%tokens(i) = unescaped(1:k)
  end do
end subroutine process_command_escapes
887
888 end module pipeline_helpers
889