Fortran · 106871 bytes Raw Blame History
1 ! ==============================================================================
2 ! Module: parser
3 ! Purpose: Command line parsing and tokenization
4 ! ==============================================================================
5 module parser
6 use shell_types
7 use system_interface
8 use variables ! includes check_nounset
9 use glob
10 use error_handling
11 use performance
12 use iso_fortran_env, only: error_unit, input_unit
13 #ifdef USE_C_STRINGS
14 use iso_c_binding, only: c_char, c_int, c_ptr, c_size_t, c_f_pointer
15 #endif
16 implicit none
17
18 ! Export backtick conversion for new parser
19 public :: convert_backticks_to_dollar_paren
20 public :: needs_compound_continuation
21 public :: remove_line_continuations
22
23 contains
24
25 subroutine parse_pipeline(input, pipeline)
26 character(len=*), intent(in) :: input
27 type(pipeline_t), intent(out) :: pipeline
28
29 character(len=len(input)) :: working_input
30 integer :: start, cmd_count
31 integer :: i, newline_pos
32 type(command_t), allocatable :: temp_commands(:)
33 logical :: background, in_quotes, in_param_expansion, in_for_arith, after_case_in
34 character(len=1) :: quote_char
35 integer :: paren_depth, brace_depth, case_depth
36
37 integer(int64) :: parse_start_time
38
39 ! Start performance timing
40 call start_timer('parse_pipeline', parse_start_time)
41
42 ! Validate input
43 if (.not. validate_command(input)) then
44 call parser_error(101, 'Invalid command input', 'parse_pipeline')
45 pipeline%num_commands = 0
46 return
47 end if
48
49 call debug_log('Parsing pipeline: ' // trim(input), 'parse_pipeline')
50
51 allocate(temp_commands(MAX_PIPELINE))
52 call track_allocation(MAX_PIPELINE * 1024, 'temp_commands')
53
54 ! Strip comments (# to end of line, but not inside quotes or ${})
55 working_input = input
56 in_quotes = .false.
57 quote_char = ' '
58 in_param_expansion = .false.
59 do i = 1, len_trim(working_input)
60 if (in_quotes) then
61 if (working_input(i:i) == quote_char) then
62 in_quotes = .false.
63 end if
64 else
65 ! Track ${...} parameter expansion
66 if (i > 1 .and. working_input(i:i) == '{') then
67 if (working_input(i-1:i-1) == '$') in_param_expansion = .true.
68 else if (in_param_expansion .and. working_input(i:i) == '}') then
69 in_param_expansion = .false.
70 end if
71
72 if (working_input(i:i) == '"' .or. working_input(i:i) == "'") then
73 in_quotes = .true.
74 quote_char = working_input(i:i)
75 else if (working_input(i:i) == '#' .and. .not. in_param_expansion) then
76 ! Only treat # as comment if not part of $# or ${...}
77 if (i > 1) then; if (working_input(i-1:i-1) == '$') then
78 ! This is $#, not a comment
79 cycle
80 end if; end if
81 ! Found comment - remove from # to end of line (but keep newline and rest)
82 ! Find the next newline
83 newline_pos = index(working_input(i:), char(10))
84 if (newline_pos > 0) then
85 ! There's a newline - remove comment but keep newline and everything after
86 working_input = working_input(:i-1) // working_input(i+newline_pos-1:)
87 else
88 ! No newline - truncate to end
89 working_input = working_input(:i-1)
90 end if
91 exit
92 end if
93 end if
94 end do
95 cmd_count = 0
96 start = 1
97 background = .false.
98
99 ! Check for background execution (&)
100 if (len_trim(working_input) > 0) then
101 if (working_input(len_trim(working_input):len_trim(working_input)) == '&') then
102 background = .true.
103 working_input = working_input(:len_trim(working_input)-1)
104 end if
105 end if
106
107 ! Convert backticks to $() format BEFORE tokenization
108 ! This ensures complete backtick expressions are converted together
109 working_input = convert_backticks_to_dollar_paren(working_input)
110
111 ! Parse commands and separators (track quotes to avoid splitting inside them)
112 i = 1
113 in_quotes = .false.
114 quote_char = ' '
115 in_for_arith = .false.
116 paren_depth = 0
117 brace_depth = 0
118 case_depth = 0
119 after_case_in = .false.
120 do while (i <= len_trim(working_input))
121 ! Track quote state
122 if (.not. in_quotes) then
123 if (working_input(i:i) == '"' .or. working_input(i:i) == "'") then
124 in_quotes = .true.
125 quote_char = working_input(i:i)
126 end if
127 else
128 if (working_input(i:i) == quote_char) then
129 in_quotes = .false.
130 end if
131 end if
132
133 ! Only check for operators outside quotes
134 if (.not. in_quotes) then
135 ! Detect for (( at the beginning of command
136 if (.not. in_for_arith .and. i <= len_trim(working_input)-1) then
137 if (working_input(i:i+1) == '((') then
138 ! Check if 'for' appears before this
139 if (start <= i-1) then
140 if (index(working_input(start:i-1), 'for') > 0) then
141 in_for_arith = .true.
142 paren_depth = 0 ! Will be counted as we process
143 end if
144 end if
145 end if
146 end if
147
148 ! Track parentheses depth (for subshells and for ((..)))
149 ! Skip tracking for $( which is command substitution
150 if (working_input(i:i) == '(') then
151 ! Check if this is $( command substitution - if so, skip tracking
152 if (i == 1) then
153 paren_depth = paren_depth + 1
154 else if (working_input(i-1:i-1) /= '$') then
155 paren_depth = paren_depth + 1
156 end if
157 else if (working_input(i:i) == ')') then
158 if (paren_depth > 0) then
159 paren_depth = paren_depth - 1
160 ! Exit for (( when we've closed all parens
161 if (in_for_arith .and. paren_depth == 0) then
162 in_for_arith = .false.
163 end if
164 end if
165 end if
166
167 ! Track brace depth for function definitions
168 if (working_input(i:i) == '{') then
169 brace_depth = brace_depth + 1
170 else if (working_input(i:i) == '}') then
171 brace_depth = brace_depth - 1
172 end if
173
174 ! Track case statement depth (case...esac)
175 ! Check for 'case' keyword at word boundary
176 if (i <= len_trim(working_input) - 3) then
177 if (working_input(i:i+3) == 'case') then
178 ! Verify it's a word boundary (space or start of command before it)
179 block
180 logical :: at_word_boundary
181 if (i == 1) then
182 at_word_boundary = .true.
183 else
184 at_word_boundary = (working_input(i-1:i-1) == ' ' .or. working_input(i-1:i-1) == ';')
185 end if
186 if (at_word_boundary) then
187 ! Verify word boundary after (space or special char)
188 if (i+4 > len_trim(working_input) .or. working_input(i+4:i+4) == ' ' .or. &
189 working_input(i+4:i+4) == ';') then
190 case_depth = case_depth + 1
191 after_case_in = .false. ! Reset when we see 'case'
192 end if
193 end if
194 end block
195 end if
196 end if
197
198 ! Check for ' in ' keyword inside case statement (split after it)
199 if (case_depth > 0 .and. .not. after_case_in) then
200 if (i <= len_trim(working_input) - 2) then
201 if (working_input(i:i+1) == 'in') then
202 ! Verify word boundary before (space)
203 if (i > 1) then; if (working_input(i-1:i-1) == ' ') then
204 ! Verify word boundary after (space or end)
205 if (i+2 > len_trim(working_input) .or. working_input(i+2:i+2) == ' ') then
206 ! Split after ' in '
207 ! Find the end of 'in' plus any trailing space
208 if (i+2 <= len_trim(working_input) .and. working_input(i+2:i+2) == ' ') then
209 cmd_count = cmd_count + 1
210 if (cmd_count <= MAX_PIPELINE) then
211 call parse_single_command(working_input(start:i+1), temp_commands(cmd_count))
212 temp_commands(cmd_count)%separator = SEP_SEMICOLON
213 end if
214 start = i + 3 ! Skip 'in '
215 after_case_in = .true.
216 else
217 cmd_count = cmd_count + 1
218 if (cmd_count <= MAX_PIPELINE) then
219 call parse_single_command(working_input(start:i+1), temp_commands(cmd_count))
220 temp_commands(cmd_count)%separator = SEP_SEMICOLON
221 end if
222 start = i + 2 ! Skip 'in'
223 after_case_in = .true.
224 end if
225 i = start - 1 ! Will be incremented at end of loop
226 cycle
227 end if
228 end if; end if ! i > 1 guard
229 end if
230 end if
231 end if
232
233 ! Check for 'esac' keyword at word boundary
234 if (i <= len_trim(working_input) - 3) then
235 if (working_input(i:i+3) == 'esac') then
236 ! Verify it's a word boundary (space or start before it)
237 block
238 logical :: esac_word_boundary
239 if (i == 1) then
240 esac_word_boundary = .true.
241 else
242 esac_word_boundary = (working_input(i-1:i-1) == ' ' .or. working_input(i-1:i-1) == ';')
243 end if
244 if (esac_word_boundary) then
245 ! Verify word boundary after (space, semicolon, or end)
246 if (i+4 > len_trim(working_input) .or. working_input(i+4:i+4) == ' ' .or. &
247 working_input(i+4:i+4) == ';') then
248 case_depth = case_depth - 1
249 if (case_depth < 0) case_depth = 0 ! Prevent negative
250 after_case_in = .false. ! Reset after esac
251 end if
252 end if
253 end block
254 end if
255 end if
256
257 ! Check for operators (but skip if inside parentheses/subshell)
258 if (i <= len_trim(working_input) - 1 .and. paren_depth == 0) then
259 if (working_input(i:i+1) == '&&') then
260 cmd_count = cmd_count + 1
261 if (cmd_count <= MAX_PIPELINE) then
262 call parse_single_command(working_input(start:i-1), temp_commands(cmd_count))
263 temp_commands(cmd_count)%separator = SEP_AND
264 end if
265 start = i + 2
266 i = i + 2
267 cycle
268 else if (working_input(i:i+1) == '||') then
269 cmd_count = cmd_count + 1
270 if (cmd_count <= MAX_PIPELINE) then
271 call parse_single_command(working_input(start:i-1), temp_commands(cmd_count))
272 temp_commands(cmd_count)%separator = SEP_OR
273 end if
274 start = i + 2
275 i = i + 2
276 cycle
277 end if
278 end if
279
280 block
281 logical :: pipe_not_after_pipe, pipe_not_after_gt, pipe_not_before_pipe
282 if (i == 1) then
283 pipe_not_after_pipe = .true.
284 pipe_not_after_gt = .true.
285 else
286 pipe_not_after_pipe = (working_input(i-1:i-1) /= '|')
287 pipe_not_after_gt = (working_input(i-1:i-1) /= '>')
288 end if
289 if (i == len_trim(working_input)) then
290 pipe_not_before_pipe = .true.
291 else
292 pipe_not_before_pipe = (working_input(i+1:i+1) /= '|')
293 end if
294 if (working_input(i:i) == '|' .and. pipe_not_after_pipe .and. &
295 pipe_not_after_gt .and. pipe_not_before_pipe) then
296 ! Don't split on | if we're in a case pattern (after 'case...in') or inside parentheses (subshell)
297 if (.not. after_case_in .and. paren_depth == 0) then
298 cmd_count = cmd_count + 1
299 if (cmd_count <= MAX_PIPELINE) then
300 call parse_single_command(working_input(start:i-1), temp_commands(cmd_count))
301 temp_commands(cmd_count)%separator = SEP_PIPE
302 end if
303 start = i + 1
304 end if
305 else if (working_input(i:i) == '&') then
306 ! Check it's not part of && (which is already handled above)
307 ! Also check it's not part of >& or <& (FD redirection)
308 block
309 logical :: amp_not_after_special, amp_not_before_amp
310 if (i == 1) then
311 amp_not_after_special = .true.
312 else
313 amp_not_after_special = (working_input(i-1:i-1) /= '&' .and. &
314 working_input(i-1:i-1) /= '>' .and. &
315 working_input(i-1:i-1) /= '<')
316 end if
317 if (i == len_trim(working_input)) then
318 amp_not_before_amp = .true.
319 else
320 amp_not_before_amp = (working_input(i+1:i+1) /= '&')
321 end if
322 if (amp_not_after_special .and. amp_not_before_amp) then
323 ! Single & - mark current command as background and split
324 cmd_count = cmd_count + 1
325 if (cmd_count <= MAX_PIPELINE) then
326 call parse_single_command(working_input(start:i-1), temp_commands(cmd_count))
327 temp_commands(cmd_count)%separator = SEP_SEMICOLON ! Execute like semicolon
328 temp_commands(cmd_count)%background = .true. ! But run in background
329 end if
330 start = i + 1
331 end if
332 end block ! amp_not_after_special bounds-safe check
333 else if (working_input(i:i) == ';') then
334 ! Check for ;; (double semicolon) which is used in case statements
335 if (i < len_trim(working_input) .and. working_input(i+1:i+1) == ';') then
336 ! This is ;; - inside case statements it's a pattern terminator
337 if (case_depth > 0) then
338 cmd_count = cmd_count + 1
339 if (cmd_count <= MAX_PIPELINE) then
340 call parse_single_command(working_input(start:i-1), temp_commands(cmd_count))
341 temp_commands(cmd_count)%separator = SEP_SEMICOLON
342 end if
343 start = i + 2 ! Skip both semicolons
344 i = i + 1 ! Will be incremented again at end of loop
345 after_case_in = .false. ! Reset after ;;, ready for next pattern
346 else
347 ! POSIX: ;; outside case is treated as two semicolons (null command + separator)
348 ! Parse command before first semicolon (if any)
349 if (i > start) then
350 cmd_count = cmd_count + 1
351 if (cmd_count <= MAX_PIPELINE) then
352 call parse_single_command(working_input(start:i-1), temp_commands(cmd_count))
353 temp_commands(cmd_count)%separator = SEP_SEMICOLON
354 end if
355 end if
356 start = i + 2 ! Skip both semicolons
357 i = i + 1 ! Will be incremented again at end of loop
358 end if
359 ! Only split on single semicolon if not inside for (( ... )), function braces, subshells, or case statement
360 else if (in_for_arith .or. brace_depth > 0 .or. paren_depth > 0 .or. case_depth > 0) then
361 ! Skip - we're inside for (( ... )), function { ... }, subshell ( ... ), or case...esac
362 else
363 ! POSIX: semicolon at start is a null command - just skip it
364 if (i == start) then
365 start = i + 1
366 else
367 cmd_count = cmd_count + 1
368 if (cmd_count <= MAX_PIPELINE) then
369 call parse_single_command(working_input(start:i-1), temp_commands(cmd_count))
370 temp_commands(cmd_count)%separator = SEP_SEMICOLON
371 end if
372 start = i + 1
373 end if
374 end if
375 end if
376 end block ! pipe_not_after_pipe bounds-safe check
377 end if
378
379 i = i + 1
380 end do
381
382 ! Don't forget the last command
383 if (start <= len_trim(working_input)) then
384 cmd_count = cmd_count + 1
385 if (cmd_count <= MAX_PIPELINE) then
386 call parse_single_command(working_input(start:), temp_commands(cmd_count))
387 temp_commands(cmd_count)%separator = SEP_NONE
388 end if
389 end if
390
391 ! Set background flag for last command
392 if (cmd_count > 0 .and. background) then
393 temp_commands(cmd_count)%background = .true.
394 end if
395
396 ! Copy to pipeline with explicit token copying
397 pipeline%num_commands = cmd_count
398 if (cmd_count > 0) then
399 allocate(pipeline%commands(cmd_count))
400 do i = 1, cmd_count
401 ! Copy non-allocatable components
402 pipeline%commands(i)%num_tokens = temp_commands(i)%num_tokens
403 pipeline%commands(i)%append_output = temp_commands(i)%append_output
404 pipeline%commands(i)%append_error = temp_commands(i)%append_error
405 pipeline%commands(i)%force_clobber = temp_commands(i)%force_clobber
406 pipeline%commands(i)%redirect_stderr_to_stdout = temp_commands(i)%redirect_stderr_to_stdout
407 pipeline%commands(i)%redirect_stdout_to_stderr = temp_commands(i)%redirect_stdout_to_stderr
408 pipeline%commands(i)%redirect_both_to_file = temp_commands(i)%redirect_both_to_file
409 pipeline%commands(i)%background = temp_commands(i)%background
410 pipeline%commands(i)%separator = temp_commands(i)%separator
411 pipeline%commands(i)%is_command_group = temp_commands(i)%is_command_group
412 pipeline%commands(i)%is_subshell = temp_commands(i)%is_subshell
413
414 ! Copy redirection array
415 pipeline%commands(i)%num_redirections = temp_commands(i)%num_redirections
416 pipeline%commands(i)%redirections = temp_commands(i)%redirections
417
418 ! Copy prefix assignments (VAR=value command)
419 pipeline%commands(i)%num_prefix_assignments = temp_commands(i)%num_prefix_assignments
420 if (allocated(temp_commands(i)%prefix_assignments) .and. &
421 temp_commands(i)%num_prefix_assignments > 0) then
422 pipeline%commands(i)%prefix_assignments = temp_commands(i)%prefix_assignments
423 end if
424
425 ! Copy allocatable components explicitly
426 if (allocated(temp_commands(i)%tokens)) then
427 allocate(character(len=MAX_TOKEN_LEN) :: pipeline%commands(i)%tokens(temp_commands(i)%num_tokens))
428 pipeline%commands(i)%tokens = temp_commands(i)%tokens
429 end if
430
431 if (allocated(temp_commands(i)%input_file)) then
432 pipeline%commands(i)%input_file = temp_commands(i)%input_file
433 end if
434 if (allocated(temp_commands(i)%output_file)) then
435 pipeline%commands(i)%output_file = temp_commands(i)%output_file
436 end if
437 if (allocated(temp_commands(i)%error_file)) then
438 pipeline%commands(i)%error_file = temp_commands(i)%error_file
439 end if
440 if (allocated(temp_commands(i)%heredoc_delimiter)) then
441 pipeline%commands(i)%heredoc_delimiter = temp_commands(i)%heredoc_delimiter
442 end if
443 if (allocated(temp_commands(i)%heredoc_content)) then
444 pipeline%commands(i)%heredoc_content = temp_commands(i)%heredoc_content
445 end if
446 ! Copy heredoc quoted flag
447 pipeline%commands(i)%heredoc_quoted = temp_commands(i)%heredoc_quoted
448
449 if (allocated(temp_commands(i)%here_string)) then
450 pipeline%commands(i)%here_string = temp_commands(i)%here_string
451 end if
452 if (allocated(temp_commands(i)%group_content)) then
453 pipeline%commands(i)%group_content = temp_commands(i)%group_content
454 end if
455 if (allocated(temp_commands(i)%subshell_content)) then
456 pipeline%commands(i)%subshell_content = temp_commands(i)%subshell_content
457 end if
458 end do
459 end if
460
461 deallocate(temp_commands)
462 call track_deallocation(MAX_PIPELINE * 1024, 'temp_commands')
463
464 ! End performance timing
465 call end_timer('parse_pipeline', parse_start_time, total_parse_time)
466 total_commands = total_commands + 1
467
468 ! Trigger auto memory optimization periodically
469 if (mod(total_commands, 50_int64) == 0) then
470 call auto_optimize_memory()
471 end if
472 end subroutine
473
474 subroutine parse_single_command(input, cmd)
475 character(len=*), intent(in) :: input
476 type(command_t), intent(out) :: cmd
477
478 character(len=len(input)) :: working_input
479 integer :: pos, end_pos, source_fd
480 character(len=MAX_TOKEN_LEN) :: temp_str
481
482
483 working_input = adjustl(input)
484 ! write(error_unit, '(a,a)') 'DEBUG parse_single_command input: ', trim(working_input)
485
486 ! Handle subshell grouping ( ... )
487 ! Check if input starts with ( and ends with )
488 if (len_trim(working_input) >= 3) then
489 if (working_input(1:1) == '(') then
490 ! Find the position of the closing )
491 pos = len_trim(working_input)
492 if (working_input(pos:pos) == ')') then
493 ! This is a subshell - mark it and store the inner content
494 cmd%is_subshell = .true.
495 cmd%subshell_content = adjustl(working_input(2:pos-1))
496 ! Don't tokenize the content - it will be re-parsed during execution
497 cmd%num_tokens = 0
498 return
499 end if
500 end if
501 end if
502
503 ! Handle command grouping { ... }
504 ! Check if input starts with { and ends with }
505 if (len_trim(working_input) >= 3) then
506 if (working_input(1:1) == '{') then
507 ! Find the position of the closing }
508 pos = len_trim(working_input)
509 if (working_input(pos:pos) == '}') then
510 ! This is a command group - mark it and store the inner content
511 cmd%is_command_group = .true.
512 cmd%group_content = adjustl(working_input(2:pos-1))
513 ! Don't tokenize the content - it will be re-parsed during execution
514 cmd%num_tokens = 0
515 return
516 end if
517 end if
518 end if
519
520 ! Skip redirection processing for arithmetic commands ((expression)) and for (( loops
521 if (len_trim(working_input) >= 2 .and. working_input(1:2) == '((') then
522 ! This is an arithmetic command - tokenize directly without processing redirects
523 call tokenize_with_substitution(trim(working_input), cmd%tokens, cmd%num_tokens)
524 return
525 end if
526
527 ! Also skip redirection processing for arithmetic for loops for((...))
528 if (len_trim(working_input) >= 5 .and. working_input(1:5) == 'for((') then
529 ! This is an arithmetic for loop - tokenize directly without processing redirects
530 call tokenize_with_substitution(trim(working_input), cmd%tokens, cmd%num_tokens)
531 return
532 end if
533
534 ! Also handle 'for ((' with space (standard bash syntax)
535 if (len_trim(working_input) >= 6 .and. working_input(1:6) == 'for ((') then
536 ! This is an arithmetic for loop with space - tokenize directly without processing redirects
537 call tokenize_with_substitution(trim(working_input), cmd%tokens, cmd%num_tokens)
538 return
539 end if
540
541 ! Check for here-string (<<<) - must come before here document
542 pos = find_outside_quotes(working_input, '<<<')
543 if (pos > 0) then
544 call extract_filename(working_input(pos+3:), temp_str)
545 cmd%here_string = trim(temp_str)
546 working_input = working_input(:pos-1)
547 else
548 ! Check for here document (<<)
549 pos = find_outside_quotes(working_input, '<<')
550 if (pos > 0) then
551 call extract_word(working_input(pos+2:), temp_str)
552 ! Strip quotes from delimiter if present and track if it was quoted
553 cmd%heredoc_delimiter = trim(temp_str)
554 cmd%heredoc_quoted = strip_heredoc_delimiter_quotes(cmd%heredoc_delimiter)
555 ! Try to extract heredoc content from input if it contains newlines
556 call extract_heredoc_from_input(input, trim(cmd%heredoc_delimiter), cmd%heredoc_content)
557 working_input = working_input(:pos-1)
558 end if
559 end if
560
561 ! Check for specific 2>&1 redirection (stderr to stdout) - keep this as it's common
562 ! and is best handled specially
563 pos = find_outside_quotes(working_input, '2>&1')
564 if (pos > 0) then
565 cmd%redirect_stderr_to_stdout = .true.
566 working_input = working_input(:pos-1) // ' ' // working_input(pos+4:)
567 end if
568
569 ! Note: >&2, 1>&2, and other FD redirections are now handled by the general
570 ! FD duplication code below
571
572 ! Check for variable FD redirections >&${var} and <&${var}
573 pos = find_outside_quotes(working_input, '>&$')
574 if (pos > 0 .and. pos + 3 <= len_trim(working_input)) then
575 if (working_input(pos+3:pos+3) == '{') then
576 ! Found >&${...} pattern
577 end_pos = index(working_input(pos+4:), '}')
578 if (end_pos > 0) then
579 end_pos = pos + 3 + end_pos ! Adjust for full string position
580 ! Store variable expression and add redirection
581 if (cmd%num_redirections < 10) then
582 cmd%num_redirections = cmd%num_redirections + 1
583 cmd%redirections(cmd%num_redirections)%type = REDIR_DUP_OUT
584 cmd%redirections(cmd%num_redirections)%fd = 1 ! Default stdout
585 cmd%redirections(cmd%num_redirections)%target_fd = -1 ! Will be resolved at runtime
586 cmd%redirections(cmd%num_redirections)%target_fd_expr = working_input(pos+2:end_pos) ! Include ${...}
587 ! Remove from working input
588 working_input = working_input(:pos-1) // ' ' // working_input(end_pos+1:)
589 end if
590 end if
591 end if
592 end if
593
594 pos = find_outside_quotes(working_input, '<&$')
595 if (pos > 0 .and. pos + 3 <= len_trim(working_input)) then
596 if (working_input(pos+3:pos+3) == '{') then
597 ! Found <&${...} pattern
598 end_pos = index(working_input(pos+4:), '}')
599 if (end_pos > 0) then
600 end_pos = pos + 3 + end_pos ! Adjust for full string position
601 ! Store variable expression and add redirection
602 if (cmd%num_redirections < 10) then
603 cmd%num_redirections = cmd%num_redirections + 1
604 cmd%redirections(cmd%num_redirections)%type = REDIR_DUP_IN
605 cmd%redirections(cmd%num_redirections)%fd = 0 ! Default stdin
606 cmd%redirections(cmd%num_redirections)%target_fd = -1 ! Will be resolved at runtime
607 cmd%redirections(cmd%num_redirections)%target_fd_expr = working_input(pos+2:end_pos) ! Include ${...}
608 ! Remove from working input
609 working_input = working_input(:pos-1) // ' ' // working_input(end_pos+1:)
610 end if
611 end if
612 end if
613 end if
614
615 ! Check for general FD duplication >&n FIRST (must come before file redirections)
616 ! Use find_outside_quotes and iterate to find all patterns
617 pos = find_outside_quotes(working_input, '>&')
618 do while (pos > 0)
619 ! Debug output
620 ! write(error_unit, '(a,i15,a,a)') 'DEBUG: Found >& at pos ', pos, ' in: ', trim(working_input)
621
622 ! Check what follows >&
623 if (pos+2 <= len_trim(working_input)) then
624 ! write(error_unit, '(a,a)') 'DEBUG: Character after >& is: ', working_input(pos+2:pos+2)
625
626 if (working_input(pos+2:pos+2) >= '0' .and. working_input(pos+2:pos+2) <= '9') then
627 ! Found >&n pattern - literal FD duplication
628 read(working_input(pos+2:pos+2), *) end_pos
629 ! write(error_unit, '(a,i15)') 'DEBUG: Processing >&n where n=', end_pos
630
631 ! Check if there's a source FD before >&
632 source_fd = 1 ! Default to stdout
633 if (pos > 1 .and. working_input(pos-1:pos-1) >= '0' .and. working_input(pos-1:pos-1) <= '9') then
634 read(working_input(pos-1:pos-1), *) source_fd
635 ! write(error_unit, '(a,i15,a,i15)') 'DEBUG: Found source FD=', source_fd, ' redirecting to target FD=', end_pos
636 end if
637
638 if (cmd%num_redirections < 10) then
639 cmd%num_redirections = cmd%num_redirections + 1
640 cmd%redirections(cmd%num_redirections)%type = REDIR_DUP_OUT
641 cmd%redirections(cmd%num_redirections)%fd = source_fd
642 cmd%redirections(cmd%num_redirections)%target_fd = end_pos
643 end if
644 ! Remove from working input - also remove source FD if present
645 if (pos > 1 .and. working_input(pos-1:pos-1) >= '0' .and. working_input(pos-1:pos-1) <= '9') then
646 working_input = working_input(:pos-2) // ' ' // working_input(pos+3:)
647 else
648 working_input = working_input(:pos-1) // ' ' // working_input(pos+3:)
649 end if
650 ! write(error_unit, '(a,a)') 'DEBUG: After removal: ', trim(working_input)
651 ! Search again from beginning since we modified the string
652 pos = find_outside_quotes(working_input, '>&')
653 else
654 ! Not a digit after >&, skip this occurrence and find next
655 ! write(error_unit, '(a)') 'DEBUG: Not a digit after >&, exiting FD dup handler'
656 ! For now, just exit - we'll let the general > handler deal with it
657 exit
658 end if
659 else
660 ! write(error_unit, '(a)') 'DEBUG: No character after >&, exiting'
661 exit
662 end if
663 end do
664
665 ! if (pos == 0) then
666 ! write(error_unit, '(a)') 'DEBUG: No >& patterns found'
667 ! end if
668
669 ! Check for <&n patterns
670 pos = find_outside_quotes(working_input, '<&')
671 do while (pos > 0)
672 ! Check what follows <&
673 if (pos+2 <= len_trim(working_input)) then
674 if (working_input(pos+2:pos+2) >= '0' .and. working_input(pos+2:pos+2) <= '9') then
675 ! Found <&n pattern - literal FD duplication
676 read(working_input(pos+2:pos+2), *) end_pos
677 if (cmd%num_redirections < 10) then
678 cmd%num_redirections = cmd%num_redirections + 1
679 cmd%redirections(cmd%num_redirections)%type = REDIR_DUP_IN
680 cmd%redirections(cmd%num_redirections)%fd = 0 ! Default stdin
681 cmd%redirections(cmd%num_redirections)%target_fd = end_pos
682 end if
683 ! Remove from working input
684 working_input = working_input(:pos-1) // ' ' // working_input(pos+3:)
685 ! Search again from beginning since we modified the string
686 pos = find_outside_quotes(working_input, '<&')
687 else
688 ! Not a digit after <&, skip this occurrence and find next
689 ! For now, just exit - we'll let other handlers deal with it
690 exit
691 end if
692 else
693 exit
694 end if
695 end do
696
697 ! Now continue with other redirections
698 if (.not. (cmd%redirect_stderr_to_stdout)) then
699 ! Check for &>file or &>>file (both stdout and stderr to file)
700 pos = find_outside_quotes(working_input, '&>>')
701 if (pos > 0) then
702 cmd%redirect_both_to_file = .true.
703 cmd%append_output = .true.
704 cmd%append_error = .true.
705 call extract_filename(working_input(pos+3:), temp_str)
706 cmd%output_file = trim(temp_str)
707 cmd%error_file = trim(temp_str)
708 working_input = working_input(:pos-1)
709 else
710 pos = find_outside_quotes(working_input, '&>')
711 if (pos > 0) then
712 cmd%redirect_both_to_file = .true.
713 cmd%append_output = .false.
714 cmd%append_error = .false.
715 call extract_filename(working_input(pos+2:), temp_str)
716 cmd%output_file = trim(temp_str)
717 cmd%error_file = trim(temp_str)
718 working_input = working_input(:pos-1)
719 end if
720 end if
721 end if
722
723 ! Check for error redirection (2>>)
724 pos = find_outside_quotes(working_input, '2>>')
725 if (pos > 0) then
726 cmd%append_error = .true.
727 call extract_filename(working_input(pos+3:), temp_str)
728 cmd%error_file = trim(temp_str)
729 working_input = working_input(:pos-1)
730 else
731 ! Check for error redirection (2>)
732 pos = find_outside_quotes(working_input, '2>')
733 if (pos > 0) then
734 cmd%append_error = .false.
735 call extract_filename(working_input(pos+2:), temp_str)
736 cmd%error_file = trim(temp_str)
737 working_input = working_input(:pos-1)
738 end if
739 end if
740
741 ! Check for force clobber FIRST (>|) before append (>>)
742 ! This prevents ">|" from matching ">>"
743 pos = find_outside_quotes(working_input, '>|')
744 if (pos > 0) then
745 cmd%append_output = .false.
746 cmd%force_clobber = .true.
747 call extract_filename(working_input(pos+2:), temp_str)
748 cmd%output_file = trim(temp_str)
749 working_input = working_input(:pos-1)
750 else
751 ! Check for output redirection (>>)
752 pos = find_outside_quotes(working_input, '>>')
753 if (pos > 0) then
754 cmd%append_output = .true.
755 call extract_filename(working_input(pos+2:), temp_str)
756 cmd%output_file = trim(temp_str)
757 working_input = working_input(:pos-1)
758 else
759 ! Check for output redirection (>)
760 pos = find_outside_quotes(working_input, '>')
761 if (pos > 0) then
762 cmd%append_output = .false.
763 call extract_filename(working_input(pos+1:), temp_str)
764 cmd%output_file = trim(temp_str)
765 working_input = working_input(:pos-1)
766 end if
767 end if
768 end if
769
770 ! Check for input redirection (<)
771 pos = find_outside_quotes(working_input, '<')
772 if (pos > 0) then
773 call extract_filename(working_input(pos+1:), temp_str)
774 cmd%input_file = trim(temp_str)
775 working_input = working_input(:pos-1)
776 end if
777
778 ! Tokenize the remaining command
779 call tokenize_with_substitution(trim(working_input), cmd%tokens, cmd%num_tokens)
780
781 ! Extract prefix assignments (VAR=value command)
782 call extract_prefix_assignments(cmd)
783
784 end subroutine
785
786 subroutine extract_filename(input, filename)
787 character(len=*), intent(in) :: input
788 character(len=*), intent(out) :: filename
789 integer :: i
790
791 filename = adjustl(input)
792
793 do i = 1, len_trim(filename)
794 if (filename(i:i) == ' ' .or. filename(i:i) == char(9)) then
795 filename = filename(:i-1)
796 exit
797 end if
798 end do
799 end subroutine
800
801 subroutine extract_word(input, word)
802 character(len=*), intent(in) :: input
803 character(len=*), intent(out) :: word
804 integer :: i
805
806 word = adjustl(input)
807
808 do i = 1, len_trim(word)
809 if (word(i:i) == ' ' .or. word(i:i) == char(9) .or. word(i:i) == char(10) .or. &
810 word(i:i) == '<' .or. word(i:i) == '>' .or. &
811 word(i:i) == '|' .or. word(i:i) == '&' .or. &
812 word(i:i) == ';') then
813 word = word(:i-1)
814 exit
815 end if
816 end do
817 end subroutine
818
819 ! Strip quotes from heredoc delimiter ('EOF' -> EOF, "EOF" -> EOF)
820 ! Returns .true. if quotes were found and removed
821 function strip_heredoc_delimiter_quotes(delimiter) result(was_quoted)
822 character(len=*), intent(inout) :: delimiter
823 logical :: was_quoted
824 integer :: len_delim
825 character(len=1) :: first_char, last_char
826
827 was_quoted = .false.
828 len_delim = len_trim(delimiter)
829 if (len_delim < 2) return
830
831 first_char = delimiter(1:1)
832 last_char = delimiter(len_delim:len_delim)
833
834 ! Check if surrounded by matching quotes
835 if ((first_char == "'" .and. last_char == "'") .or. &
836 (first_char == '"' .and. last_char == '"')) then
837 ! Remove surrounding quotes
838 delimiter = delimiter(2:len_delim-1)
839 was_quoted = .true.
840 end if
841 end function
842
843 ! Find position of character outside quotes
844 function find_outside_quotes(str, char) result(pos)
845 character(len=*), intent(in) :: str, char
846 integer :: pos
847 integer :: i
848 logical :: in_quotes, in_arith
849 character(len=1) :: quote_char
850 integer :: arith_depth
851
852 pos = 0
853 in_quotes = .false.
854 in_arith = .false.
855 quote_char = ' '
856 arith_depth = 0
857
858 do i = 1, len_trim(str)
859 ! Check for arithmetic expansion start: $((
860 if (.not. in_quotes .and. i <= len_trim(str) - 2) then
861 if (str(i:i+2) == '$((') then
862 in_arith = .true.
863 arith_depth = arith_depth + 2
864 cycle
865 end if
866 end if
867
868 ! Track parentheses inside arithmetic expressions
869 if (in_arith .and. .not. in_quotes) then
870 if (str(i:i) == '(') then
871 arith_depth = arith_depth + 1
872 else if (str(i:i) == ')') then
873 arith_depth = arith_depth - 1
874 if (arith_depth == 0) then
875 in_arith = .false.
876 end if
877 end if
878 end if
879
880 if (.not. in_quotes .and. .not. in_arith) then
881 if (str(i:i) == '"' .or. str(i:i) == "'") then
882 in_quotes = .true.
883 quote_char = str(i:i)
884 else if (str(i:min(i+len(char)-1, len_trim(str))) == char) then
885 pos = i
886 return
887 end if
888 else if (in_quotes) then
889 if (str(i:i) == quote_char) then
890 in_quotes = .false.
891 end if
892 end if
893 end do
894 end function
895
896 subroutine tokenize_with_substitution(input, tokens, num_tokens)
897 character(len=*), intent(in) :: input
898 character(len=:), allocatable, intent(out) :: tokens(:)
899 integer, intent(out) :: num_tokens
900
901 character(len=len(input)) :: working_copy
902 integer :: pos, start, token_count, i
903 character(len=MAX_TOKEN_LEN), allocatable :: temp_tokens(:)
904 logical :: in_quotes, in_arith, in_array_literal, in_cmd_subst, escaped
905 character :: quote_char
906 integer :: arith_depth, array_depth, cmd_depth
907
908 working_copy = adjustl(input)
909 if (len_trim(working_copy) == 0) then
910 num_tokens = 0
911 return
912 end if
913
914 ! Count tokens first - must track quotes, $(( )), and array literals
915 token_count = 0
916 pos = 1
917 do while (pos <= len_trim(working_copy))
918 ! Skip whitespace
919 do while (pos <= len_trim(working_copy) .and. working_copy(pos:pos) == ' ')
920 pos = pos + 1
921 end do
922 if (pos > len_trim(working_copy)) exit
923
924 ! Found start of token
925 start = pos ! Initialize start to beginning of token
926 token_count = token_count + 1
927 in_quotes = .false.
928 in_arith = .false.
929 arith_depth = 0
930 quote_char = ' '
931 in_array_literal = .false.
932 array_depth = 0
933 in_cmd_subst = .false.
934 cmd_depth = 0
935 escaped = .false.
936
937 ! Skip to end of token (respecting quotes and arithmetic)
938 ! Continue past len_trim when inside quotes to preserve trailing spaces
939 do while (pos <= len_trim(working_copy) .or. (in_quotes .and. pos <= len(working_copy)))
940 ! Handle backslash escaping outside quotes
941 if (.not. in_quotes .and. .not. escaped .and. working_copy(pos:pos) == '\') then
942 escaped = .true.
943 pos = pos + 1
944 cycle
945 end if
946
947 ! Check for quotes (unless escaped)
948 if (.not. in_arith .and. .not. escaped) then
949 if (.not. in_quotes .and. (working_copy(pos:pos) == '"' .or. working_copy(pos:pos) == "'")) then
950 in_quotes = .true.
951 quote_char = working_copy(pos:pos)
952 else if (in_quotes .and. working_copy(pos:pos) == quote_char) then
953 in_quotes = .false.
954 end if
955 end if
956
957 ! Check for $(( )) arithmetic expansion and (( )) arithmetic command
958 if (.not. in_quotes .and. .not. escaped) then
959 ! First, check for special patterns that start arithmetic mode
960 if (.not. in_arith .and. .not. in_cmd_subst) then
961 if (pos <= len_trim(working_copy) - 2 .and. working_copy(pos:pos+2) == '$((') then
962 in_arith = .true.
963 arith_depth = 2
964 pos = pos + 2 ! Skip the $(
965 else if (pos <= len_trim(working_copy) - 1 .and. working_copy(pos:pos+1) == '$(') then
966 ! $( command substitution - but NOT $((
967 in_cmd_subst = .true.
968 cmd_depth = 1
969 pos = pos + 1 ! Skip the $(
970 else if (pos == start .and. pos <= len_trim(working_copy) - 1 .and. &
971 working_copy(pos:pos+1) == '((') then
972 ! (( at start of token - arithmetic command
973 in_arith = .true.
974 arith_depth = 2
975 pos = pos + 1 ! Skip the first (
976 else if (pos == start+3 .and. start <= len_trim(working_copy) - 4 .and. &
977 working_copy(start:start+2) == 'for' .and. &
978 working_copy(pos:pos+1) == '((') then
979 ! for(( - arithmetic for loop
980 in_arith = .true.
981 arith_depth = 0 ! Will be counted below
982 end if
983 end if
984
985 ! Then, track parentheses depth if in arithmetic mode
986 if (in_arith) then
987 if (working_copy(pos:pos) == '(') then
988 arith_depth = arith_depth + 1
989 else if (working_copy(pos:pos) == ')') then
990 arith_depth = arith_depth - 1
991 if (arith_depth == 0) then
992 in_arith = .false.
993 end if
994 end if
995 end if
996
997 ! Track parentheses depth if in command substitution mode
998 if (in_cmd_subst) then
999 if (working_copy(pos:pos) == '(') then
1000 cmd_depth = cmd_depth + 1
1001 else if (working_copy(pos:pos) == ')') then
1002 cmd_depth = cmd_depth - 1
1003 if (cmd_depth == 0) then
1004 in_cmd_subst = .false.
1005 end if
1006 end if
1007 end if
1008 end if
1009
1010 ! Check for array literal: var=(...)
1011 if (.not. in_quotes .and. .not. in_arith) then
1012 if (pos > 1 .and. pos <= len_trim(working_copy) - 1 .and. &
1013 working_copy(pos-1:pos) == '=(') then
1014 ! Start of array literal
1015 in_array_literal = .true.
1016 array_depth = 1
1017 else if (in_array_literal) then
1018 if (working_copy(pos:pos) == '(') then
1019 array_depth = array_depth + 1
1020 else if (working_copy(pos:pos) == ')') then
1021 array_depth = array_depth - 1
1022 if (array_depth == 0) in_array_literal = .false.
1023 end if
1024 end if
1025 end if
1026
1027 ! Check for token boundary (space outside quotes/arithmetic/array/command-subst, and not escaped)
1028 if (.not. in_quotes .and. .not. in_arith .and. .not. in_array_literal .and. &
1029 .not. in_cmd_subst .and. .not. escaped .and. working_copy(pos:pos) == ' ') exit
1030
1031 ! Clear escaped flag after processing the escaped character
1032 if (escaped) escaped = .false.
1033
1034 pos = pos + 1
1035 end do
1036 end do
1037
1038 num_tokens = token_count
1039 if (num_tokens == 0) return
1040
1041 ! Allocate temporary storage
1042 allocate(temp_tokens(num_tokens))
1043
1044 ! Extract tokens into temporary array (same logic as counting)
1045 pos = 1
1046 token_count = 0
1047 do while (pos <= len_trim(working_copy) .and. token_count < num_tokens)
1048 ! Skip whitespace
1049 do while (pos <= len_trim(working_copy) .and. working_copy(pos:pos) == ' ')
1050 pos = pos + 1
1051 end do
1052 if (pos > len_trim(working_copy)) exit
1053
1054 start = pos
1055 in_quotes = .false.
1056 in_arith = .false.
1057 arith_depth = 0
1058 quote_char = ' '
1059 in_array_literal = .false.
1060 array_depth = 0
1061 in_cmd_subst = .false.
1062 cmd_depth = 0
1063 escaped = .false.
1064
1065 ! Find end of token (respecting quotes, arithmetic, and array literals)
1066 ! Continue past len_trim when inside quotes to preserve trailing spaces
1067 do while (pos <= len_trim(working_copy) .or. (in_quotes .and. pos <= len(working_copy)))
1068 ! Handle backslash escaping outside quotes
1069 if (.not. in_quotes .and. .not. escaped .and. working_copy(pos:pos) == '\') then
1070 escaped = .true.
1071 pos = pos + 1
1072 cycle
1073 end if
1074
1075 ! Check for quotes (unless escaped)
1076 if (.not. in_arith .and. .not. escaped) then
1077 if (.not. in_quotes .and. (working_copy(pos:pos) == '"' .or. working_copy(pos:pos) == "'")) then
1078 in_quotes = .true.
1079 quote_char = working_copy(pos:pos)
1080 else if (in_quotes .and. working_copy(pos:pos) == quote_char) then
1081 in_quotes = .false.
1082 end if
1083 end if
1084
1085 ! Check for $(( )) arithmetic expansion and (( )) arithmetic command
1086 if (.not. in_quotes .and. .not. escaped) then
1087 ! First, check for special patterns that start arithmetic mode
1088 if (.not. in_arith .and. .not. in_cmd_subst) then
1089 if (pos <= len_trim(working_copy) - 2 .and. working_copy(pos:pos+2) == '$((') then
1090 in_arith = .true.
1091 arith_depth = 2
1092 pos = pos + 2 ! Skip the $(
1093 else if (pos <= len_trim(working_copy) - 1 .and. working_copy(pos:pos+1) == '$(') then
1094 ! $( command substitution - but NOT $((
1095 in_cmd_subst = .true.
1096 cmd_depth = 1
1097 pos = pos + 1 ! Skip the $(
1098 else if (pos == start .and. pos <= len_trim(working_copy) - 1 .and. &
1099 working_copy(pos:pos+1) == '((') then
1100 ! (( at start of token - arithmetic command
1101 in_arith = .true.
1102 arith_depth = 2
1103 pos = pos + 1 ! Skip the first (
1104 else if (pos == start+3 .and. start <= len_trim(working_copy) - 4 .and. &
1105 working_copy(start:start+2) == 'for' .and. &
1106 working_copy(pos:pos+1) == '((') then
1107 ! for(( - arithmetic for loop
1108 in_arith = .true.
1109 arith_depth = 0 ! Will be counted below
1110 end if
1111 end if
1112
1113 ! Then, track parentheses depth if in arithmetic mode
1114 if (in_arith) then
1115 if (working_copy(pos:pos) == '(') then
1116 arith_depth = arith_depth + 1
1117 else if (working_copy(pos:pos) == ')') then
1118 arith_depth = arith_depth - 1
1119 if (arith_depth == 0) then
1120 in_arith = .false.
1121 end if
1122 end if
1123 end if
1124
1125 ! Track parentheses depth if in command substitution mode
1126 if (in_cmd_subst) then
1127 if (working_copy(pos:pos) == '(') then
1128 cmd_depth = cmd_depth + 1
1129 else if (working_copy(pos:pos) == ')') then
1130 cmd_depth = cmd_depth - 1
1131 if (cmd_depth == 0) then
1132 in_cmd_subst = .false.
1133 end if
1134 end if
1135 end if
1136 end if
1137
1138 ! Check for array literal: var=(...)
1139 if (.not. in_quotes .and. .not. in_arith) then
1140 if (pos > 1 .and. pos <= len_trim(working_copy) - 1 .and. &
1141 working_copy(pos-1:pos) == '=(') then
1142 ! Start of array literal
1143 in_array_literal = .true.
1144 array_depth = 1
1145 else if (in_array_literal) then
1146 if (working_copy(pos:pos) == '(') then
1147 array_depth = array_depth + 1
1148 else if (working_copy(pos:pos) == ')') then
1149 array_depth = array_depth - 1
1150 if (array_depth == 0) in_array_literal = .false.
1151 end if
1152 end if
1153 end if
1154
1155 ! Check for token boundary (space outside quotes/arithmetic/array/command-subst, and not escaped)
1156 if (.not. in_quotes .and. .not. in_arith .and. .not. in_array_literal .and. &
1157 .not. in_cmd_subst .and. .not. escaped .and. working_copy(pos:pos) == ' ') exit
1158
1159 ! Clear escaped flag after processing the escaped character
1160 if (escaped) escaped = .false.
1161
1162 pos = pos + 1
1163 end do
1164
1165 ! Store token (DON'T strip quotes yet - expand_variables needs to see them)
1166 ! DON'T process backslash escapes yet - glob expansion needs to see them
1167 token_count = token_count + 1
1168 temp_tokens(token_count) = working_copy(start:pos-1)
1169 end do
1170
1171 ! Now allocate the final deferred-length character array
1172 ! We'll use MAX_TOKEN_LEN as a uniform length for now
1173 allocate(character(len=MAX_TOKEN_LEN) :: tokens(num_tokens))
1174 do i = 1, num_tokens
1175 tokens(i) = temp_tokens(i)
1176 end do
1177
1178 deallocate(temp_tokens)
1179 end subroutine
1180
1181 ! Helper function to strip outer quotes from a token
1182 function strip_outer_quotes(token) result(stripped)
1183 character(len=*), intent(in) :: token
1184 character(len=len(token)) :: stripped
1185 integer :: token_len
1186
1187 stripped = token
1188 token_len = len_trim(token)
1189
1190 ! Check if token has matching outer quotes
1191 if (token_len >= 2) then
1192 ! Check for double quotes
1193 if (token(1:1) == '"' .and. token(token_len:token_len) == '"') then
1194 stripped = token(2:token_len-1)
1195 return
1196 end if
1197 ! Check for single quotes
1198 if (token(1:1) == "'" .and. token(token_len:token_len) == "'") then
1199 stripped = token(2:token_len-1)
1200 return
1201 end if
1202 end if
1203 end function
1204
1205 ! Helper function to strip ALL quotes from a token (for adjacent quotes like "a"b"c")
1206 function strip_all_quotes(token) result(stripped)
1207 character(len=*), intent(in) :: token
1208 character(len=len(token)) :: stripped
1209 integer :: i, j, token_len
1210 logical :: in_single_quote, in_double_quote
1211
1212 stripped = ''
1213 j = 1
1214 token_len = len_trim(token)
1215 in_single_quote = .false.
1216 in_double_quote = .false.
1217
1218 do i = 1, token_len
1219 if (token(i:i) == "'" .and. .not. in_double_quote) then
1220 ! Toggle single quote mode
1221 in_single_quote = .not. in_single_quote
1222 ! Don't include the quote character itself
1223 else if (token(i:i) == '"' .and. .not. in_single_quote) then
1224 ! Toggle double quote mode
1225 in_double_quote = .not. in_double_quote
1226 ! Don't include the quote character itself
1227 else
1228 ! Regular character - include it
1229 stripped(j:j) = token(i:i)
1230 j = j + 1
1231 end if
1232 end do
1233 end function
1234
1235 ! Helper function to process backslash escape sequences outside quotes
1236 function process_escapes(token) result(processed)
1237 character(len=*), intent(in) :: token
1238 character(len=len(token)) :: processed
1239 integer :: i, j, token_len
1240 logical :: in_quotes
1241 character :: quote_char
1242
1243 processed = ''
1244 i = 1
1245 j = 1
1246 token_len = len_trim(token)
1247 in_quotes = .false.
1248 quote_char = ' '
1249
1250 do while (i <= token_len)
1251 ! Track quotes
1252 if (.not. in_quotes .and. (token(i:i) == '"' .or. token(i:i) == "'")) then
1253 in_quotes = .true.
1254 quote_char = token(i:i)
1255 processed(j:j) = token(i:i)
1256 j = j + 1
1257 i = i + 1
1258 else if (in_quotes .and. token(i:i) == quote_char) then
1259 in_quotes = .false.
1260 processed(j:j) = token(i:i)
1261 j = j + 1
1262 i = i + 1
1263 else if (.not. in_quotes .and. token(i:i) == '\' .and. i < token_len) then
1264 ! Backslash escape outside quotes - skip the backslash, keep the next char
1265 i = i + 1
1266 processed(j:j) = token(i:i)
1267 j = j + 1
1268 i = i + 1
1269 else
1270 ! Regular character
1271 processed(j:j) = token(i:i)
1272 j = j + 1
1273 i = i + 1
1274 end if
1275 end do
1276 end function
1277
1278 subroutine expand_variables(token, expanded, shell, was_quoted_in)
1279 use expansion, only: expand_braces, arithmetic_expansion_shell, process_param_expansion
1280 character(len=*), intent(in) :: token
1281 character(len=:), allocatable, intent(out) :: expanded
1282 type(shell_state_t), intent(inout) :: shell
1283 logical, intent(in), optional :: was_quoted_in
1284
1285 character(len=:), allocatable :: result, working_token
1286 integer :: i, j, var_start, brace_depth, end_pos
1287 integer :: result_cap
1288 character(len=256) :: var_name ! Variable names are short; was MAX_TOKEN_LEN (4096)
1289 character(len=:), allocatable :: var_value, brace_expanded
1290 character(len=20) :: pid_str
1291 logical :: is_quoted, is_single_quoted
1292 logical :: escapes_already_processed ! True if lexer already processed escapes
1293
1294 ! Initialize growing result buffer
1295 result_cap = max(len(token) * 4, 16384)
1296 allocate(character(len=result_cap) :: result)
1297
1298 ! Check if token was originally quoted (from lexer metadata or token inspection)
1299 is_quoted = .false.
1300 is_single_quoted = .false.
1301 escapes_already_processed = .false.
1302
1303 ! Use the passed parameter if provided, otherwise fall back to checking the token
1304 if (present(was_quoted_in)) then
1305 is_quoted = was_quoted_in
1306 ! We don't track single vs double quotes in metadata, assume double if quoted
1307 is_single_quoted = .false.
1308 ! If was_quoted_in=true but token doesn't have outer quotes, the new lexer
1309 ! already stripped quotes and processed escapes - don't re-process them
1310 if (is_quoted .and. len_trim(token) >= 2) then
1311 if (.not. (token(1:1) == '"' .and. token(len_trim(token):len_trim(token)) == '"')) then
1312 escapes_already_processed = .true.
1313 end if
1314 else if (is_quoted .and. len_trim(token) < 2) then
1315 ! Short quoted token without surrounding quotes - escapes were processed
1316 escapes_already_processed = .true.
1317 end if
1318 else
1319 ! Legacy: check if token still has quotes (for backward compatibility)
1320 if (len_trim(token) >= 2) then
1321 if (token(1:1) == '"' .and. token(len_trim(token):len_trim(token)) == '"') then
1322 is_quoted = .true.
1323 else if (token(1:1) == "'" .and. token(len_trim(token):len_trim(token)) == "'") then
1324 is_quoted = .true.
1325 is_single_quoted = .true.
1326 end if
1327 end if
1328 end if
1329 ! Single quotes preserve everything literally - no expansion at all
1330 if (is_single_quoted) then
1331 ! Return the token with outer quotes stripped
1332 expanded = strip_outer_quotes(token)
1333 return
1334 end if
1335
1336 ! For double-quoted tokens that are all whitespace (no special chars),
1337 ! return the whitespace to preserve the argument
1338 ! This handles cases like " " or " " where len_trim would be 0
1339 ! Now that executor passes correct token length, we preserve exact whitespace count
1340 if (is_quoted .and. len_trim(token) == 0 .and. len(token) > 0) then
1341 ! Check if this is truly a whitespace token vs a token with quotes at boundaries
1342 block
1343 ! If token has quotes at boundaries, don't count them as content - let normal processing handle it
1344 if (len(token) >= 2) then
1345 if ((token(1:1) == '"' .and. token(len(token):len(token)) == '"') .or. &
1346 (token(1:1) == "'" .and. token(len(token):len(token)) == "'")) then
1347 ! Token is just quotes with possibly empty content - let normal processing handle it
1348 else
1349 ! Token is all whitespace (no quotes) - return the whitespace exactly as-is
1350 expanded = token
1351 return
1352 end if
1353 else
1354 ! Short token that's all whitespace - return it as-is
1355 expanded = token
1356 return
1357 end if
1358 end block
1359 end if
1360
1361 ! Apply brace expansion ONLY if token is not quoted
1362 if (.not. is_quoted) then
1363 brace_expanded = expand_braces(token)
1364 working_token = brace_expanded
1365 else
1366 working_token = token
1367 end if
1368
1369 i = 1
1370 j = 1
1371 ! For quoted tokens, use len(token) to preserve trailing whitespace
1372 ! For unquoted tokens, use len_trim() to skip padding
1373 if (is_quoted) then
1374 end_pos = len(token) ! Use actual passed token length, not buffer size
1375 ! If token has actual quote characters (not sentinels), skip them
1376 ! This handles tokens from the old parser path which don't use sentinels
1377 if (end_pos >= 2) then
1378 if (working_token(1:1) == '"' .and. working_token(end_pos:end_pos) == '"') then
1379 i = 2 ! Skip opening quote
1380 end_pos = end_pos - 1 ! Skip closing quote
1381 end if
1382 end if
1383 else
1384 end_pos = len_trim(working_token)
1385 end if
1386
1387 ! Track if we're inside single-quoted literal region (between char(2) markers)
1388 block
1389 logical :: in_single_quote_literal
1390 in_single_quote_literal = .false.
1391
1392 do while (i <= end_pos)
1393 ! Grow result buffer if needed (headroom for single-char writes)
1394 call ensure_result_cap(j + 256)
1395 ! Check for single-quote literal START sentinel (char(2))
1396 if (working_token(i:i) == char(2)) then
1397 in_single_quote_literal = .true.
1398 i = i + 1
1399 cycle
1400 end if
1401
1402 ! Check for single-quote literal END sentinel (char(3))
1403 if (working_token(i:i) == char(3)) then
1404 in_single_quote_literal = .false.
1405 i = i + 1
1406 cycle
1407 end if
1408
1409 ! In single-quoted literal region, copy everything literally (no expansion)
1410 if (in_single_quote_literal) then
1411 result(j:j) = working_token(i:i)
1412 i = i + 1
1413 j = j + 1
1414 cycle
1415 end if
1416
1417 ! Check for double-quote boundary sentinel (char(1)) - skip it
1418 if (working_token(i:i) == char(1)) then
1419 i = i + 1
1420 cycle
1421 end if
1422
1423 ! Check for backslash escape
1424 ! Handle \$ and \` even outside quotes since lexer keeps both chars for these
1425 if (working_token(i:i) == '\' .and. i < end_pos) then
1426 if (working_token(i+1:i+1) == '$') then
1427 ! \$ -> literal $ (lexer keeps both chars, we process here)
1428 i = i + 1 ! Skip backslash
1429 result(j:j) = '$'
1430 i = i + 1
1431 j = j + 1
1432 cycle
1433 else if (working_token(i+1:i+1) == '`') then
1434 ! \` -> literal ` (lexer keeps both chars, we process here)
1435 i = i + 1 ! Skip backslash
1436 result(j:j) = '`'
1437 i = i + 1
1438 j = j + 1
1439 cycle
1440 else if (is_quoted .and. .not. is_single_quoted .and. .not. escapes_already_processed) then
1441 ! In double quotes, backslash also escapes: " \ and newline
1442 ! BUT skip this if lexer already processed escapes (new lexer path)
1443 ! Note: \$ and \` are handled above because lexer keeps both chars for them
1444 if (working_token(i+1:i+1) == '"' .or. working_token(i+1:i+1) == '\') then
1445 ! Skip the backslash and add the escaped character
1446 i = i + 1
1447 result(j:j) = working_token(i:i)
1448 i = i + 1
1449 j = j + 1
1450 cycle
1451 end if
1452 end if
1453 ! Otherwise, keep the backslash (it's not escaping anything special)
1454 end if
1455
1456 ! POSIX: Tilde expansion is NOT performed inside double quotes
1457 block
1458 logical :: tilde_at_word_start
1459 if (i == 1) then
1460 tilde_at_word_start = .true.
1461 else
1462 tilde_at_word_start = (working_token(i-1:i-1) == ' ')
1463 end if
1464 if (working_token(i:i) == '~' .and. tilde_at_word_start &
1465 .and. .not. is_quoted) then
1466 ! Tilde expansion
1467 call process_tilde_expansion(working_token, i, result, j, shell)
1468 else if (working_token(i:i) == '$' .and. i < len_trim(working_token)) then
1469 i = i + 1
1470
1471 ! Check for special variables
1472 if (working_token(i:i) == '?') then
1473 write(pid_str, '(i15)') shell%last_exit_status
1474 pid_str = adjustl(pid_str) ! Left-justify to remove leading spaces
1475 result(j:j+len_trim(pid_str)-1) = trim(pid_str)
1476 j = j + len_trim(pid_str)
1477 i = i + 1
1478 else if (working_token(i:i) == '$') then
1479 ! Use shell%shell_pid (set at startup) so $$ returns same value in subshells
1480 write(pid_str, '(i0)') shell%shell_pid
1481 pid_str = adjustl(pid_str) ! Left-justify to remove leading spaces
1482 result(j:j+len_trim(pid_str)-1) = trim(pid_str)
1483 j = j + len_trim(pid_str)
1484 i = i + 1
1485 else if (working_token(i:i) == '\' .and. i < len_trim(working_token) .and. working_token(i+1:i+1) == '!') then
1486 ! Handle bash-escaped $\! (bash adds backslash before ! in some contexts)
1487 i = i + 1 ! Skip the backslash
1488 write(pid_str, '(i15)') shell%last_bg_pid
1489 pid_str = adjustl(pid_str) ! Left-justify to remove leading spaces
1490 result(j:j+len_trim(pid_str)-1) = trim(pid_str)
1491 j = j + len_trim(pid_str)
1492 i = i + 1
1493 else if (working_token(i:i) == '!') then
1494 write(pid_str, '(i15)') shell%last_bg_pid
1495 pid_str = adjustl(pid_str) ! Left-justify to remove leading spaces
1496 result(j:j+len_trim(pid_str)-1) = trim(pid_str)
1497 j = j + len_trim(pid_str)
1498 i = i + 1
1499 else if (working_token(i:i) == '@') then
1500 ! $@ - all positional parameters
1501 var_value = get_shell_variable(shell, '@')
1502 if (len_trim(var_value) > 0) then
1503 call ensure_result_cap(j + len_trim(var_value))
1504 result(j:j+len_trim(var_value)-1) = trim(var_value)
1505 j = j + len_trim(var_value)
1506 end if
1507 i = i + 1
1508 else if (working_token(i:i) == '#') then
1509 ! $# - number of positional parameters
1510 var_value = get_shell_variable(shell, '#')
1511 if (len_trim(var_value) > 0) then
1512 call ensure_result_cap(j + len_trim(var_value))
1513 result(j:j+len_trim(var_value)-1) = trim(var_value)
1514 j = j + len_trim(var_value)
1515 end if
1516 i = i + 1
1517 else if (working_token(i:i) == '*') then
1518 ! $* - all positional parameters as single word
1519 var_value = get_shell_variable(shell, '*')
1520 if (len_trim(var_value) > 0) then
1521 call ensure_result_cap(j + len_trim(var_value))
1522 result(j:j+len_trim(var_value)-1) = trim(var_value)
1523 j = j + len_trim(var_value)
1524 end if
1525 i = i + 1
1526 else if (working_token(i:i) == '-') then
1527 ! $- - current shell option flags
1528 var_value = get_shell_variable(shell, '-')
1529 if (len_trim(var_value) > 0) then
1530 call ensure_result_cap(j + len_trim(var_value))
1531 result(j:j+len_trim(var_value)-1) = trim(var_value)
1532 j = j + len_trim(var_value)
1533 end if
1534 i = i + 1
1535 else if (working_token(i:i) == '_') then
1536 ! Check if this is $_ alone or $_varname
1537 if (i+1 <= len_trim(working_token) .and. &
1538 (is_alnum(working_token(i+1:i+1)) .or. working_token(i+1:i+1) == '_')) then
1539 ! $_varname - underscore-prefixed variable name
1540 var_start = i
1541 do while (i <= len_trim(working_token) .and. &
1542 (is_alnum(working_token(i:i)) .or. working_token(i:i) == '_'))
1543 i = i + 1
1544 end do
1545 var_name = working_token(var_start:i-1)
1546 var_value = get_shell_variable(shell, trim(var_name))
1547 if (len_trim(var_value) > 0) then
1548 call ensure_result_cap(j + len_trim(var_value))
1549 result(j:j+len_trim(var_value)-1) = trim(var_value)
1550 j = j + len_trim(var_value)
1551 end if
1552 else
1553 ! $_ - last argument of previous command
1554 var_value = get_shell_variable(shell, '_')
1555 if (len_trim(var_value) > 0) then
1556 call ensure_result_cap(j + len_trim(var_value))
1557 result(j:j+len_trim(var_value)-1) = trim(var_value)
1558 j = j + len_trim(var_value)
1559 end if
1560 i = i + 1
1561 end if
1562 else if (working_token(i:i) >= '0' .and. working_token(i:i) <= '9') then
1563 ! $0, $1, $2, ... - positional parameters
1564 var_name = working_token(i:i)
1565 var_value = get_shell_variable(shell, trim(var_name))
1566 if (len_trim(var_value) > 0) then
1567 call ensure_result_cap(j + len_trim(var_value))
1568 result(j:j+len_trim(var_value)-1) = trim(var_value)
1569 j = j + len_trim(var_value)
1570 end if
1571 i = i + 1
1572 else if (working_token(i:i) == '(') then
1573 ! Check if it's $(( arithmetic expansion or $( command substitution
1574 if (i+1 <= len_trim(working_token) .and. working_token(i+1:i+1) == '(') then
1575 ! $((arithmetic)) expansion
1576 var_start = i - 1 ! Include the $ character
1577 i = i + 2 ! Skip both opening parens
1578 brace_depth = 2
1579
1580 do while (i <= len_trim(working_token) .and. brace_depth > 0)
1581 if (working_token(i:i) == '(') then
1582 brace_depth = brace_depth + 1
1583 else if (working_token(i:i) == ')') then
1584 brace_depth = brace_depth - 1
1585 end if
1586 i = i + 1
1587 end do
1588
1589 ! Extract full $((expr)) including delimiters
1590 var_name = working_token(var_start:i-1)
1591
1592 ! Evaluate arithmetic expansion with shell context
1593 var_value = arithmetic_expansion_shell(trim(var_name), shell)
1594 if (len_trim(var_value) > 0) then
1595 call ensure_result_cap(j + len_trim(var_value))
1596 result(j:j+len_trim(var_value)-1) = trim(var_value)
1597 j = j + len_trim(var_value)
1598 end if
1599 else
1600 ! $(command) command substitution
1601 i = i + 1
1602 var_start = i
1603 brace_depth = 1
1604
1605 do while (i <= len_trim(working_token) .and. brace_depth > 0)
1606 if (working_token(i:i) == '(') then
1607 brace_depth = brace_depth + 1
1608 else if (working_token(i:i) == ')') then
1609 brace_depth = brace_depth - 1
1610 end if
1611 i = i + 1
1612 end do
1613
1614 var_name = working_token(var_start:i-2) ! This is actually the command
1615
1616 ! Execute command substitution
1617 call execute_command_substitution(trim(var_name), var_value, shell)
1618 if (allocated(var_value) .and. len(var_value) > 0) then
1619 call ensure_result_cap(j + len(var_value))
1620 result(j:j+len(var_value)-1) = var_value
1621 j = j + len(var_value)
1622 end if
1623 end if
1624 else if (working_token(i:i) == '{') then
1625 ! ${VAR} or ${VAR:operation} parameter expansion
1626 i = i + 1
1627 var_start = i
1628 brace_depth = 1
1629
1630 do while (i <= len_trim(working_token) .and. brace_depth > 0)
1631 ! Check for nested ${ pattern (not standalone {)
1632 if (i > 1 .and. i < len_trim(working_token)) then
1633 if (working_token(i-1:i) == '${') then
1634 brace_depth = brace_depth + 1
1635 i = i + 1 ! Skip the { part of ${
1636 end if
1637 end if
1638 if (working_token(i:i) == '}') then
1639 brace_depth = brace_depth - 1
1640 end if
1641 i = i + 1
1642 end do
1643
1644 var_name = working_token(var_start:i-2)
1645
1646 ! Process parameter expansion (delegated to expansion module — issue #12)
1647 call process_param_expansion(var_name, var_value, shell)
1648 if (allocated(var_value) .and. len(var_value) > 0) then
1649 call ensure_result_cap(j + len_trim(var_value))
1650 result(j:j+len_trim(var_value)-1) = trim(var_value)
1651 j = j + len_trim(var_value)
1652 end if
1653 else
1654 ! Simple $VAR syntax
1655 var_start = i
1656 do while (i <= len_trim(working_token))
1657 if (.not. (is_alnum(working_token(i:i)) .or. working_token(i:i) == '_')) exit
1658 i = i + 1
1659 end do
1660
1661 var_name = working_token(var_start:i-1)
1662
1663 ! If no valid variable name was found, treat $ as literal
1664 if (len_trim(var_name) == 0) then
1665 result(j:j) = '$'
1666 j = j + 1
1667 else
1668 ! Check shell variables first
1669 ! Check if variable is set before expanding
1670 if (is_shell_variable_set(shell, trim(var_name))) then
1671 var_value = get_shell_variable(shell, trim(var_name))
1672 ! Use get_shell_variable_length to preserve ALL characters including whitespace
1673 ! This is crucial for variables like IFS=' ' where the space must be preserved
1674 brace_depth = get_shell_variable_length(shell, trim(var_name))
1675 if (brace_depth > 0) then
1676 call ensure_result_cap(j + brace_depth)
1677 result(j:j+brace_depth-1) = var_value(1:brace_depth)
1678 j = j + brace_depth
1679 end if
1680 else
1681 ! Fall back to environment variables
1682 var_value = get_environment_var(trim(var_name))
1683 if (allocated(var_value) .and. len(var_value) > 0) then
1684 call ensure_result_cap(j + len(var_value))
1685 result(j:j+len(var_value)-1) = var_value
1686 j = j + len(var_value)
1687 else
1688 ! Variable is not set - check if set -u is enabled
1689 if (check_nounset(shell, trim(var_name))) then
1690 shell%last_exit_status = 127 ! bash uses 127 for direct expansion errors
1691 shell%fatal_expansion_error = .true.
1692 shell%running = .false. ! Stop shell execution
1693 expanded = ''
1694 return
1695 end if
1696 end if
1697 end if
1698 end if
1699 end if
1700 else if (working_token(i:i) == '`') then
1701 ! Backtick command substitution
1702 i = i + 1
1703 var_start = i
1704
1705 ! Find closing backtick
1706 do while (i <= len_trim(working_token) .and. working_token(i:i) /= '`')
1707 i = i + 1
1708 end do
1709
1710 if (i <= len_trim(working_token) .and. working_token(i:i) == '`') then
1711 var_name = working_token(var_start:i-1) ! This is the command
1712 i = i + 1 ! Skip closing backtick
1713
1714 ! Execute command substitution
1715 call execute_command_substitution(trim(var_name), var_value, shell)
1716 if (allocated(var_value) .and. len(var_value) > 0) then
1717 call ensure_result_cap(j + len_trim(var_value))
1718 result(j:j+len_trim(var_value)-1) = trim(var_value)
1719 j = j + len_trim(var_value)
1720 end if
1721 else
1722 ! Unmatched backtick, treat as literal
1723 result(j:j) = '`'
1724 j = j + 1
1725 end if
1726 else if (working_token(i:i) == char(1)) then
1727 ! Skip sentinel character (marks quote boundary from lexer)
1728 i = i + 1
1729 else
1730 result(j:j) = working_token(i:i)
1731 i = i + 1
1732 j = j + 1
1733 end if
1734 end block ! tilde_at_word_start bounds-safe check
1735 end do
1736 end block ! End of in_single_quote_literal block
1737
1738 ! POSIX: Quote removal does NOT apply to the results of parameter expansion
1739 ! Only apply quote removal to quotes that were literally in the command, not in variable values
1740 ! So we do NOT call strip_outer_quotes here - that would incorrectly remove quotes from values like $VAR where VAR='"test"'
1741 ! Don't use trim() - preserve trailing whitespace from variable values
1742 if (j > 1) then
1743 expanded = result(1:j-1)
1744 else
1745 expanded = ''
1746 end if
1747
1748 contains
1749
1750 subroutine ensure_result_cap(needed)
1751 integer, intent(in) :: needed
1752 character(len=:), allocatable :: tmp
1753 integer :: new_cap
1754 if (needed <= result_cap) return
1755 new_cap = max(result_cap * 2, needed + 4096)
1756 allocate(character(len=new_cap) :: tmp)
1757 if (j > 1) tmp(1:j-1) = result(1:j-1)
1758 call move_alloc(tmp, result)
1759 result_cap = new_cap
1760 end subroutine
1761
1762 function is_alnum(ch) result(res)
1763 character, intent(in) :: ch
1764 logical :: res
1765 res = (ch >= 'a' .and. ch <= 'z') .or. &
1766 (ch >= 'A' .and. ch <= 'Z') .or. &
1767 (ch >= '0' .and. ch <= '9')
1768 end function
1769
1770 end subroutine
1771
1772 subroutine read_heredoc(delimiter, content, shell, strip_tabs)
1773 use shell_types, only: shell_state_t
1774 use variables, only: get_shell_variable
1775 character(len=*), intent(in) :: delimiter
1776 character(len=:), allocatable, intent(out) :: content
1777 type(shell_state_t), intent(inout) :: shell
1778 logical, intent(in), optional :: strip_tabs
1779
1780 character(len=MAX_TOKEN_LEN) :: line
1781 character(len=MAX_HEREDOC_LEN) :: buffer
1782 integer :: iostat, pos, tab_pos
1783 logical :: should_expand, do_strip_tabs
1784
1785 ! Determine if we should strip tabs
1786 do_strip_tabs = .false.
1787 if (present(strip_tabs)) do_strip_tabs = strip_tabs
1788
1789 ! Check if we have pending heredocs from -c flag (new array-based approach)
1790 if (shell%num_pending_heredocs > 0 .and. &
1791 shell%next_pending_heredoc <= shell%num_pending_heredocs) then
1792 ! Get the next pending heredoc
1793 buffer = trim(shell%pending_heredocs(shell%next_pending_heredoc)%content)
1794
1795 ! Check if we should expand variables
1796 should_expand = .not. shell%pending_heredocs(shell%next_pending_heredoc)%quoted
1797
1798 ! Expand variables if needed
1799 if (should_expand) then
1800 buffer = expand_heredoc_variables(buffer, shell)
1801 end if
1802
1803 allocate(character(len=len_trim(buffer)) :: content)
1804 content = trim(buffer)
1805
1806 ! Advance to next pending heredoc
1807 shell%next_pending_heredoc = shell%next_pending_heredoc + 1
1808
1809 ! Clear pending heredocs when all consumed
1810 if (shell%next_pending_heredoc > shell%num_pending_heredocs) then
1811 shell%num_pending_heredocs = 0
1812 shell%next_pending_heredoc = 1
1813 ! Also clear legacy single heredoc
1814 shell%has_pending_heredoc = .false.
1815 shell%pending_heredoc = ''
1816 shell%pending_heredoc_delimiter = ''
1817 shell%pending_heredoc_quoted = .false.
1818 shell%pending_heredoc_strip_tabs = .false.
1819 end if
1820 return
1821 end if
1822
1823 ! Legacy: Check single pending heredoc (backward compatibility)
1824 if (shell%has_pending_heredoc .and. &
1825 trim(shell%pending_heredoc_delimiter) == trim(delimiter)) then
1826 ! Use the pre-stored content (tabs already stripped by preprocess_heredocs_for_c if needed)
1827 buffer = trim(shell%pending_heredoc)
1828
1829 ! Check if we should expand variables
1830 should_expand = .not. shell%pending_heredoc_quoted
1831
1832 ! Expand variables if needed
1833 if (should_expand) then
1834 buffer = expand_heredoc_variables(buffer, shell)
1835 end if
1836
1837 allocate(character(len=len_trim(buffer)) :: content)
1838 content = trim(buffer)
1839
1840 ! Clear the pending heredoc
1841 shell%has_pending_heredoc = .false.
1842 shell%pending_heredoc = ''
1843 shell%pending_heredoc_delimiter = ''
1844 shell%pending_heredoc_quoted = .false.
1845 shell%pending_heredoc_strip_tabs = .false.
1846 return
1847 end if
1848
1849 ! Fall back to reading from stdin
1850 buffer = ''
1851 pos = 1
1852
1853 write(*, '(a)', advance='no') '> '
1854
1855 do
1856 read(*, '(a)', iostat=iostat) line
1857 if (iostat /= 0) exit
1858
1859 ! Strip leading tabs BEFORE delimiter comparison (<<- strips tabs)
1860 if (do_strip_tabs) then
1861 tab_pos = 1
1862 do while (tab_pos <= len_trim(line) .and. line(tab_pos:tab_pos) == char(9))
1863 tab_pos = tab_pos + 1
1864 end do
1865 line = line(tab_pos:)
1866 end if
1867
1868 if (trim(line) == trim(delimiter)) exit
1869
1870 if (pos > 1) then
1871 buffer(pos:pos) = char(10) ! newline
1872 pos = pos + 1
1873 end if
1874
1875 buffer(pos:pos+len_trim(line)-1) = trim(line)
1876 pos = pos + len_trim(line)
1877
1878 write(*, '(a)', advance='no') '> '
1879 end do
1880
1881 allocate(character(len=pos-1) :: content)
1882 content = buffer(:pos-1)
1883 end subroutine
1884
1885 ! Expand variables in heredoc content
1886 function expand_heredoc_variables(input, shell) result(output)
1887 use shell_types, only: shell_state_t
1888 use variables, only: get_shell_variable
1889 character(len=*), intent(in) :: input
1890 type(shell_state_t), intent(in) :: shell
1891 character(len=MAX_HEREDOC_LEN) :: output
1892
1893 integer :: i, j, var_start, var_end
1894 character(len=256) :: var_name
1895 character(len=:), allocatable :: var_value
1896
1897 output = ''
1898 i = 1
1899 j = 1
1900
1901 do while (i <= len_trim(input))
1902 if (input(i:i) == '$' .and. i < len_trim(input)) then
1903 ! Found potential variable
1904 var_start = i + 1
1905
1906 ! Check for ${var} format
1907 if (input(var_start:var_start) == '{') then
1908 var_start = var_start + 1
1909 var_end = var_start
1910 do while (var_end <= len_trim(input) .and. input(var_end:var_end) /= '}')
1911 var_end = var_end + 1
1912 end do
1913 if (var_end <= len_trim(input)) then
1914 var_name = input(var_start:var_end-1)
1915 var_value = get_shell_variable(shell, trim(var_name))
1916 output(j:j+len_trim(var_value)-1) = trim(var_value)
1917 j = j + len_trim(var_value)
1918 i = var_end + 1
1919 else
1920 output(j:j) = '$'
1921 j = j + 1
1922 i = i + 1
1923 end if
1924 else
1925 ! Check for $var format
1926 var_end = var_start
1927 do while (var_end <= len_trim(input) .and. &
1928 ((input(var_end:var_end) >= 'A' .and. input(var_end:var_end) <= 'Z') .or. &
1929 (input(var_end:var_end) >= 'a' .and. input(var_end:var_end) <= 'z') .or. &
1930 (input(var_end:var_end) >= '0' .and. input(var_end:var_end) <= '9') .or. &
1931 input(var_end:var_end) == '_'))
1932 var_end = var_end + 1
1933 end do
1934 if (var_end > var_start) then
1935 var_name = input(var_start:var_end-1)
1936 var_value = get_shell_variable(shell, trim(var_name))
1937 output(j:j+len_trim(var_value)-1) = trim(var_value)
1938 j = j + len_trim(var_value)
1939 i = var_end
1940 else
1941 output(j:j) = '$'
1942 j = j + 1
1943 i = i + 1
1944 end if
1945 end if
1946 else
1947 output(j:j) = input(i:i)
1948 j = j + 1
1949 i = i + 1
1950 end if
1951 end do
1952 end function
1953
1954 ! Strip leading tabs from each line in heredoc content
1955 function strip_leading_tabs(input) result(output)
1956 character(len=*), intent(in) :: input
1957 character(len=MAX_HEREDOC_LEN) :: output
1958 integer :: i, j
1959 logical :: at_line_start
1960
1961 output = ''
1962 i = 1
1963 j = 1
1964 at_line_start = .true.
1965
1966 do while (i <= len_trim(input))
1967 if (at_line_start .and. input(i:i) == char(9)) then
1968 ! Skip leading tab
1969 i = i + 1
1970 else
1971 ! Copy character
1972 at_line_start = .false.
1973 output(j:j) = input(i:i)
1974 if (input(i:i) == char(10)) then
1975 at_line_start = .true.
1976 end if
1977 j = j + 1
1978 i = i + 1
1979 end if
1980 end do
1981 end function
1982
1983 ! Extract heredoc content from input string (for -c mode)
1984 subroutine extract_heredoc_from_input(input, delimiter, content)
1985 character(len=*), intent(in) :: input, delimiter
1986 character(len=:), allocatable, intent(out) :: content
1987
1988 integer :: i, line_start, line_end, content_start
1989 integer :: newline_pos
1990 character(len=len(input)) :: current_line
1991 character(len=MAX_HEREDOC_LEN) :: buffer
1992 integer :: buffer_pos
1993 logical :: found_end
1994
1995 ! Check if input contains newlines (heredoc marker)
1996 newline_pos = index(input, char(10))
1997 if (newline_pos == 0) then
1998 ! No newlines, can't extract heredoc content
1999 return
2000 end if
2001
2002 ! Find where heredoc content starts (after first newline following <<DELIM)
2003 content_start = 0
2004 do i = 1, len(input)
2005 if (input(i:i) == '<' .and. i < len(input) - 1) then
2006 if (input(i+1:i+1) == '<') then
2007 ! Found <<, look for newline after delimiter
2008 do newline_pos = i+2, len(input)
2009 if (input(newline_pos:newline_pos) == char(10)) then
2010 content_start = newline_pos + 1
2011 exit
2012 end if
2013 end do
2014 if (content_start > 0) exit
2015 end if
2016 end if
2017 end do
2018
2019 if (content_start == 0 .or. content_start > len(input)) then
2020 ! No content after heredoc marker
2021 return
2022 end if
2023
2024 ! Extract lines until we find the delimiter
2025 buffer = ''
2026 buffer_pos = 1
2027 line_start = content_start
2028 found_end = .false.
2029
2030 do while (line_start <= len(input) .and. .not. found_end)
2031 ! Find end of current line (newline or end of string)
2032 line_end = line_start
2033 do while (line_end <= len(input) .and. input(line_end:line_end) /= char(10))
2034 line_end = line_end + 1
2035 end do
2036
2037 ! Extract current line (handle case where line_end went past end of input)
2038 if (line_end > len(input)) then
2039 ! No newline found, extract to end of input
2040 if (line_start <= len(input)) then
2041 current_line = input(line_start:len(input))
2042 else
2043 current_line = ''
2044 end if
2045 else if (line_end > line_start) then
2046 ! Newline found, extract up to (but not including) the newline
2047 current_line = input(line_start:line_end-1)
2048 else
2049 current_line = ''
2050 end if
2051
2052 ! Check if this line matches the delimiter
2053 if (trim(current_line) == trim(delimiter)) then
2054 found_end = .true.
2055 exit
2056 end if
2057
2058 ! Add line to buffer
2059 if (buffer_pos > 1) then
2060 ! Add newline before this line
2061 buffer(buffer_pos:buffer_pos) = char(10)
2062 buffer_pos = buffer_pos + 1
2063 end if
2064
2065 if (len_trim(current_line) > 0) then
2066 buffer(buffer_pos:buffer_pos+len_trim(current_line)-1) = trim(current_line)
2067 buffer_pos = buffer_pos + len_trim(current_line)
2068 end if
2069
2070 ! Move to next line
2071 line_start = line_end + 1
2072 end do
2073
2074 ! Allocate and return content (with trailing newline to match POSIX)
2075 if (buffer_pos > 1) then
2076 ! Add trailing newline
2077 buffer(buffer_pos:buffer_pos) = char(10)
2078 buffer_pos = buffer_pos + 1
2079 allocate(character(len=buffer_pos-1) :: content)
2080 content = buffer(:buffer_pos-1)
2081 end if
2082 end subroutine
2083
2084 ! Convert backtick command substitution to $() format
2085 function convert_backticks_to_dollar_paren(input) result(output)
2086 character(len=*), intent(in) :: input
2087 character(len=:), allocatable :: output
2088 character(len=len(input)*2) :: temp_result
2089 integer :: i, j, backtick_start
2090 logical :: in_backticks, in_single_quote, in_double_quote
2091 character(len=1) :: backslash
2092
2093 backslash = char(92)
2094 temp_result = ''
2095 i = 1
2096 j = 1
2097 in_backticks = .false.
2098 in_single_quote = .false.
2099 in_double_quote = .false.
2100 backtick_start = 0
2101
2102 do while (i <= len_trim(input))
2103 ! Track quote state (but not inside backticks)
2104 if (.not. in_backticks) then
2105 ! Fortran .or. does NOT short-circuit, so check i > 1 separately
2106 ! to avoid input(0:0) out-of-bounds access
2107 block
2108 logical :: not_escaped
2109 if (i == 1) then
2110 not_escaped = .true.
2111 else
2112 not_escaped = (input(i-1:i-1) /= backslash)
2113 end if
2114 if (input(i:i) == "'" .and. not_escaped) then
2115 in_single_quote = .not. in_single_quote
2116 else if (input(i:i) == '"' .and. not_escaped) then
2117 in_double_quote = .not. in_double_quote
2118 end if
2119 end block
2120 end if
2121
2122 ! Inside backticks: handle escaped backticks as nested substitution delimiters
2123 ! POSIX: \` inside backticks means start/end of nested command substitution
2124 if (in_backticks .and. input(i:i) == backslash .and. i < len_trim(input)) then
2125 if (input(i+1:i+1) == '`') then
2126 ! Escaped backtick inside backticks = nested command substitution
2127 ! Convert to $() for the nested level
2128 temp_result(j:j+1) = '$('
2129 j = j + 2
2130 i = i + 2
2131 ! Find the matching closing \` and convert it too
2132 block
2133 integer :: k, nest_level
2134 nest_level = 1
2135 k = i
2136 do while (k <= len_trim(input) .and. nest_level > 0)
2137 if (input(k:k) == backslash .and. k < len_trim(input) .and. input(k+1:k+1) == '`') then
2138 nest_level = nest_level - 1
2139 if (nest_level == 0) then
2140 ! Copy everything up to here, then add closing )
2141 do while (i < k)
2142 temp_result(j:j) = input(i:i)
2143 j = j + 1
2144 i = i + 1
2145 end do
2146 temp_result(j:j) = ')'
2147 j = j + 1
2148 i = k + 2 ! Skip the \`
2149 exit
2150 else
2151 k = k + 2
2152 end if
2153 else
2154 k = k + 1
2155 end if
2156 end do
2157 end block
2158 cycle
2159 else if (input(i+1:i+1) == '$' .or. input(i+1:i+1) == backslash .or. &
2160 input(i+1:i+1) == char(10)) then
2161 ! Other escapes: consume backslash, copy the character
2162 temp_result(j:j) = input(i+1:i+1)
2163 j = j + 1
2164 i = i + 2
2165 cycle
2166 end if
2167 end if
2168
2169 ! Process backticks (not inside single quotes)
2170 ! Fortran .or. does NOT short-circuit, so check i > 1 separately
2171 block
2172 logical :: backtick_not_escaped
2173 if (i == 1) then
2174 backtick_not_escaped = .true.
2175 else
2176 backtick_not_escaped = (input(i-1:i-1) /= backslash)
2177 end if
2178 if (input(i:i) == '`' .and. .not. in_single_quote .and. backtick_not_escaped) then
2179 if (.not. in_backticks) then
2180 ! Start of backtick command substitution
2181 in_backticks = .true.
2182 backtick_start = i
2183 temp_result(j:j+1) = '$('
2184 j = j + 2
2185 else
2186 ! End of backtick command substitution
2187 in_backticks = .false.
2188 temp_result(j:j) = ')'
2189 j = j + 1
2190 end if
2191 i = i + 1
2192 else
2193 ! Regular character
2194 temp_result(j:j) = input(i:i)
2195 i = i + 1
2196 j = j + 1
2197 end if
2198 end block ! backtick_not_escaped
2199 end do
2200
2201 allocate(character(len=j-1) :: output)
2202 output = temp_result(1:j-1)
2203 end function
2204
2205 subroutine execute_command_substitution(command, output, shell)
2206 use command_capture, only: execute_command_and_capture
2207 character(len=*), intent(in) :: command
2208 character(len=:), allocatable, intent(out) :: output
2209 type(shell_state_t), intent(inout) :: shell
2210
2211 ! POSIX: errexit should not trigger in command substitution
2212 shell%in_command_substitution = .true.
2213
2214 ! Execute in current shell context to preserve functions, variables, etc.
2215 call execute_command_and_capture(shell, command, output)
2216
2217 shell%in_command_substitution = .false.
2218
2219 if (.not. allocated(output)) output = ''
2220
2221 ! Remove trailing newlines (but NOT other whitespace like spaces)
2222 do while (len(output) > 0 .and. output(len(output):len(output)) == char(10))
2223 output = output(:len(output)-1)
2224 end do
2225 end subroutine
2226
2227 ! Simple pattern matching for shell expansions (supports * wildcard)
2228 function shell_pattern_match(text, pattern) result(matches)
2229 character(len=*), intent(in) :: text, pattern
2230 logical :: matches
2231 integer :: t_pos, p_pos, star_pos, match_pos
2232
2233 matches = .false.
2234 t_pos = 1
2235 p_pos = 1
2236 star_pos = 0
2237 match_pos = 0
2238
2239 do while (t_pos <= len_trim(text))
2240 if (p_pos <= len_trim(pattern) .and. &
2241 (pattern(p_pos:p_pos) == text(t_pos:t_pos) .or. pattern(p_pos:p_pos) == '?')) then
2242 t_pos = t_pos + 1
2243 p_pos = p_pos + 1
2244 else if (p_pos <= len_trim(pattern) .and. pattern(p_pos:p_pos) == '*') then
2245 star_pos = p_pos
2246 match_pos = t_pos
2247 p_pos = p_pos + 1
2248 else if (star_pos > 0) then
2249 p_pos = star_pos + 1
2250 match_pos = match_pos + 1
2251 t_pos = match_pos
2252 else
2253 return
2254 end if
2255 end do
2256
2257 do while (p_pos <= len_trim(pattern) .and. pattern(p_pos:p_pos) == '*')
2258 p_pos = p_pos + 1
2259 end do
2260
2261 matches = (p_pos > len_trim(pattern))
2262 end function
2263
2264
2265 subroutine process_tilde_expansion(token, pos, result, result_pos, shell)
2266 character(len=*), intent(in) :: token
2267 integer, intent(inout) :: pos, result_pos
2268 character(len=*), intent(inout) :: result
2269 type(shell_state_t), intent(in) :: shell
2270
2271 character(len=MAX_TOKEN_LEN) :: username, home_path
2272 character(len=:), allocatable :: home_dir, shell_var
2273 integer :: start_pos
2274
2275 ! Skip the tilde
2276 pos = pos + 1
2277
2278 ! POSIX: ~+ expands to PWD, ~- expands to OLDPWD
2279 if (pos <= len_trim(token) .and. token(pos:pos) == '+') then
2280 ! ~+ - expand to PWD (check shell variable first, then environment)
2281 shell_var = get_shell_variable(shell, 'PWD')
2282 if (len_trim(shell_var) > 0) then
2283 result(result_pos:result_pos+len_trim(shell_var)-1) = trim(shell_var)
2284 result_pos = result_pos + len_trim(shell_var)
2285 else
2286 home_dir = get_environment_var('PWD')
2287 if (allocated(home_dir) .and. len(home_dir) > 0) then
2288 result(result_pos:result_pos+len(home_dir)-1) = home_dir
2289 result_pos = result_pos + len(home_dir)
2290 else
2291 ! Fallback: return ~+ literally
2292 result(result_pos:result_pos+1) = '~+'
2293 result_pos = result_pos + 2
2294 end if
2295 end if
2296 pos = pos + 1 ! Skip the +
2297 return
2298 else if (pos <= len_trim(token) .and. token(pos:pos) == '-') then
2299 ! ~- - expand to OLDPWD (check shell variable first, then environment)
2300 shell_var = get_shell_variable(shell, 'OLDPWD')
2301 if (len_trim(shell_var) > 0) then
2302 result(result_pos:result_pos+len_trim(shell_var)-1) = trim(shell_var)
2303 result_pos = result_pos + len_trim(shell_var)
2304 else
2305 home_dir = get_environment_var('OLDPWD')
2306 if (allocated(home_dir) .and. len(home_dir) > 0) then
2307 result(result_pos:result_pos+len(home_dir)-1) = home_dir
2308 result_pos = result_pos + len(home_dir)
2309 else
2310 ! Fallback: return ~- literally
2311 result(result_pos:result_pos+1) = '~-'
2312 result_pos = result_pos + 2
2313 end if
2314 end if
2315 pos = pos + 1 ! Skip the -
2316 return
2317 else if (pos > len_trim(token) .or. token(pos:pos) == '/' .or. token(pos:pos) == ' ') then
2318 ! Simple ~ expansion - use HOME environment variable
2319 home_dir = get_environment_var('HOME')
2320 if (allocated(home_dir) .and. len(home_dir) > 0) then
2321 result(result_pos:result_pos+len(home_dir)-1) = home_dir
2322 result_pos = result_pos + len(home_dir)
2323 else
2324 ! Fallback to /home/user if HOME not set
2325 home_dir = get_environment_var('USER')
2326 if (allocated(home_dir) .and. len(home_dir) > 0) then
2327 home_path = '/home/' // home_dir
2328 result(result_pos:result_pos+len_trim(home_path)-1) = trim(home_path)
2329 result_pos = result_pos + len_trim(home_path)
2330 else
2331 ! Last resort fallback
2332 result(result_pos:result_pos+5) = '/home/'
2333 result_pos = result_pos + 6
2334 end if
2335 end if
2336 else
2337 ! ~username expansion
2338 start_pos = pos
2339 do while (pos <= len_trim(token) .and. token(pos:pos) /= '/' .and. token(pos:pos) /= ' ')
2340 pos = pos + 1
2341 end do
2342
2343 if (pos > start_pos) then
2344 username = token(start_pos:pos-1)
2345 else
2346 username = ''
2347 end if
2348
2349 ! Simple implementation: assume user home is in /home/username
2350 ! In a full implementation, you'd use getpwnam() system call
2351 home_path = '/home/' // trim(username)
2352 result(result_pos:result_pos+len_trim(home_path)-1) = trim(home_path)
2353 result_pos = result_pos + len_trim(home_path)
2354
2355 ! Don't increment pos here as it's already at the next character
2356 pos = pos - 1 ! Adjust because main loop will increment
2357 end if
2358 end subroutine
2359
2360 ! Detect and replace process substitution <(...) and >(...) with FIFO paths
2361 subroutine process_substitutions(shell, input, output)
2362 use substitution, only: create_fifo_for_subst, set_fifo_pid
2363 type(shell_state_t), intent(inout) :: shell
2364 character(len=*), intent(in) :: input
2365 character(len=*), intent(out) :: output
2366
2367 integer :: i, start_pos, paren_depth, out_pos, fifo_len
2368 character(len=MAX_PATH_LEN) :: fifo_path, command
2369 logical :: is_input_subst
2370 integer(c_pid_t) :: pid
2371 character(len=1) :: subst_type
2372
2373 output = ''
2374 out_pos = 1
2375 i = 1
2376
2377 do while (i <= len_trim(input))
2378 ! Check for <( or >(
2379 ! IMPORTANT: Fortran .and. does NOT short-circuit, so we must use
2380 ! a nested if to avoid substring out-of-bounds access
2381 if (i+1 <= len(input) .and. i+1 <= len_trim(input)) then
2382 if (input(i:i+1) == '<(' .or. input(i:i+1) == '>(') then
2383
2384 subst_type = input(i:i)
2385 is_input_subst = (subst_type == '<')
2386
2387 ! Find matching closing parenthesis
2388 start_pos = i + 2
2389 paren_depth = 1
2390 i = start_pos
2391
2392 do while (i <= len_trim(input) .and. paren_depth > 0)
2393 if (input(i:i) == '(') then
2394 paren_depth = paren_depth + 1
2395 else if (input(i:i) == ')') then
2396 paren_depth = paren_depth - 1
2397 end if
2398 i = i + 1
2399 end do
2400
2401 if (paren_depth == 0) then
2402 ! Extract the command
2403 command = input(start_pos:i-2)
2404
2405 ! Create FIFO
2406 fifo_path = create_fifo_for_subst(shell, is_input_subst)
2407
2408 if (len_trim(fifo_path) > 0) then
2409 ! Fork background process to execute command with proper redirection
2410 pid = c_fork()
2411
2412 if (pid == 0) then
2413 ! Child process
2414 call execute_proc_subst_command(trim(command), trim(fifo_path), is_input_subst)
2415 call c_exit(0)
2416 else
2417 ! Parent process - track the PID
2418 call set_fifo_pid(shell, fifo_path, pid)
2419
2420 ! Replace <(command) or >(command) with FIFO path
2421 fifo_len = len_trim(fifo_path)
2422 if (out_pos + fifo_len - 1 <= len(output)) then
2423 output(out_pos:out_pos+fifo_len-1) = trim(fifo_path)
2424 out_pos = out_pos + fifo_len
2425 end if
2426 end if
2427 else
2428 write(error_unit, '(A)') 'fortsh: failed to create process substitution'
2429 ! Keep original if failed - but this is a rare error case
2430 end if
2431 end if
2432 else
2433 ! Regular character - copy without trimming
2434 if (out_pos <= len(output)) then
2435 output(out_pos:out_pos) = input(i:i)
2436 out_pos = out_pos + 1
2437 end if
2438 i = i + 1
2439 end if
2440 else
2441 ! Last character or beyond trim - copy as regular character
2442 if (out_pos <= len(output)) then
2443 output(out_pos:out_pos) = input(i:i)
2444 out_pos = out_pos + 1
2445 end if
2446 i = i + 1
2447 end if
2448 end do
2449 end subroutine
2450
2451 ! Execute a process substitution command with proper redirection
2452 subroutine execute_proc_subst_command(command, fifo_path, is_input)
2453 character(len=*), intent(in) :: command, fifo_path
2454 logical, intent(in) :: is_input
2455 character(len=512) :: full_command
2456 character(len=256), target :: shell_cmd, command_c
2457 character(len=16), target :: shell_flag
2458 type(c_ptr), target :: argv(4)
2459 integer :: result
2460
2461 ! Build redirected command using shell
2462 if (is_input) then
2463 ! <(command) - redirect command's stdout to FIFO
2464 write(full_command, '(A,A,A,A)') trim(command), ' > ', trim(fifo_path), c_null_char
2465 else
2466 ! >(command) - redirect FIFO to command's stdin
2467 write(full_command, '(A,A,A,A)') trim(command), ' < ', trim(fifo_path), c_null_char
2468 end if
2469
2470 ! Execute via /bin/sh -c
2471 shell_cmd = '/bin/sh'//c_null_char
2472 shell_flag = '-c'//c_null_char
2473 command_c = trim(full_command)
2474
2475 argv(1) = c_loc(shell_cmd)
2476 argv(2) = c_loc(shell_flag)
2477 argv(3) = c_loc(command_c)
2478 argv(4) = c_null_ptr
2479
2480 result = c_execvp(c_loc(shell_cmd), c_loc(argv))
2481 if (result < 0) then
2482 write(error_unit, '(A)') 'fortsh: failed to execute process substitution command'
2483 call c_exit(1)
2484 end if
2485 end subroutine
2486
2487 ! Execute a command via system shell (for process substitution)
2488 subroutine execute_command_via_shell(command)
2489 character(len=*), intent(in) :: command
2490 character(len=256), target :: shell_cmd, command_c
2491 character(len=16), target :: shell_flag
2492 type(c_ptr), target :: argv(4)
2493 integer :: result
2494
2495 ! Build command: /bin/sh -c "command"
2496 shell_cmd = '/bin/sh'//c_null_char
2497 shell_flag = '-c'//c_null_char
2498 command_c = trim(command)//c_null_char
2499
2500 argv(1) = c_loc(shell_cmd)
2501 argv(2) = c_loc(shell_flag)
2502 argv(3) = c_loc(command_c)
2503 argv(4) = c_null_ptr
2504
2505 ! Execute /bin/sh -c "command"
2506 result = c_execvp(c_loc(shell_cmd), c_loc(argv))
2507 if (result < 0) then
2508 write(error_unit, '(A)') 'fortsh: failed to execute process substitution command'
2509 call c_exit(1)
2510 end if
2511 end subroutine
2512
2513 ! Extract prefix assignments (VAR=value command) from tokenized command
2514 ! Moves VAR=value pairs to cmd%prefix_assignments and removes them from tokens
2515 subroutine extract_prefix_assignments(cmd)
2516 type(command_t), intent(inout) :: cmd
2517 integer :: i, eq_pos, first_cmd_token
2518 character(len=256) :: token
2519 logical :: is_assignment
2520 character(len=MAX_TOKEN_LEN), allocatable :: new_tokens(:)
2521 integer :: new_token_count
2522
2523 if (.not. allocated(cmd%tokens) .or. cmd%num_tokens == 0) return
2524
2525 cmd%num_prefix_assignments = 0
2526 first_cmd_token = 0
2527
2528 ! Scan tokens from the beginning to find prefix assignments
2529 do i = 1, cmd%num_tokens
2530 token = trim(cmd%tokens(i))
2531
2532 ! Check if token is a valid assignment (VAR=value)
2533 is_assignment = .false.
2534 eq_pos = index(token, '=')
2535
2536 if (eq_pos > 1) then
2537 ! Has '=' and something before it
2538 ! Check if everything before '=' is a valid variable name
2539 ! (letters, numbers, underscore, but must start with letter or underscore)
2540 is_assignment = is_valid_var_name(token(:eq_pos-1))
2541 end if
2542
2543 if (is_assignment) then
2544 ! This is a prefix assignment
2545 if (cmd%num_prefix_assignments < MAX_PREFIX_ASSIGNMENTS) then
2546 if (.not. allocated(cmd%prefix_assignments)) then
2547 allocate(character(len=MAX_TOKEN_LEN) :: cmd%prefix_assignments(MAX_PREFIX_ASSIGNMENTS))
2548 end if
2549 cmd%num_prefix_assignments = cmd%num_prefix_assignments + 1
2550 cmd%prefix_assignments(cmd%num_prefix_assignments) = trim(token)
2551 end if
2552 else
2553 ! First non-assignment token - this is where the command starts
2554 first_cmd_token = i
2555 exit
2556 end if
2557 end do
2558
2559 ! If we found prefix assignments, remove them from tokens
2560 if (cmd%num_prefix_assignments > 0 .and. first_cmd_token > 0) then
2561 new_token_count = cmd%num_tokens - cmd%num_prefix_assignments
2562
2563 if (new_token_count > 0) then
2564 ! Allocate new token array with remaining tokens
2565 allocate(new_tokens(new_token_count))
2566
2567 ! Copy remaining tokens
2568 do i = 1, new_token_count
2569 new_tokens(i) = cmd%tokens(first_cmd_token + i - 1)
2570 end do
2571
2572 ! Replace tokens array
2573 deallocate(cmd%tokens)
2574 cmd%tokens = new_tokens
2575 cmd%num_tokens = new_token_count
2576 else
2577 ! All tokens were assignments, no actual command
2578 deallocate(cmd%tokens)
2579 cmd%num_tokens = 0
2580 end if
2581 end if
2582 end subroutine
2583
2584 ! Check if a string is a valid shell variable name
2585 function is_valid_var_name(name) result(is_valid)
2586 character(len=*), intent(in) :: name
2587 logical :: is_valid
2588 integer :: i, check_len, bracket_pos
2589 character :: ch
2590
2591 is_valid = .false.
2592 check_len = len_trim(name)
2593 if (check_len == 0) return
2594
2595 ! Accept array subscript names: var[subscript]
2596 bracket_pos = index(name(1:check_len), '[')
2597 if (bracket_pos > 0) then
2598 check_len = bracket_pos - 1
2599 if (check_len == 0) return
2600 end if
2601
2602 ! First character must be letter or underscore
2603 ch = name(1:1)
2604 if (.not. ((ch >= 'A' .and. ch <= 'Z') .or. &
2605 (ch >= 'a' .and. ch <= 'z') .or. &
2606 ch == '_')) then
2607 return
2608 end if
2609
2610 ! Remaining characters can be letters, digits, or underscores
2611 do i = 2, check_len
2612 ch = name(i:i)
2613 if (.not. ((ch >= 'A' .and. ch <= 'Z') .or. &
2614 (ch >= 'a' .and. ch <= 'z') .or. &
2615 (ch >= '0' .and. ch <= '9') .or. &
2616 ch == '_')) then
2617 return
2618 end if
2619 end do
2620
2621 is_valid = .true.
2622 end function
2623
2624 ! Check if a line has unclosed quotes (needs continuation)
2625 function has_unclosed_quote(line) result(has_unclosed)
2626 character(len=*), intent(in) :: line
2627 logical :: has_unclosed
2628 integer :: i
2629 logical :: in_single_quote, in_double_quote
2630 character :: prev_char
2631
2632 has_unclosed = .false.
2633 in_single_quote = .false.
2634 in_double_quote = .false.
2635 prev_char = ' '
2636
2637 do i = 1, len_trim(line)
2638 ! Check for escape character
2639 if (prev_char == '\') then
2640 ! Skip this character as it's escaped
2641 prev_char = ' ' ! Reset escape
2642 cycle
2643 end if
2644
2645 ! Track quote state
2646 if (line(i:i) == "'" .and. .not. in_double_quote) then
2647 in_single_quote = .not. in_single_quote
2648 else if (line(i:i) == '"' .and. .not. in_single_quote) then
2649 in_double_quote = .not. in_double_quote
2650 end if
2651
2652 prev_char = line(i:i)
2653 end do
2654
2655 ! If either quote type is unclosed, we need continuation
2656 has_unclosed = in_single_quote .or. in_double_quote
2657 end function
2658
2659 function ends_with_continuation_backslash(line) result(needs_continuation)
2660 character(len=*), intent(in) :: line
2661 logical :: needs_continuation
2662 integer :: i, line_len
2663 logical :: in_single_quote, in_double_quote
2664 character :: prev_char
2665
2666 needs_continuation = .false.
2667 line_len = len_trim(line)
2668
2669 ! Empty line doesn't need continuation
2670 if (line_len == 0) return
2671
2672 ! Quick check: if line doesn't end with backslash, no continuation needed
2673 if (line(line_len:line_len) /= '\') return
2674
2675 ! Now we need to check if the trailing backslash is inside quotes
2676 in_single_quote = .false.
2677 in_double_quote = .false.
2678 prev_char = ' '
2679
2680 do i = 1, line_len
2681 ! Check for escape character (in double quotes or unquoted)
2682 if (prev_char == '\' .and. .not. in_single_quote) then
2683 ! Skip this character as it's escaped
2684 prev_char = ' ' ! Reset escape
2685 cycle
2686 end if
2687
2688 ! Track quote state
2689 if (line(i:i) == "'" .and. .not. in_double_quote) then
2690 in_single_quote = .not. in_single_quote
2691 else if (line(i:i) == '"' .and. .not. in_single_quote) then
2692 in_double_quote = .not. in_double_quote
2693 end if
2694
2695 prev_char = line(i:i)
2696 end do
2697
2698 ! If we're not inside quotes and line ends with backslash, need continuation
2699 ! (prev_char is backslash at this point since it's the last char)
2700 needs_continuation = .not. in_single_quote .and. .not. in_double_quote
2701 end function
2702
2703 ! Detect if a command line has a heredoc operator (<<) whose content hasn't been provided.
2704 ! Returns the delimiter string if found, empty if no heredoc pending.
2705 function get_heredoc_delimiter(line) result(delimiter)
2706 character(len=*), intent(in) :: line
2707 character(len=256) :: delimiter
2708 integer :: i, dstart, dend, line_len
2709 logical :: in_sq, in_dq, strip_tabs, delim_quoted
2710
2711 delimiter = ''
2712 line_len = len_trim(line)
2713 in_sq = .false.; in_dq = .false.
2714 i = 1
2715
2716 do while (i < line_len)
2717 ! Track quotes
2718 if (line(i:i) == "'" .and. .not. in_dq) then
2719 in_sq = .not. in_sq
2720 i = i + 1; cycle
2721 end if
2722 if (line(i:i) == '"' .and. .not. in_sq) then
2723 in_dq = .not. in_dq
2724 i = i + 1; cycle
2725 end if
2726 if (in_sq .or. in_dq) then
2727 i = i + 1; cycle
2728 end if
2729
2730 ! Look for << not inside quotes (but NOT <<< which is here-string)
2731 if (line(i:i) == '<' .and. i + 1 <= line_len .and. line(i+1:i+1) == '<') then
2732 ! Skip <<< (here-string, not heredoc)
2733 if (i + 2 <= line_len .and. line(i+2:i+2) == '<') then
2734 i = i + 3
2735 cycle
2736 end if
2737 i = i + 2
2738 ! Skip <<- (strip tabs variant)
2739 strip_tabs = .false.
2740 if (i <= line_len .and. line(i:i) == '-') then
2741 strip_tabs = .true.
2742 i = i + 1
2743 end if
2744 ! Skip whitespace
2745 do while (i <= line_len .and. (line(i:i) == ' ' .or. line(i:i) == char(9)))
2746 i = i + 1
2747 end do
2748 if (i > line_len) return
2749
2750 ! Extract delimiter — may be quoted
2751 delim_quoted = .false.
2752 if (line(i:i) == "'" .or. line(i:i) == '"') then
2753 delim_quoted = .true.
2754 dstart = i + 1
2755 dend = index(line(dstart:line_len), line(i:i))
2756 if (dend > 0) then
2757 dend = dstart + dend - 2
2758 else
2759 dend = line_len ! unclosed quote
2760 end if
2761 else
2762 dstart = i
2763 dend = i
2764 do while (dend + 1 <= line_len .and. line(dend+1:dend+1) /= ' ' .and. &
2765 line(dend+1:dend+1) /= char(9) .and. line(dend+1:dend+1) /= ';')
2766 dend = dend + 1
2767 end do
2768 end if
2769
2770 if (dend >= dstart) then
2771 delimiter = line(dstart:dend)
2772 return
2773 end if
2774 end if
2775 i = i + 1
2776 end do
2777 end function
2778
2779 function needs_compound_continuation(input) result(needs_more)
2780 use lexer, only: tokenize
2781 character(len=*), intent(in) :: input
2782 logical :: needs_more
2783 type(token_t), allocatable :: tokens(:)
2784 integer :: num_tokens, i
2785 integer :: if_depth, do_depth, case_depth, brace_depth
2786
2787 needs_more = .false.
2788
2789 ! Tokenize the input using the lexer
2790 allocate(tokens(MAX_TOKENS))
2791 call tokenize(input, tokens, num_tokens)
2792
2793 if_depth = 0
2794 do_depth = 0
2795 case_depth = 0
2796 brace_depth = 0
2797
2798 do i = 1, num_tokens
2799 if (tokens(i)%token_type /= TOKEN_KEYWORD) cycle
2800 select case (trim(tokens(i)%value))
2801 case ('if')
2802 if_depth = if_depth + 1
2803 case ('fi')
2804 if_depth = if_depth - 1
2805 case ('do')
2806 do_depth = do_depth + 1
2807 case ('done')
2808 do_depth = do_depth - 1
2809 case ('case')
2810 case_depth = case_depth + 1
2811 case ('esac')
2812 case_depth = case_depth - 1
2813 case ('{')
2814 brace_depth = brace_depth + 1
2815 case ('}')
2816 brace_depth = brace_depth - 1
2817 end select
2818 end do
2819
2820 needs_more = (if_depth > 0 .or. do_depth > 0 .or. case_depth > 0 .or. brace_depth > 0)
2821 end function
2822
2823 function remove_line_continuations(input) result(output)
2824 character(len=*), intent(in) :: input
2825 character(len=len(input)) :: output
2826 integer :: i, j
2827
2828 output = ''
2829 i = 1
2830 j = 1
2831
2832 do while (i <= len_trim(input))
2833 ! Check for backslash followed by newline
2834 if (i < len_trim(input) .and. input(i:i) == char(92)) then
2835 if (input(i+1:i+1) == char(10)) then
2836 ! Skip both the backslash and newline
2837 i = i + 2
2838 cycle
2839 end if
2840 end if
2841
2842 ! Copy character to output
2843 output(j:j) = input(i:i)
2844 i = i + 1
2845 j = j + 1
2846 end do
2847 end function
2848
2849 end module parser